[llvm-branch-commits] [llvm] release/18.x: [PowerPC] Update chain uses when emitting lxsizx (#84892) (PR #85648)
https://github.com/ecnelises approved this pull request. LGTM, thanks. https://github.com/llvm/llvm-project/pull/85648 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 449f2f7 - [PowerPC] Duplicate inherited heuristic from base scheduler
Author: Qiu Chaofan Date: 2021-01-22T10:11:03+08:00 New Revision: 449f2f7140e1d70d9c08bb609cde6cdd144c6035 URL: https://github.com/llvm/llvm-project/commit/449f2f7140e1d70d9c08bb609cde6cdd144c6035 DIFF: https://github.com/llvm/llvm-project/commit/449f2f7140e1d70d9c08bb609cde6cdd144c6035.diff LOG: [PowerPC] Duplicate inherited heuristic from base scheduler PowerPC has its custom scheduler heuristic. It calls parent classes' tryCandidate in override version, but the function returns void, so this way doesn't actually help. This patch duplicates code from base scheduler into PPC machine scheduler class, which does what we wanted. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D94464 Added: Modified: llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp llvm/test/CodeGen/PowerPC/botheightreduce.mir llvm/test/CodeGen/PowerPC/loop-instr-form-prepare.ll llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll llvm/test/CodeGen/PowerPC/mma-intrinsics.ll llvm/test/CodeGen/PowerPC/mma-phi-accs.ll llvm/test/CodeGen/PowerPC/more-dq-form-prepare.ll llvm/test/CodeGen/PowerPC/rematerializable-instruction-machine-licm.ll llvm/test/CodeGen/PowerPC/sched-addi.ll llvm/test/CodeGen/PowerPC/sms-cpy-1.ll llvm/test/CodeGen/PowerPC/sms-phi-1.ll llvm/test/CodeGen/PowerPC/sms-simple.ll llvm/test/CodeGen/PowerPC/stack-clash-dynamic-alloca.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp index 5649d7d13966..ce615e554d94 100644 --- a/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp +++ b/llvm/lib/Target/PowerPC/PPCMachineScheduler.cpp @@ -49,10 +49,103 @@ bool PPCPreRASchedStrategy::biasAddiLoadCandidate(SchedCandidate , void PPCPreRASchedStrategy::tryCandidate(SchedCandidate , SchedCandidate , SchedBoundary *Zone) const { - GenericScheduler::tryCandidate(Cand, TryCand, Zone); + // From GenericScheduler::tryCandidate - if (!Cand.isValid() || !Zone) + // Initialize the candidate if needed. 
+ if (!Cand.isValid()) { +TryCand.Reason = NodeOrder; return; + } + + // Bias PhysReg Defs and copies to their uses and defined respectively. + if (tryGreater(biasPhysReg(TryCand.SU, TryCand.AtTop), + biasPhysReg(Cand.SU, Cand.AtTop), TryCand, Cand, PhysReg)) +return; + + // Avoid exceeding the target's limit. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.Excess, Cand.RPDelta.Excess, TryCand, Cand, + RegExcess, TRI, DAG->MF)) +return; + + // Avoid increasing the max critical pressure in the scheduled region. + if (DAG->isTrackingPressure() && + tryPressure(TryCand.RPDelta.CriticalMax, Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical, TRI, DAG->MF)) +return; + + // We only compare a subset of features when comparing nodes between + // Top and Bottom boundary. Some properties are simply incomparable, in many + // other instances we should only override the other boundary if something + // is a clear good pick on one boundary. Skip heuristics that are more + // "tie-breaking" in nature. + bool SameBoundary = Zone != nullptr; + if (SameBoundary) { +// For loops that are acyclic path limited, aggressively schedule for +// latency. Within an single cycle, whenever CurrMOps > 0, allow normal +// heuristics to take precedence. +if (Rem.IsAcyclicLatencyLimited && !Zone->getCurrMOps() && +tryLatency(TryCand, Cand, *Zone)) + return; + +// Prioritize instructions that read unbuffered resources by stall cycles. +if (tryLess(Zone->getLatencyStallCycles(TryCand.SU), +Zone->getLatencyStallCycles(Cand.SU), TryCand, Cand, Stall)) + return; + } + + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *CandNextClusterSU = + Cand.AtTop ? 
DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + const SUnit *TryCandNextClusterSU = + TryCand.AtTop ? DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == TryCandNextClusterSU, + Cand.SU == CandNextClusterSU, TryCand, Cand, Cluster)) +return; + + if (SameBoundary) { +// Weak edges are for clustering and other constraints. +if (tryLess(getWeakLeft(TryCand.SU, TryCand.AtTop), +getWeakLeft(Cand.SU, Cand.AtTop), TryCand, Cand, Weak)) + return; + } + + // Avoid increasing the max pressure of the entire region. +
[llvm-branch-commits] [llvm] f776d8b - [Legalizer] Promote result type in expanding FP_TO_XINT
Author: Qiu Chaofan Date: 2021-01-18T11:56:11+08:00 New Revision: f776d8b12f0ec19cfff60c967565788ce4f926e6 URL: https://github.com/llvm/llvm-project/commit/f776d8b12f0ec19cfff60c967565788ce4f926e6 DIFF: https://github.com/llvm/llvm-project/commit/f776d8b12f0ec19cfff60c967565788ce4f926e6.diff LOG: [Legalizer] Promote result type in expanding FP_TO_XINT This patch promotes result integer type of FP_TO_XINT in expanding. So crash in conversion from ppc_fp128 to i1 will be fixed. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D92473 Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index ccd2bf2cc924..966645e3256d 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -913,6 +913,24 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { 0); } +// Even if the result type is legal, no libcall may exactly match. (e.g. We +// don't have FP-i8 conversions) This helper method looks for an appropriate +// promoted libcall. +static RTLIB::Libcall findFPToIntLibcall(EVT SrcVT, EVT RetVT, EVT &Promoted, + bool Signed) { + RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; + for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; + IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; + ++IntVT) { +Promoted = (MVT::SimpleValueType)IntVT; +// The type needs to big enough to hold the result. +if (Promoted.bitsGE(RetVT)) + LC = Signed ? 
RTLIB::getFPTOSINT(SrcVT, Promoted) + : RTLIB::getFPTOUINT(SrcVT, Promoted); + } + return LC; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { bool IsStrict = N->isStrictFPOpcode(); bool Signed = N->getOpcode() == ISD::FP_TO_SINT || @@ -928,16 +946,9 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_XINT(SDNode *N) { // a larger type, eg: fp -> i32. Even if it is legal, no libcall may exactly // match, eg. we don't have fp -> i8 conversions. // Look for an appropriate libcall. - RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; - for (unsigned IntVT = MVT::FIRST_INTEGER_VALUETYPE; - IntVT <= MVT::LAST_INTEGER_VALUETYPE && LC == RTLIB::UNKNOWN_LIBCALL; - ++IntVT) { -NVT = (MVT::SimpleValueType)IntVT; -// The type needs to big enough to hold the result. -if (NVT.bitsGE(RVT)) - LC = Signed ? RTLIB::getFPTOSINT(SVT, NVT) : RTLIB::getFPTOUINT(SVT, NVT); - } - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); + RTLIB::Libcall LC = findFPToIntLibcall(SVT, RVT, NVT, Signed); + assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() && + "Unsupported FP_TO_XINT!"); Op = GetSoftenedFloat(Op); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); @@ -1895,12 +1906,14 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) { N->getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Op = N->getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - RTLIB::Libcall LC = Signed ? 
RTLIB::getFPTOSINT(Op.getValueType(), RVT) - : RTLIB::getFPTOUINT(Op.getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); + + EVT NVT; + RTLIB::Libcall LC = findFPToIntLibcall(Op.getValueType(), RVT, NVT, Signed); + assert(LC != RTLIB::UNKNOWN_LIBCALL && NVT.isSimple() && + "Unsupported FP_TO_XINT!"); TargetLowering::MakeLibCallOptions CallOptions; std::pair<SDValue, SDValue> Tmp = - TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain); + TLI.makeLibCall(DAG, LC, NVT, Op, CallOptions, dl, Chain); if (!IsStrict) return Tmp.first; diff --git a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll index 864a573896b2..44676c7a827b 100644 --- a/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/PowerPC/ppcf128-constrained-fp-intrinsics.ll @@ -1975,6 +1975,100 @@ entry: ret ppc_fp128 %conv } +define i1 @ppcq_to_s1(ppc_fp128 %a) { +; PC64LE-LABEL: ppcq_to_s1: +; PC64LE: # %bb.0: # %entry +; PC64LE-NEXT:mflr 0 +; PC64LE-NEXT:std 0, 16(1) +; PC64LE-NEXT:stdu 1, -32(1) +; PC64LE-NEXT:.cfi_def_cfa_offset 32 +; PC64LE-NEXT:.cfi_offset lr, 16 +; PC64LE-NEXT:bl __gcc_qtou +; PC64LE-NEXT:nop +; PC64LE-NEXT:addi 1, 1, 32 +; PC64LE-NEXT:ld 0, 16(1) +; PC64LE-NEXT:mtlr 0 +; PC64LE-NEXT:blr +; +; PC64LE9-LABEL: ppcq_to_s1: +; PC64LE9: # %bb.0: # %entry +; PC64LE9-NEXT:mflr 0 +; PC64LE9-NEXT:std
[llvm-branch-commits] [llvm] 2d98907 - [PowerPC] [NFC] Add AIX triple to some regression tests
Author: Qiu Chaofan Date: 2021-01-18T11:44:00+08:00 New Revision: 2d9890775f523a7a7ed2d7d064273bf7e28ebf20 URL: https://github.com/llvm/llvm-project/commit/2d9890775f523a7a7ed2d7d064273bf7e28ebf20 DIFF: https://github.com/llvm/llvm-project/commit/2d9890775f523a7a7ed2d7d064273bf7e28ebf20.diff LOG: [PowerPC] [NFC] Add AIX triple to some regression tests As part of the effort to improve AIX support, regression test coverage misses quite a lot for AIX subtarget. This patch adds AIX triple to those don't need extra change, and we can cover more cases in following commits. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D94159 Added: Modified: llvm/test/CodeGen/PowerPC/and-mask.ll llvm/test/CodeGen/PowerPC/bool-math.ll llvm/test/CodeGen/PowerPC/bswap64.ll llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll llvm/test/CodeGen/PowerPC/cmpb-ppc32.ll llvm/test/CodeGen/PowerPC/cmpb.ll llvm/test/CodeGen/PowerPC/constant-combines.ll llvm/test/CodeGen/PowerPC/constants-i64.ll llvm/test/CodeGen/PowerPC/fdiv.ll llvm/test/CodeGen/PowerPC/fma-assoc.ll llvm/test/CodeGen/PowerPC/ftrunc-vec.ll llvm/test/CodeGen/PowerPC/hoist-logic.ll llvm/test/CodeGen/PowerPC/inc-of-add.ll llvm/test/CodeGen/PowerPC/maddld.ll llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll llvm/test/CodeGen/PowerPC/mulli.ll llvm/test/CodeGen/PowerPC/ori_imm32.ll llvm/test/CodeGen/PowerPC/ori_imm64.ll llvm/test/CodeGen/PowerPC/popcnt-zext.ll llvm/test/CodeGen/PowerPC/pr33093.ll llvm/test/CodeGen/PowerPC/pr39478.ll llvm/test/CodeGen/PowerPC/rotl-2.ll llvm/test/CodeGen/PowerPC/setcc-to-sub.ll llvm/test/CodeGen/PowerPC/shift-cmp.ll llvm/test/CodeGen/PowerPC/unal-vec-ldst.ll llvm/test/CodeGen/PowerPC/vec_clz.ll llvm/test/CodeGen/PowerPC/vec_constants.ll llvm/test/CodeGen/PowerPC/vec_revb.ll llvm/test/CodeGen/PowerPC/vec_shuffle_p8vector.ll llvm/test/CodeGen/PowerPC/vmladduhm.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/and-mask.ll b/llvm/test/CodeGen/PowerPC/and-mask.ll index 
489880b29e67..e5664f92b3c9 100644 --- a/llvm/test/CodeGen/PowerPC/and-mask.ll +++ b/llvm/test/CodeGen/PowerPC/and-mask.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s +; RUN: llc -mtriple=powerpc64-ibm-aix-xcoff < %s | FileCheck %s ; mask 0xFFFE define i32 @test1(i32 %a) { diff --git a/llvm/test/CodeGen/PowerPC/bool-math.ll b/llvm/test/CodeGen/PowerPC/bool-math.ll index 9ec3c7b4671a..9e443fb0e507 100644 --- a/llvm/test/CodeGen/PowerPC/bool-math.ll +++ b/llvm/test/CodeGen/PowerPC/bool-math.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -mtriple=powerpc64le-- -verify-machineinstrs | FileCheck %s +; RUN: llc < %s -mtriple=powerpc64-ibm-aix-xcoff -verify-machineinstrs | FileCheck %s define i32 @sub_zext_cmp_mask_same_size_result(i32 %x) { ; CHECK-LABEL: sub_zext_cmp_mask_same_size_result: diff --git a/llvm/test/CodeGen/PowerPC/bswap64.ll b/llvm/test/CodeGen/PowerPC/bswap64.ll index 75a839a3b95f..ef3cd4aa72ca 100644 --- a/llvm/test/CodeGen/PowerPC/bswap64.ll +++ b/llvm/test/CodeGen/PowerPC/bswap64.ll @@ -1,8 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \ ; RUN: -mcpu=pwr9 | FileCheck %s +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -vec-extabi | FileCheck %s ; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64le-unknown-unknown \ ; RUN: -mcpu=pwr9 -mattr=-altivec | FileCheck %s --check-prefix=NO-ALTIVEC +; RUN: llc -verify-machineinstrs < %s -mtriple=powerpc64-ibm-aix-xcoff \ +; RUN: -mcpu=pwr9 -mattr=-altivec | FileCheck %s --check-prefix=NO-ALTIVEC declare i64 @llvm.bswap.i64(i64) diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll index 
d53b442fef71..2f96d21f6320 100644 --- a/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc < %s -verify-machineinstrs -mtriple powerpc64le -mcpu=pwr9 | FileCheck %s +; RUN: llc < %s -verify-machineinstrs -mtriple powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 | FileCheck %s define i64 @raw() { ; CHECK-LABEL: raw: diff --git a/llvm/test/CodeGen/PowerPC/cmpb-ppc32.ll b/llvm/test/CodeGen/PowerPC/cmpb-ppc32.ll index ab63784134f9..af2904cad806 100644 --- a/llvm/test/CodeGen/PowerPC/cmpb-ppc32.ll +++ b/llvm/test/CodeGen/PowerPC/cmpb-ppc32.ll @@ -1,7 +1,6 @@ ; NOTE: Assertions have been
[llvm-branch-commits] [clang] 168be42 - [Clang] Mutate long-double math builtins into f128 under IEEE-quad
Author: Qiu Chaofan Date: 2021-01-15T16:56:20+08:00 New Revision: 168be4208304e36d3bb156b5c413b340a391383e URL: https://github.com/llvm/llvm-project/commit/168be4208304e36d3bb156b5c413b340a391383e DIFF: https://github.com/llvm/llvm-project/commit/168be4208304e36d3bb156b5c413b340a391383e.diff LOG: [Clang] Mutate long-double math builtins into f128 under IEEE-quad Under -mabi=ieeelongdouble on PowerPC, IEEE-quad floating point semantic is used for long double. This patch mutates call to related builtins into f128 version on PowerPC. And in theory, this should be applied to other targets when their backend supports IEEE 128-bit style libcalls. GCC already has these mutations except nansl, which is not available on PowerPC along with other variants (nans, nansf). Reviewed By: RKSimon, nemanjai Differential Revision: https://reviews.llvm.org/D92080 Added: Modified: clang/lib/CodeGen/CGBuiltin.cpp clang/test/CodeGen/math-builtins-long.c Removed: diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 7fa4e4d270ad..25ebb67c2ab6 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2107,6 +2107,78 @@ RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) { return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt })); } +// Map math builtins for long-double to f128 version. 
+static unsigned mutateLongDoubleBuiltin(unsigned BuiltinID) { + switch (BuiltinID) { +#define MUTATE_LDBL(func) \ + case Builtin::BI__builtin_##func##l: \ +return Builtin::BI__builtin_##func##f128; + MUTATE_LDBL(sqrt) + MUTATE_LDBL(cbrt) + MUTATE_LDBL(fabs) + MUTATE_LDBL(log) + MUTATE_LDBL(log2) + MUTATE_LDBL(log10) + MUTATE_LDBL(log1p) + MUTATE_LDBL(logb) + MUTATE_LDBL(exp) + MUTATE_LDBL(exp2) + MUTATE_LDBL(expm1) + MUTATE_LDBL(fdim) + MUTATE_LDBL(hypot) + MUTATE_LDBL(ilogb) + MUTATE_LDBL(pow) + MUTATE_LDBL(fmin) + MUTATE_LDBL(fmax) + MUTATE_LDBL(ceil) + MUTATE_LDBL(trunc) + MUTATE_LDBL(rint) + MUTATE_LDBL(nearbyint) + MUTATE_LDBL(round) + MUTATE_LDBL(floor) + MUTATE_LDBL(lround) + MUTATE_LDBL(llround) + MUTATE_LDBL(lrint) + MUTATE_LDBL(llrint) + MUTATE_LDBL(fmod) + MUTATE_LDBL(modf) + MUTATE_LDBL(nan) + MUTATE_LDBL(nans) + MUTATE_LDBL(inf) + MUTATE_LDBL(fma) + MUTATE_LDBL(sin) + MUTATE_LDBL(cos) + MUTATE_LDBL(tan) + MUTATE_LDBL(sinh) + MUTATE_LDBL(cosh) + MUTATE_LDBL(tanh) + MUTATE_LDBL(asin) + MUTATE_LDBL(acos) + MUTATE_LDBL(atan) + MUTATE_LDBL(asinh) + MUTATE_LDBL(acosh) + MUTATE_LDBL(atanh) + MUTATE_LDBL(atan2) + MUTATE_LDBL(erf) + MUTATE_LDBL(erfc) + MUTATE_LDBL(ldexp) + MUTATE_LDBL(frexp) + MUTATE_LDBL(huge_val) + MUTATE_LDBL(copysign) + MUTATE_LDBL(nextafter) + MUTATE_LDBL(nexttoward) + MUTATE_LDBL(remainder) + MUTATE_LDBL(remquo) + MUTATE_LDBL(scalbln) + MUTATE_LDBL(scalbn) + MUTATE_LDBL(tgamma) + MUTATE_LDBL(lgamma) +#undef MUTATE_LDBL + default: +return BuiltinID; + } +} + RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue) { @@ -2123,6 +2195,14 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, Result.Val.getFloat())); } + // If current long-double semantics is IEEE 128-bit, replace math builtins + // of long-double with f128 equivalent. 
+ // TODO: This mutation should also be applied to other targets other than PPC, + // after backend supports IEEE 128-bit style libcalls. + if (getTarget().getTriple().isPPC64() && + &getTarget().getLongDoubleFormat() == &llvm::APFloat::IEEEquad()) +BuiltinID = mutateLongDoubleBuiltin(BuiltinID); + // If the builtin has been declared explicitly with an assembler label, // disable the specialized emitting below. Ideally we should communicate the // rename in IR, or at least avoid generating the intrinsic calls that are diff --git a/clang/test/CodeGen/math-builtins-long.c b/clang/test/CodeGen/math-builtins-long.c index bf7ebd31..f5cee75acad6 100644 --- a/clang/test/CodeGen/math-builtins-long.c +++ b/clang/test/CodeGen/math-builtins-long.c @@ -13,13 +13,13 @@ void foo(long double f, long double *l, int *i, const char *c) { // F80: call x86_fp80 @fmodl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) // PPC: call ppc_fp128 @fmodl(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) // X86F128: call fp128 @fmodl(fp128 %{{.+}}, fp128 %{{.+}}) - // PPCF128: call fp128 @fmodl(fp128 %{{.+}}, fp128 %{{.+}}) + // PPCF128: call fp128 @fmodf128(fp128 %{{.+}}, fp128 %{{.+}}) __builtin_fmodl(f,f); // F80: call x86_fp80 @atan2l(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) // PPC: call ppc_fp128 @atan2l(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) // X86F128: call fp128 @atan2l(fp128 %{{.+}},
[llvm-branch-commits] [llvm] 6175fcf - [NFC] Update some PPC tests marked as auto-generated
Author: Qiu Chaofan Date: 2021-01-08T17:59:13+08:00 New Revision: 6175fcf01f17e0bd1155aaaba977b9baa88ee61d URL: https://github.com/llvm/llvm-project/commit/6175fcf01f17e0bd1155aaaba977b9baa88ee61d DIFF: https://github.com/llvm/llvm-project/commit/6175fcf01f17e0bd1155aaaba977b9baa88ee61d.diff LOG: [NFC] Update some PPC tests marked as auto-generated Update CodeGen regression tests with marker at first line telling it's auto-generated by the script, under PowerPC directory. For some reason, these tests are generated but manually written, which makes things unclear when someone's change affecting them. However, some tests only show simple change after re-generated, like extra blank lines, disappearing '.localentry', etc. Besides, some tests are generated but added checks for debug output. This commit doesn't try updating them. Added: Modified: llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll llvm/test/CodeGen/PowerPC/maddld.ll llvm/test/CodeGen/PowerPC/no-ctr-loop-if-exit-in-nested-loop.ll llvm/test/CodeGen/PowerPC/noPermuteFormasking.ll llvm/test/CodeGen/PowerPC/ppc-32bit-shift.ll llvm/test/CodeGen/PowerPC/pr33547.ll llvm/test/CodeGen/PowerPC/pr35688.ll llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll llvm/test/CodeGen/PowerPC/tls-pie-xform.ll llvm/test/CodeGen/PowerPC/vec_constants.ll llvm/test/CodeGen/PowerPC/vec_int_ext.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll index 72e17f820adb..8d96a784f2bf 100644 --- a/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll +++ b/llvm/test/CodeGen/PowerPC/lsr-ctrloop.ll @@ -6,15 +6,23 @@ ; for (i = 0; i < 8000; i++) ; data[i] = d; ; } -; +; ; This loop will be unrolled by 96 and vectorized on power9. ; icmp for loop iteration index and loop trip count(384) has LSRUse for 'reg({0,+,384})'. 
-; Make sure above icmp does not impact LSR choose best formulae sets based on 'reg({(192 + %0),+,384})' +; Make sure above icmp does not impact LSR choose best formulae sets based on 'reg({(192 + %0),+,384})' define void @foo(float* nocapture %data, float %d) { ; CHECK-LABEL: foo: -; CHECK: .LBB0_1: # %vector.body -; CHECK: stxv 0, -192(4) +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li 5, 83 +; CHECK-NEXT:addi 4, 3, 192 +; CHECK-NEXT:xscvdpspn 0, 1 +; CHECK-NEXT:mtctr 5 +; CHECK-NEXT:xxspltw 0, 0, 0 +; CHECK-NEXT:.p2align 4 +; CHECK-NEXT: .LBB0_1: # %vector.body +; CHECK-NEXT:# +; CHECK-NEXT:stxv 0, -192(4) ; CHECK-NEXT:stxv 0, -176(4) ; CHECK-NEXT:stxv 0, -160(4) ; CHECK-NEXT:stxv 0, -144(4) @@ -40,6 +48,40 @@ define void @foo(float* nocapture %data, float %d) { ; CHECK-NEXT:stxv 0, 176(4) ; CHECK-NEXT:addi 4, 4, 384 ; CHECK-NEXT:bdnz .LBB0_1 +; CHECK-NEXT: # %bb.2: # %for.body +; CHECK-NEXT:stfs 1, 31872(3) +; CHECK-NEXT:stfs 1, 31876(3) +; CHECK-NEXT:stfs 1, 31880(3) +; CHECK-NEXT:stfs 1, 31884(3) +; CHECK-NEXT:stfs 1, 31888(3) +; CHECK-NEXT:stfs 1, 31892(3) +; CHECK-NEXT:stfs 1, 31896(3) +; CHECK-NEXT:stfs 1, 31900(3) +; CHECK-NEXT:stfs 1, 31904(3) +; CHECK-NEXT:stfs 1, 31908(3) +; CHECK-NEXT:stfs 1, 31912(3) +; CHECK-NEXT:stfs 1, 31916(3) +; CHECK-NEXT:stfs 1, 31920(3) +; CHECK-NEXT:stfs 1, 31924(3) +; CHECK-NEXT:stfs 1, 31928(3) +; CHECK-NEXT:stfs 1, 31932(3) +; CHECK-NEXT:stfs 1, 31936(3) +; CHECK-NEXT:stfs 1, 31940(3) +; CHECK-NEXT:stfs 1, 31944(3) +; CHECK-NEXT:stfs 1, 31948(3) +; CHECK-NEXT:stfs 1, 31952(3) +; CHECK-NEXT:stfs 1, 31956(3) +; CHECK-NEXT:stfs 1, 31960(3) +; CHECK-NEXT:stfs 1, 31964(3) +; CHECK-NEXT:stfs 1, 31968(3) +; CHECK-NEXT:stfs 1, 31972(3) +; CHECK-NEXT:stfs 1, 31976(3) +; CHECK-NEXT:stfs 1, 31980(3) +; CHECK-NEXT:stfs 1, 31984(3) +; CHECK-NEXT:stfs 1, 31988(3) +; CHECK-NEXT:stfs 1, 31992(3) +; CHECK-NEXT:stfs 1, 31996(3) +; CHECK-NEXT:blr entry: %broadcast.splatinsert16 = insertelement <4 x float> undef, float %d, i32 0 diff --git 
a/llvm/test/CodeGen/PowerPC/maddld.ll b/llvm/test/CodeGen/PowerPC/maddld.ll index 3b60a8f88b0b..03ee27a76c94 100644 --- a/llvm/test/CodeGen/PowerPC/maddld.ll +++ b/llvm/test/CodeGen/PowerPC/maddld.ll @@ -1,14 +1,18 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P9 -; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,CHECK-P8 +; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=CHECK-P9 +; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s
[llvm-branch-commits] [llvm] b6c8feb - [NFC] [PowerPC] Remove dead code in BUILD_VECTOR peephole
Author: Qiu Chaofan Date: 2021-01-05T11:35:00+08:00 New Revision: b6c8feb29fce39121884f7e08ec6eb0f58da3fb7 URL: https://github.com/llvm/llvm-project/commit/b6c8feb29fce39121884f7e08ec6eb0f58da3fb7 DIFF: https://github.com/llvm/llvm-project/commit/b6c8feb29fce39121884f7e08ec6eb0f58da3fb7.diff LOG: [NFC] [PowerPC] Remove dead code in BUILD_VECTOR peephole The piece of code tries to use splat+shift to lower build_vector with repeating bit pattern. And immediate field of vector splat is only 5 bits (-16~15). It iterates over them one by one to find which shifts/rotates to number in build_vector. This patch removes code to try matching constant with algebraic right-shift because that's meaningless - any negative number's algebraic right-shift won't produce result smaller than itself. Besides, code (int)((unsigned)i >> j) means logical shift-right in C. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D93937 Added: Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index e951679f92fa..1b1e9e019476 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -9555,17 +9555,6 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); } -// vsplti + sra self. -if (SextVal == (int)((unsigned)i >> TypeShiftAmt)) { - SDValue Res = getCanonicalConstSplat(i, SplatSize, MVT::Other, DAG, dl); - static const unsigned IIDs[] = { // Intrinsic to use for each size. -Intrinsic::ppc_altivec_vsrab, Intrinsic::ppc_altivec_vsrah, 0, -Intrinsic::ppc_altivec_vsraw - }; - Res = BuildIntrinsicOp(IIDs[SplatSize-1], Res, Res, DAG, dl); - return DAG.getNode(ISD::BITCAST, dl, Op.getValueType(), Res); -} - // vsplti + rol self. 
if (SextVal == (int)(((unsigned)i << TypeShiftAmt) | ((unsigned)i >> (SplatBitSize-TypeShiftAmt)))) { ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 48340fb - [NFC] [PowerPC] Update vec_constants test to reflect more patterns
Author: Qiu Chaofan Date: 2021-01-05T11:29:08+08:00 New Revision: 48340fbe6a1a126298c4fe16dcd186d94e485203 URL: https://github.com/llvm/llvm-project/commit/48340fbe6a1a126298c4fe16dcd186d94e485203 DIFF: https://github.com/llvm/llvm-project/commit/48340fbe6a1a126298c4fe16dcd186d94e485203.diff LOG: [NFC] [PowerPC] Update vec_constants test to reflect more patterns This patch uses update_llc_check script to update vec_constants.ll, and add two cases to cover 'vsplti+vsldoi' with 16-bit and 24-bit offset. Added: Modified: llvm/test/CodeGen/PowerPC/vec_constants.ll Removed: diff --git a/llvm/test/CodeGen/PowerPC/vec_constants.ll b/llvm/test/CodeGen/PowerPC/vec_constants.ll index d9257c0b41c5..71f448ee66b2 100644 --- a/llvm/test/CodeGen/PowerPC/vec_constants.ll +++ b/llvm/test/CodeGen/PowerPC/vec_constants.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s ; RUN: llc -verify-machineinstrs -O0 -mcpu=pwr7 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s @@ -20,65 +21,83 @@ define void @test1(<4 x i32>* %P1, <4 x i32>* %P2, <4 x float>* %P3) nounwind { } define <4 x i32> @test_30() nounwind { - ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > - ; CHECK-LABEL: test_30: -; CHECK: vspltisw -; CHECK-NEXT: vadduwm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, 15 +; CHECK-NEXT:vadduwm 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> < i32 30, i32 30, i32 30, i32 30 > } define <4 x i32> @test_29() nounwind { - ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > - ; CHECK-LABEL: test_29: -; CHECK: vspltisw -; CHECK-NEXT: vspltisw -; CHECK-NEXT: vsubuwm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 3, -16 +; CHECK-NEXT:vspltisw 2, 13 +; CHECK-NEXT:vsubuwm 2, 2, 3 +; CHECK-NEXT:blr + ret <4 x i32> < i32 29, i32 29, i32 29, i32 29 > } define <8 x i16> @test_n30() nounwind { - ret <8 x i16> < i16 -30, 
i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > - ; CHECK-LABEL: test_n30: -; CHECK: vspltish -; CHECK-NEXT: vadduhm -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltish 2, -15 +; CHECK-NEXT:vadduhm 2, 2, 2 +; CHECK-NEXT:blr + ret <8 x i16> < i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30, i16 -30 > } define <16 x i8> @test_n104() nounwind { - ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > - ; CHECK-LABEL: test_n104: -; CHECK: vspltisb -; CHECK-NEXT: vslb -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisb 2, -13 +; CHECK-NEXT:vslb 2, 2, 2 +; CHECK-NEXT:blr + ret <16 x i8> < i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104, i8 -104 > } define <4 x i32> @test_vsldoi() nounwind { - ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > - ; CHECK-LABEL: test_vsldoi: -; CHECK: vspltisw -; CHECK-NEXT: vsldoi -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, 2 +; CHECK-NEXT:vsldoi 2, 2, 2, 1 +; CHECK-NEXT:blr + ret <4 x i32> < i32 512, i32 512, i32 512, i32 512 > } define <8 x i16> @test_vsldoi_65023() nounwind { +; CHECK-LABEL: test_vsldoi_65023: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltish 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 1 +; CHECK-NEXT:blr ret <8 x i16> < i16 65023, i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023,i16 65023 > +} -; CHECK-LABEL: test_vsldoi_65023: -; CHECK: vspltish -; CHECK-NEXT: vsldoi -; CHECK-NEXT: blr +define <4 x i32> @test_vsldoi_x16() nounwind { +; CHECK-LABEL: test_vsldoi_x16: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> <i32 -131073, i32 -131073, i32 -131073, i32 -131073> } -define <4 x i32> @test_rol() nounwind { - ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > +define <4 x i32> @test_vsldoi_x24() nounwind { +; CHECK-LABEL: 
test_vsldoi_x24: +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -3 +; CHECK-NEXT:vsldoi 2, 2, 2, 3 +; CHECK-NEXT:blr + ret <4 x i32> <i32 -33554433, i32 -33554433, i32 -33554433, i32 -33554433> +} +define <4 x i32> @test_rol() nounwind { ; CHECK-LABEL: test_rol: -; CHECK: vspltisw -; CHECK-NEXT: vrlw -; CHECK-NEXT: blr +; CHECK: # %bb.0: +; CHECK-NEXT:vspltisw 2, -12 +; CHECK-NEXT:vrlw 2, 2, 2 +; CHECK-NEXT:blr + ret <4 x i32> < i32 -11534337, i32 -11534337, i32 -11534337, i32 -11534337 > } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org
[llvm-branch-commits] [llvm] ae61485 - [UpdateTestChecks] Fix PowerPC RE to support AIX assembly
Author: Qiu Chaofan Date: 2021-01-05T10:28:00+08:00 New Revision: ae614851631387f86cb7ab1f33a4851a6549c279 URL: https://github.com/llvm/llvm-project/commit/ae614851631387f86cb7ab1f33a4851a6549c279 DIFF: https://github.com/llvm/llvm-project/commit/ae614851631387f86cb7ab1f33a4851a6549c279.diff LOG: [UpdateTestChecks] Fix PowerPC RE to support AIX assembly Current update_llc_test_checks.py cannot generate checks for AIX (powerpc64-ibm-aix-xcoff) properly. Assembly generated is little bit different from Linux. So I use begin function comment here to capture function name. Reviewed By: MaskRay, steven.zhang Differential Revision: https://reviews.llvm.org/D93676 Added: Modified: llvm/test/CodeGen/PowerPC/aix-lr.ll llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.nogenerated.expected llvm/utils/UpdateTestChecks/asm.py Removed: diff --git a/llvm/test/CodeGen/PowerPC/aix-lr.ll b/llvm/test/CodeGen/PowerPC/aix-lr.ll index ea92daf04f07..38ebf297e591 100644 --- a/llvm/test/CodeGen/PowerPC/aix-lr.ll +++ b/llvm/test/CodeGen/PowerPC/aix-lr.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mtriple powerpc-ibm-aix-xcoff < %s | \ ; RUN: FileCheck --check-prefix=32BIT %s @@ -5,25 +6,32 @@ ; RUN: FileCheck --check-prefix=64BIT %s define void @bar() { +; 32BIT-LABEL: bar: +; 32BIT: # %bb.0: # %entry +; 32BIT-NEXT:mflr 0 +; 32BIT-NEXT:stw 0, 8(1) +; 32BIT-NEXT:stwu 1, -64(1) +; 32BIT-NEXT:bl .foo[PR] +; 32BIT-NEXT:nop +; 32BIT-NEXT:addi 1, 1, 64 +; 32BIT-NEXT:lwz 0, 8(1) +; 32BIT-NEXT:mtlr 0 +; 32BIT-NEXT:blr +; +; 64BIT-LABEL: bar: +; 64BIT: # %bb.0: # %entry +; 64BIT-NEXT:mflr 0 +; 64BIT-NEXT:std 0, 16(1) +; 64BIT-NEXT:stdu 1, -112(1) +; 64BIT-NEXT:bl .foo[PR] +; 64BIT-NEXT:nop +; 
64BIT-NEXT:addi 1, 1, 112 +; 64BIT-NEXT:ld 0, 16(1) +; 64BIT-NEXT:mtlr 0 +; 64BIT-NEXT:blr entry: -; 32BIT: mflr 0 -; 32BIT: stw 0, 8(1) -; 32BIT: stwu 1, -64(1) -; 32BIT: bl .foo -; 32BIT: nop -; 32BIT: addi 1, 1, 64 -; 32BIT: lwz 0, 8(1) -; 32BIT: mtlr 0 -; 64BIT: mflr 0 -; 64BIT: std 0, 16(1) -; 64BIT: stdu 1, -112(1) -; 64BIT: bl .foo -; 64BIT: nop -; 64BIT: addi 1, 1, 112 -; 64BIT: ld 0, 16(1) -; 64BIT: mtlr 0 call void bitcast (void (...)* @foo to void ()*)() ret void diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll index cd545199697f..d31a3361635a 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll @@ -1,4 +1,5 @@ ; RUN: llc -enable-machine-outliner -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; RUN: llc -enable-machine-outliner -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s -check-prefix=AIX ; ; NOTE: Machine outliner doesn't run. 
@x = global i32 0, align 4 diff --git a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected index 57298fa6e019..1fca598d23fe 100644 --- a/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected +++ b/llvm/test/tools/UpdateTestChecks/update_llc_test_checks/Inputs/ppc_generated_funcs.ll.generated.expected @@ -1,5 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --include-generated-funcs ; RUN: llc -enable-machine-outliner -mtriple=ppc32-unknown-linux < %s | FileCheck %s +; RUN: llc -enable-machine-outliner -mtriple=powerpc-ibm-aix-xcoff < %s | FileCheck %s -check-prefix=AIX ; NOTE: Machine outliner doesn't run. @x = global i32 0, align 4 @@ -127,3 +128,62 @@ attributes #0 = { noredzone nounwind ssp uwtable "frame-pointer"="all" } ; CHECK-NEXT:lwz 31, 28(1) ; CHECK-NEXT:addi 1, 1, 32 ; CHECK-NEXT:blr +; +; AIX-LABEL: check_boundaries: +; AIX: # %bb.0: +; AIX-NEXT:stw 31, -4(1) +; AIX-NEXT:stwu 1, -48(1) +; AIX-NEXT:mr 31, 1 +; AIX-NEXT:li 4, 0 +; AIX-NEXT:li 3, 1 +; AIX-NEXT:stw 4, 40(31) +; AIX-NEXT:li 4, 2 +; AIX-NEXT:li 5, 3 +; AIX-NEXT:li 6, 4 +; AIX-NEXT:cmplwi 3, 0 +; AIX-NEXT:stw 3, 36(31) +; AIX-NEXT:stw 4, 32(31) +; AIX-NEXT:stw 5, 28(31) +; AIX-NEXT:stw 6, 24(31) +; AIX-NEXT:beq 0, L..BB0_2 +; AIX-NEXT: # %bb.1: +; AIX-NEXT:
[llvm-branch-commits] [clang] f141d1a - [NFC] Pre-commit test for long-double builtins
Author: Qiu Chaofan Date: 2020-12-16T17:19:54+08:00 New Revision: f141d1afc5068d5c5e2c47e25a5d4b4914116b92 URL: https://github.com/llvm/llvm-project/commit/f141d1afc5068d5c5e2c47e25a5d4b4914116b92 DIFF: https://github.com/llvm/llvm-project/commit/f141d1afc5068d5c5e2c47e25a5d4b4914116b92.diff LOG: [NFC] Pre-commit test for long-double builtins This test reflects clang behavior on long-double type math library builtins under default or explicit 128-bit long-double options. Added: clang/test/CodeGen/math-builtins-long.c Modified: Removed: diff --git a/clang/test/CodeGen/math-builtins-long.c b/clang/test/CodeGen/math-builtins-long.c new file mode 100644 index ..bf7ebd31 --- /dev/null +++ b/clang/test/CodeGen/math-builtins-long.c @@ -0,0 +1,371 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -w -S -o - -emit-llvm %s \ +// RUN: -fmath-errno | FileCheck %s -check-prefix=F80 +// RUN: %clang_cc1 -triple ppc64le-unknown-unknown -w -S -o - -emit-llvm %s \ +// RUN: -fmath-errno | FileCheck %s -check-prefix=PPC +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -mlong-double-128 -w -S \ +// RUN: -o - -emit-llvm %s -fmath-errno | FileCheck %s -check-prefix=X86F128 +// RUN: %clang_cc1 -triple ppc64le-unknown-unknown -mabi=ieeelongdouble -w -S \ +// RUN: -o - -emit-llvm %s -fmath-errno | FileCheck %s -check-prefix=PPCF128 + +void bar(long double); + +void foo(long double f, long double *l, int *i, const char *c) { + // F80: call x86_fp80 @fmodl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // PPC: call ppc_fp128 @fmodl(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) + // X86F128: call fp128 @fmodl(fp128 %{{.+}}, fp128 %{{.+}}) + // PPCF128: call fp128 @fmodl(fp128 %{{.+}}, fp128 %{{.+}}) + __builtin_fmodl(f,f); + + // F80: call x86_fp80 @atan2l(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // PPC: call ppc_fp128 @atan2l(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) + // X86F128: call fp128 @atan2l(fp128 %{{.+}}, fp128 %{{.+}}) + // PPCF128: call fp128 @atan2l(fp128 %{{.+}}, fp128 %{{.+}}) + 
__builtin_atan2l(f,f); + + // F80: call x86_fp80 @llvm.copysign.f80(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // PPC: call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) + // X86F128: call fp128 @llvm.copysign.f128(fp128 %{{.+}}, fp128 %{{.+}}) + // PPCF128: call fp128 @llvm.copysign.f128(fp128 %{{.+}}, fp128 %{{.+}}) + __builtin_copysignl(f,f); + + // F80: call x86_fp80 @llvm.fabs.f80(x86_fp80 %{{.+}}) + // PPC: call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %{{.+}}) + // X86F128: call fp128 @llvm.fabs.f128(fp128 %{{.+}}) + // PPCF128: call fp128 @llvm.fabs.f128(fp128 %{{.+}}) + __builtin_fabsl(f); + + // F80: call x86_fp80 @frexpl(x86_fp80 %{{.+}}, i32* %{{.+}}) + // PPC: call ppc_fp128 @frexpl(ppc_fp128 %{{.+}}, i32* %{{.+}}) + // X86F128: call fp128 @frexpl(fp128 %{{.+}}, i32* %{{.+}}) + // PPCF128: call fp128 @frexpl(fp128 %{{.+}}, i32* %{{.+}}) + __builtin_frexpl(f,i); + + // F80: store x86_fp80 0xK7FFF8000, x86_fp80* + // PPC: store ppc_fp128 0xM7FF0, ppc_fp128* + // X86F128: store fp128 0xL7FFF, fp128* + // PPCF128: store fp128 0xL7FFF, fp128* + *l = __builtin_huge_vall(); + + // F80: store x86_fp80 0xK7FFF8000, x86_fp80* + // PPC: store ppc_fp128 0xM7FF0, ppc_fp128* + // X86F128: store fp128 0xL7FFF, fp128* + // PPCF128: store fp128 0xL7FFF, fp128* + *l = __builtin_infl(); + + // F80: call x86_fp80 @ldexpl(x86_fp80 %{{.+}}, i32 %{{.+}}) + // PPC: call ppc_fp128 @ldexpl(ppc_fp128 %{{.+}}, {{(signext)?.+}}) + // X86F128: call fp128 @ldexpl(fp128 %{{.+}}, {{(signext)?.+}}) + // PPCF128: call fp128 @ldexpl(fp128 %{{.+}}, {{(signext)?.+}}) + __builtin_ldexpl(f,f); + + // F80: call x86_fp80 @modfl(x86_fp80 %{{.+}}, x86_fp80* %{{.+}}) + // PPC: call ppc_fp128 @modfl(ppc_fp128 %{{.+}}, ppc_fp128* %{{.+}}) + // X86F128: call fp128 @modfl(fp128 %{{.+}}, fp128* %{{.+}}) + // PPCF128: call fp128 @modfl(fp128 %{{.+}}, fp128* %{{.+}}) + __builtin_modfl(f,l); + + // F80: call x86_fp80 @nanl(i8* %{{.+}}) + // PPC: call ppc_fp128 @nanl(i8* %{{.+}}) + // 
X86F128: call fp128 @nanl(i8* %{{.+}}) + // PPCF128: call fp128 @nanl(i8* %{{.+}}) + __builtin_nanl(c); + + // F80: call x86_fp80 @nansl(i8* %{{.+}}) + // PPC: call ppc_fp128 @nansl(i8* %{{.+}}) + // X86F128: call fp128 @nansl(i8* %{{.+}}) + // PPCF128: call fp128 @nansl(i8* %{{.+}}) + __builtin_nansl(c); + + // F80: call x86_fp80 @powl(x86_fp80 %{{.+}}, x86_fp80 %{{.+}}) + // PPC: call ppc_fp128 @powl(ppc_fp128 %{{.+}}, ppc_fp128 %{{.+}}) + // X86F128: call fp128 @powl(fp128 %{{.+}}, fp128 %{{.+}}) + // PPCF128: call fp128 @powl(fp128 %{{.+}}, fp128 %{{.+}}) + __builtin_powl(f,f); + + // F80: call x86_fp80 @acosl(x86_fp80
[llvm-branch-commits] [llvm] 38b4442 - [NFC] [Legalizer] Use common method for expanding fp-to-int operands
Author: Qiu Chaofan Date: 2020-12-15T10:45:40+08:00 New Revision: 38b44421986937376989bfa4f4eaf0138c1139e1 URL: https://github.com/llvm/llvm-project/commit/38b44421986937376989bfa4f4eaf0138c1139e1 DIFF: https://github.com/llvm/llvm-project/commit/38b44421986937376989bfa4f4eaf0138c1139e1.diff LOG: [NFC] [Legalizer] Use common method for expanding fp-to-int operands Reviewed By: RKSimon, steven.zhang Differential Revision: https://reviews.llvm.org/D92481 Added: Modified: llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h Removed: diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 76536b5622db..5c12682f81f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -1750,9 +1750,9 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::STRICT_FP_ROUND: case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::STRICT_FP_TO_SINT: - case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::STRICT_FP_TO_UINT: - case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; + case ISD::FP_TO_SINT: + case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_XINT(N); break; case ISD::LROUND: Res = ExpandFloatOp_LROUND(N); break; case ISD::LLROUND:Res = ExpandFloatOp_LLROUND(N); break; case ISD::LRINT: Res = ExpandFloatOp_LRINT(N); break; @@ -1878,38 +1878,21 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { return SDValue(); } -SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { - EVT RVT = N->getValueType(0); - SDLoc dl(N); - - bool IsStrict = N->isStrictFPOpcode(); - SDValue Op = N->getOperand(IsStrict ? 1 : 0); - SDValue Chain = IsStrict ? 
N->getOperand(0) : SDValue(); - RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - TargetLowering::MakeLibCallOptions CallOptions; - std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, -CallOptions, dl, Chain); - if (!IsStrict) -return Tmp.first; - - ReplaceValueWith(SDValue(N, 1), Tmp.second); - ReplaceValueWith(SDValue(N, 0), Tmp.first); - return SDValue(); -} - -SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { +SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_XINT(SDNode *N) { EVT RVT = N->getValueType(0); SDLoc dl(N); bool IsStrict = N->isStrictFPOpcode(); + bool Signed = N->getOpcode() == ISD::FP_TO_SINT || +N->getOpcode() == ISD::STRICT_FP_TO_SINT; SDValue Op = N->getOperand(IsStrict ? 1 : 0); SDValue Chain = IsStrict ? N->getOperand(0) : SDValue(); - RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), RVT); - assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); + RTLIB::Libcall LC = Signed ? 
RTLIB::getFPTOSINT(Op.getValueType(), RVT) + : RTLIB::getFPTOUINT(Op.getValueType(), RVT); + assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_XINT!"); TargetLowering::MakeLibCallOptions CallOptions; - std::pair Tmp = TLI.makeLibCall(DAG, LC, RVT, Op, -CallOptions, dl, Chain); + std::pair Tmp = + TLI.makeLibCall(DAG, LC, RVT, Op, CallOptions, dl, Chain); if (!IsStrict) return Tmp.first; diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index aea2e9ba2bd5..c267016cf37e 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -629,8 +629,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue ExpandFloatOp_BR_CC(SDNode *N); SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); - SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); - SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); + SDValue ExpandFloatOp_FP_TO_XINT(SDNode *N); SDValue ExpandFloatOp_LROUND(SDNode *N); SDValue ExpandFloatOp_LLROUND(SDNode *N); SDValue ExpandFloatOp_LRINT(SDNode *N); ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] 92160b2 - [NFC] [PowerPC] Move i1-to-fp tests and use script
Author: Qiu Chaofan Date: 2020-12-08T15:20:15+08:00 New Revision: 92160b23f5e08c1b9d2190989ebfaca404e6d251 URL: https://github.com/llvm/llvm-project/commit/92160b23f5e08c1b9d2190989ebfaca404e6d251 DIFF: https://github.com/llvm/llvm-project/commit/92160b23f5e08c1b9d2190989ebfaca404e6d251.diff LOG: [NFC] [PowerPC] Move i1-to-fp tests and use script Added: Modified: llvm/test/CodeGen/PowerPC/i1-to-double.ll Removed: llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll diff --git a/llvm/test/CodeGen/PowerPC/i1-to-double.ll b/llvm/test/CodeGen/PowerPC/i1-to-double.ll index 88d6a03cbd30..70506b351e65 100644 --- a/llvm/test/CodeGen/PowerPC/i1-to-double.ll +++ b/llvm/test/CodeGen/PowerPC/i1-to-double.ll @@ -1,17 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -verify-machineinstrs -mcpu=ppc32 -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s + define double @test(i1 %X) { -%Y = uitofp i1 %X to double -ret double %Y +; CHECK-LABEL: test: +; CHECK: # %bb.0: +; CHECK-NEXT:li 4, .LCPI0_0@l +; CHECK-NEXT:andi. 3, 3, 1 +; CHECK-NEXT:addis 3, 4, .LCPI0_0@ha +; CHECK-NEXT:li 4, .LCPI0_1@l +; CHECK-NEXT:addis 4, 4, .LCPI0_1@ha +; CHECK-NEXT:bc 12, 1, .LBB0_1 +; CHECK-NEXT:b .LBB0_2 +; CHECK-NEXT: .LBB0_1: +; CHECK-NEXT:addi 3, 4, 0 +; CHECK-NEXT: .LBB0_2: +; CHECK-NEXT:lfs 1, 0(3) +; CHECK-NEXT:blr + %Y = uitofp i1 %X to double + ret double %Y +} + +; Verify the cases won't crash because of missing chains + +@foo = dso_local global double 0.00e+00, align 8 + +define double @u1tofp(i1 %i, double %d) #0 { +; CHECK-LABEL: u1tofp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li 4, .LCPI1_0@l +; CHECK-NEXT:andi. 
3, 3, 1 +; CHECK-NEXT:addis 3, 4, .LCPI1_0@ha +; CHECK-NEXT:li 4, .LCPI1_1@l +; CHECK-NEXT:addis 4, 4, .LCPI1_1@ha +; CHECK-NEXT:bc 12, 1, .LBB1_1 +; CHECK-NEXT:b .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT:addi 3, 4, 0 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT:fmr 0, 1 +; CHECK-NEXT:lfs 1, 0(3) +; CHECK-NEXT:lis 3, foo@ha +; CHECK-NEXT:stfd 0, foo@l(3) +; CHECK-NEXT:blr +entry: + %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i1(i1 %i, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store volatile double %d, double* @foo, align 8 + ret double %conv +} + +define double @s1tofp(i1 %i, double %d) #0 { +; CHECK-LABEL: s1tofp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li 4, .LCPI2_0@l +; CHECK-NEXT:andi. 3, 3, 1 +; CHECK-NEXT:addis 3, 4, .LCPI2_0@ha +; CHECK-NEXT:li 4, .LCPI2_1@l +; CHECK-NEXT:addis 4, 4, .LCPI2_1@ha +; CHECK-NEXT:bc 12, 1, .LBB2_1 +; CHECK-NEXT:b .LBB2_2 +; CHECK-NEXT: .LBB2_1: # %entry +; CHECK-NEXT:addi 3, 4, 0 +; CHECK-NEXT: .LBB2_2: # %entry +; CHECK-NEXT:fmr 0, 1 +; CHECK-NEXT:lfs 1, 0(3) +; CHECK-NEXT:lis 3, foo@ha +; CHECK-NEXT:stfd 0, foo@l(3) +; CHECK-NEXT:blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i1(i1 %i, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store volatile double %d, double* @foo, align 8 + ret double %conv } -; CHECK-LABEL: @test -; CHECK-DAG: addis 3, 4, .LCPI -; CHECK-DAG: addis 4, 4, .LCPI -; CHECK-DAG: andi. 
{{[0-9]+}}, 3, 1 -; CHECK-NEXT: bc 12, 1, [[TRUE:.LBB[0-9]+]] -; CHECK-NEXT: b [[SUCCESSOR:.LBB[0-9]+]] -; CHECK-NEXT: [[TRUE]] -; CHECK-NEXT: addi 3, 4, 0 -; CHECK-NEXT: [[SUCCESSOR]] -; CHECK-NEXT: lfs 1, 0(3) -; CHECK-NEXT: blr +declare double @llvm.experimental.constrained.uitofp.f64.i1(i1, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i1(i1, metadata, metadata) + +attributes #0 = { strictfp } diff --git a/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll b/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll deleted file mode 100644 index 082c23941cf7.. --- a/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll +++ /dev/null @@ -1,59 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc -verify-machineinstrs -mtriple=ppc32 < %s | FileCheck %s - -@foo = dso_local global double 0.00e+00, align 8 - -; Verify the cases won't crash because of missing chains - -define double @u1tofp(i1 %i, double %d) #0 { -; CHECK-LABEL: u1tofp: -; CHECK: # %bb.0: # %entry -; CHECK-NEXT:li 4, .LCPI0_0@l -; CHECK-NEXT:andi. 3, 3, 1 -; CHECK-NEXT:addis 3, 4, .LCPI0_0@ha -; CHECK-NEXT:li 4, .LCPI0_1@l -; CHECK-NEXT:addis 4, 4, .LCPI0_1@ha -; CHECK-NEXT:bc 12, 1, .LBB0_1 -; CHECK-NEXT:b .LBB0_2 -; CHECK-NEXT: .LBB0_1: # %entry -; CHECK-NEXT:addi 3, 4, 0 -; CHECK-NEXT: .LBB0_2: # %entry -; CHECK-NEXT:fmr 0, 1 -; CHECK-NEXT:lfs 1, 0(3) -; CHECK-NEXT:lis 3, foo@ha -; CHECK-NEXT:stfd 0, foo@l(3) -; CHECK-NEXT:blr
[llvm-branch-commits] [llvm] 5e85a2b - [PowerPC] Implement intrinsic for DARN instruction
Author: Qiu Chaofan Date: 2020-12-08T14:08:52+08:00 New Revision: 5e85a2ba1645c3edbf26bba096631fbd318ada47 URL: https://github.com/llvm/llvm-project/commit/5e85a2ba1645c3edbf26bba096631fbd318ada47 DIFF: https://github.com/llvm/llvm-project/commit/5e85a2ba1645c3edbf26bba096631fbd318ada47.diff LOG: [PowerPC] Implement intrinsic for DARN instruction Instruction darn was introduced in ISA 3.0. It means 'Deliver A Random Number'. The immediate number L means: - L=0, the number is 32-bit (higher 32-bits are all-zero) - L=1, the number is 'conditioned' (processed by hardware to reduce bias) - L=2, the number is not conditioned, directly from noise source GCC implements them in three separate intrinsics: __builtin_darn, __builtin_darn_32 and __builtin_darn_raw. This patch implements the same intrinsics. And this change also addresses Bugzilla PR39800. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D92465 Added: llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll Modified: clang/include/clang/Basic/BuiltinsPPC.def clang/test/CodeGen/builtins-ppc.c llvm/include/llvm/IR/IntrinsicsPowerPC.td llvm/lib/Target/PowerPC/PPCInstr64Bit.td Removed: diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 78ce77043b6f..8975d126b897 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -638,6 +638,11 @@ BUILTIN(__builtin_cfuged, "ULLiULLiULLi", "") BUILTIN(__builtin_cntlzdm, "ULLiULLiULLi", "") BUILTIN(__builtin_cnttzdm, "ULLiULLiULLi", "") +// Generate random number +BUILTIN(__builtin_darn, "LLi", "") +BUILTIN(__builtin_darn_raw, "LLi", "") +BUILTIN(__builtin_darn_32, "i", "") + // Vector int128 (un)pack BUILTIN(__builtin_unpack_vector_int128, "ULLiV1LLLii", "") BUILTIN(__builtin_pack_vector_int128, "V1LLLiULLiULLi", "") diff --git a/clang/test/CodeGen/builtins-ppc.c b/clang/test/CodeGen/builtins-ppc.c index e30cdff3c8ce..0abd540013e2 100644 --- 
a/clang/test/CodeGen/builtins-ppc.c +++ b/clang/test/CodeGen/builtins-ppc.c @@ -36,3 +36,16 @@ void test_builtin_ppc_flm() { // CHECK: call double @llvm.ppc.setflm(double %1) res = __builtin_setflm(res); } + +void test_builtin_ppc_darn() { + volatile long res; + volatile int x; + // CHECK: call i64 @llvm.ppc.darn() + res = __builtin_darn(); + + // CHECK: call i64 @llvm.ppc.darnraw() + res = __builtin_darn_raw(); + + // CHECK: call i32 @llvm.ppc.darn32() + x = __builtin_darn_32(); +} diff --git a/llvm/include/llvm/IR/IntrinsicsPowerPC.td b/llvm/include/llvm/IR/IntrinsicsPowerPC.td index 8db5c15fe761..d559c000fd93 100644 --- a/llvm/include/llvm/IR/IntrinsicsPowerPC.td +++ b/llvm/include/llvm/IR/IntrinsicsPowerPC.td @@ -70,6 +70,14 @@ let TargetPrefix = "ppc" in { // All intrinsics start with "llvm.ppc.". Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], [IntrNoMem]>; + // Generate a random number + def int_ppc_darn : GCCBuiltin<"__builtin_darn">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + def int_ppc_darnraw : GCCBuiltin<"__builtin_darn_raw">, + Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>; + def int_ppc_darn32 : GCCBuiltin<"__builtin_darn_32">, + Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; + // Bit permute doubleword def int_ppc_bpermd : GCCBuiltin<"__builtin_bpermd">, Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], diff --git a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td index 82b868ec2b10..9265c513c031 100644 --- a/llvm/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/llvm/lib/Target/PowerPC/PPCInstr64Bit.td @@ -1606,6 +1606,11 @@ def : Pat<(atomic_store_64 iaddrX4:$ptr, i64:$val), (STD g8rc:$val, memrix:$ptr def : Pat<(atomic_store_64 xaddrX4:$ptr, i64:$val), (STDX g8rc:$val, memrr:$ptr)>; let Predicates = [IsISA3_0] in { +// DARN (deliver random number) +// L=0 for 32-bit, L=1 for conditioned random, L=2 for raw random +def : Pat<(int_ppc_darn32), (EXTRACT_SUBREG (DARN 0), sub_32)>; +def : Pat<(int_ppc_darn), (DARN 1)>; 
+def : Pat<(int_ppc_darnraw), (DARN 2)>; class X_L1_RA5_RB5 opcode, bits<10> xo, string opc, RegisterOperand ty, InstrItinClass itin, list pattern> diff --git a/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll b/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll new file mode 100644 index ..d53b442fef71 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/builtins-ppc-p9-darn.ll @@ -0,0 +1,37 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -verify-machineinstrs -mtriple powerpc64le -mcpu=pwr9 | FileCheck %s + +define i64 @raw() { +; CHECK-LABEL: raw: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:darn
[llvm-branch-commits] [clang] 6bf29db - [PowerPC] [Clang] Enable float128 feature on P9 by default
Author: Qiu Chaofan Date: 2020-12-07T18:31:00+08:00 New Revision: 6bf29dbb1541aff717e52b5c5fb12b84f5b38f21 URL: https://github.com/llvm/llvm-project/commit/6bf29dbb1541aff717e52b5c5fb12b84f5b38f21 DIFF: https://github.com/llvm/llvm-project/commit/6bf29dbb1541aff717e52b5c5fb12b84f5b38f21.diff LOG: [PowerPC] [Clang] Enable float128 feature on P9 by default As Power9 introduced hardware support for IEEE quad-precision FP type, the feature should be enabled by default on Power9 or newer targets. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D90213 Added: Modified: clang/lib/Basic/Targets/PPC.cpp clang/test/Driver/ppc-f128-support-check.c Removed: diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 7f6f3d7e0c9fa..a6997324acf97 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -317,6 +317,9 @@ bool PPCTargetInfo::initFeatureMap( .Case("pwr9", true) .Case("pwr8", true) .Default(false); + Features["float128"] = llvm::StringSwitch(CPU) +.Case("pwr9", true) +.Default(false); Features["spe"] = llvm::StringSwitch(CPU) .Case("8548", true) diff --git a/clang/test/Driver/ppc-f128-support-check.c b/clang/test/Driver/ppc-f128-support-check.c index 24748905612ff..2e4b7a7ae09ce 100644 --- a/clang/test/Driver/ppc-f128-support-check.c +++ b/clang/test/Driver/ppc-f128-support-check.c @@ -1,7 +1,7 @@ // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=pwr9 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=HASF128 +// RUN: -mcpu=pwr9 %s 2>&1 | FileCheck %s --check-prefix=HASF128 // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ -// RUN: -mcpu=power9 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=HASF128 +// RUN: -mcpu=power9 %s 2>&1 | FileCheck %s --check-prefix=HASF128 // RUN: not %clang -target powerpc64le-unknown-linux-gnu -fsyntax-only \ // RUN: -mcpu=pwr8 -mfloat128 %s 2>&1 | FileCheck %s --check-prefix=NOF128 ___ 
llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] efdd463 - [PowerPC] Fix chain for i1-to-fp operation
Author: Qiu Chaofan Date: 2020-12-07T10:38:56+08:00 New Revision: efdd4630507edfe13851475de5d16cc248aacd15 URL: https://github.com/llvm/llvm-project/commit/efdd4630507edfe13851475de5d16cc248aacd15 DIFF: https://github.com/llvm/llvm-project/commit/efdd4630507edfe13851475de5d16cc248aacd15.diff LOG: [PowerPC] Fix chain for i1-to-fp operation A simple SELECT is used for converting i1 to floating types on ppc32, but in constrained cases, the chain is not handled properly. This patch will fix that. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D92365 Added: llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index c5dbacde6fa5..90968a3ef8a7 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8718,10 +8718,15 @@ SDValue PPCTargetLowering::LowerINT_TO_FP(SDValue Op, if (Op.getValueType() != MVT::f32 && Op.getValueType() != MVT::f64) return SDValue(); - if (Src.getValueType() == MVT::i1) -return DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, - DAG.getConstantFP(1.0, dl, Op.getValueType()), - DAG.getConstantFP(0.0, dl, Op.getValueType())); + if (Src.getValueType() == MVT::i1) { +SDValue Sel = DAG.getNode(ISD::SELECT, dl, Op.getValueType(), Src, + DAG.getConstantFP(1.0, dl, Op.getValueType()), + DAG.getConstantFP(0.0, dl, Op.getValueType())); +if (IsStrict) + return DAG.getMergeValues({Sel, Chain}, dl); +else + return Sel; + } // If we have direct moves, we can do all the conversion, skip the store/load // however, without FPCVT we can't do most conversions. 
diff --git a/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll b/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll new file mode 100644 index ..082c23941cf7 --- /dev/null +++ b/llvm/test/CodeGen/PowerPC/i1-to-fp-chain.ll @@ -0,0 +1,59 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -verify-machineinstrs -mtriple=ppc32 < %s | FileCheck %s + +@foo = dso_local global double 0.00e+00, align 8 + +; Verify the cases won't crash because of missing chains + +define double @u1tofp(i1 %i, double %d) #0 { +; CHECK-LABEL: u1tofp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li 4, .LCPI0_0@l +; CHECK-NEXT:andi. 3, 3, 1 +; CHECK-NEXT:addis 3, 4, .LCPI0_0@ha +; CHECK-NEXT:li 4, .LCPI0_1@l +; CHECK-NEXT:addis 4, 4, .LCPI0_1@ha +; CHECK-NEXT:bc 12, 1, .LBB0_1 +; CHECK-NEXT:b .LBB0_2 +; CHECK-NEXT: .LBB0_1: # %entry +; CHECK-NEXT:addi 3, 4, 0 +; CHECK-NEXT: .LBB0_2: # %entry +; CHECK-NEXT:fmr 0, 1 +; CHECK-NEXT:lfs 1, 0(3) +; CHECK-NEXT:lis 3, foo@ha +; CHECK-NEXT:stfd 0, foo@l(3) +; CHECK-NEXT:blr +entry: + %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i1(i1 %i, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store volatile double %d, double* @foo, align 8 + ret double %conv +} + +define double @s1tofp(i1 %i, double %d) #0 { +; CHECK-LABEL: s1tofp: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT:li 4, .LCPI1_0@l +; CHECK-NEXT:andi. 
3, 3, 1 +; CHECK-NEXT:addis 3, 4, .LCPI1_0@ha +; CHECK-NEXT:li 4, .LCPI1_1@l +; CHECK-NEXT:addis 4, 4, .LCPI1_1@ha +; CHECK-NEXT:bc 12, 1, .LBB1_1 +; CHECK-NEXT:b .LBB1_2 +; CHECK-NEXT: .LBB1_1: # %entry +; CHECK-NEXT:addi 3, 4, 0 +; CHECK-NEXT: .LBB1_2: # %entry +; CHECK-NEXT:fmr 0, 1 +; CHECK-NEXT:lfs 1, 0(3) +; CHECK-NEXT:lis 3, foo@ha +; CHECK-NEXT:stfd 0, foo@l(3) +; CHECK-NEXT:blr +entry: + %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i1(i1 %i, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store volatile double %d, double* @foo, align 8 + ret double %conv +} + +declare double @llvm.experimental.constrained.uitofp.f64.i1(i1, metadata, metadata) +declare double @llvm.experimental.constrained.sitofp.f64.i1(i1, metadata, metadata) + +attributes #0 = { strictfp } ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 9378a36 - [NFC] [Clang] Fix ppc64le vaarg OpenMP test in CodeGen
Author: Qiu Chaofan Date: 2020-12-04T11:29:55+08:00 New Revision: 9378a366b2b256ebd1b2763141f683ab9b48c303 URL: https://github.com/llvm/llvm-project/commit/9378a366b2b256ebd1b2763141f683ab9b48c303 DIFF: https://github.com/llvm/llvm-project/commit/9378a366b2b256ebd1b2763141f683ab9b48c303.diff LOG: [NFC] [Clang] Fix ppc64le vaarg OpenMP test in CodeGen Reviewed By: MaskRay Differential Revision: https://reviews.llvm.org/D92544 Added: Modified: clang/test/CodeGen/ppc64le-varargs-f128.c Removed: clang/test/Driver/ppc-openmp-f128.c diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c index 0b085859c5ac..7868fe322ce8 100644 --- a/clang/test/CodeGen/ppc64le-varargs-f128.c +++ b/clang/test/CodeGen/ppc64le-varargs-f128.c @@ -5,11 +5,51 @@ // RUN: -target-cpu pwr9 -target-feature +float128 \ // RUN: -o - %s | FileCheck %s -check-prefix=IBM +// RUN: %clang_cc1 -triple ppc64le -emit-llvm-bc %s -target-cpu pwr9 \ +// RUN: -target-feature +float128 -mabi=ieeelongdouble -fopenmp \ +// RUN: -fopenmp-targets=ppc64le -o %t-ppc-host.bc +// RUN: %clang_cc1 -triple ppc64le -aux-triple ppc64le %s -target-cpu pwr9 \ +// RUN: -target-feature +float128 -fopenmp -fopenmp-is-device -emit-llvm \ +// RUN: -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s \ +// RUN: -check-prefix=OMP-TARGET +// RUN: %clang_cc1 %t-ppc-host.bc -emit-llvm -o - | FileCheck %s \ +// RUN: -check-prefix=OMP-HOST + #include <stdarg.h> void foo_ld(long double); void foo_fq(__float128); +// Verify cases when OpenMP target's and host's long-double semantics differ.
+ +// OMP-TARGET-LABEL: define internal void @.omp_outlined.( +// OMP-TARGET: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** +// OMP-TARGET: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128* +// OMP-TARGET: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8 +// OMP-TARGET: call void @foo_ld(ppc_fp128 %[[V3]]) + +// OMP-HOST-LABEL: define void @omp( +// OMP-HOST: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* +// OMP-HOST: call void @llvm.va_start(i8* %[[AP1]]) +// OMP-HOST: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8 +// OMP-HOST: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 +// OMP-HOST: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 +// OMP-HOST: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 +// OMP-HOST: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* +// OMP-HOST: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128* +// OMP-HOST: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16 +// OMP-HOST: call void @foo_ld(fp128 %[[V4]]) +void omp(int n, ...) { + va_list ap; + va_start(ap, n); + foo_ld(va_arg(ap, long double)); + #pragma omp target parallel + for (int i = 1; i < n; ++i) { +foo_ld(va_arg(ap, long double)); + } + va_end(ap); +} + // IEEE-LABEL: define void @f128 // IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* // IEEE: call void @llvm.va_start(i8* %[[AP1]]) diff --git a/clang/test/Driver/ppc-openmp-f128.c b/clang/test/Driver/ppc-openmp-f128.c deleted file mode 100644 index bff6fe35e526.. 
--- a/clang/test/Driver/ppc-openmp-f128.c +++ /dev/null @@ -1,39 +0,0 @@ -// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm \ -// RUN: -fopenmp-targets=ppc64le -mfloat128 -mabi=ieeelongdouble -mcpu=pwr9 \ -// RUN: -Xopenmp-target=ppc64le -mcpu=pwr9 -Xopenmp-target=ppc64le \ -// RUN: -mfloat128 -fopenmp=libomp -o - %s | FileCheck %s -check-prefix=OMP - -#include <stdarg.h> - -void foo_ld(long double); -void foo_fq(__float128); - -// Verify cases when OpenMP target's and host's long-double semantics differ. - -// OMP-LABEL: define internal void @.omp_outlined. -// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** -// OMP: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128* -// OMP: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8 -// OMP: call void @foo_ld(ppc_fp128 %[[V3]]) - -// OMP-LABEL: define dso_local void @omp -// OMP: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* -// OMP: call void @llvm.va_start(i8* %[[AP1]]) -// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8 -// OMP: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 -// OMP: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 -// OMP: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 -// OMP: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* -// OMP: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128* -// OMP: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16 -// OMP: call void @foo_ld(fp128 %[[V4]]) -void omp(int n, ...) { - va_list ap; - va_start(ap, n); - foo_ld(va_arg(ap, long double)); - #pragma omp target parallel - for (int i = 1; i < n; ++i) { -foo_ld(va_arg(ap, long double)); - } - va_end(ap); -}
[llvm-branch-commits] [clang] 222da77 - [NFC] [Clang] Move ppc64le f128 vaargs OpenMP test
Author: Qiu Chaofan Date: 2020-12-03T10:50:42+08:00 New Revision: 222da77a82d17cbc6b989779e2ba2bb4904bb672 URL: https://github.com/llvm/llvm-project/commit/222da77a82d17cbc6b989779e2ba2bb4904bb672 DIFF: https://github.com/llvm/llvm-project/commit/222da77a82d17cbc6b989779e2ba2bb4904bb672.diff LOG: [NFC] [Clang] Move ppc64le f128 vaargs OpenMP test This case for long-double semantics mismatch on OpenMP references %clang, which should be located in Driver directory. Added: clang/test/Driver/ppc-openmp-f128.c Modified: clang/test/CodeGen/ppc64le-varargs-f128.c Removed: diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c index 5e9930ec716f..0b085859c5ac 100644 --- a/clang/test/CodeGen/ppc64le-varargs-f128.c +++ b/clang/test/CodeGen/ppc64le-varargs-f128.c @@ -5,46 +5,11 @@ // RUN: -target-cpu pwr9 -target-feature +float128 \ // RUN: -o - %s | FileCheck %s -check-prefix=IBM -// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm \ -// RUN: -fopenmp-targets=ppc64le -mfloat128 -mabi=ieeelongdouble -mcpu=pwr9 \ -// RUN: -Xopenmp-target=ppc64le -mcpu=pwr9 -Xopenmp-target=ppc64le \ -// RUN: -mfloat128 -fopenmp=libomp -o - %s | FileCheck %s -check-prefix=OMP - #include <stdarg.h> void foo_ld(long double); void foo_fq(__float128); -// Verify cases when OpenMP target's and host's long-double semantics differ. - -// OMP-LABEL: define internal void @.omp_outlined.
-// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** -// OMP: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128* -// OMP: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8 -// OMP: call void @foo_ld(ppc_fp128 %[[V3]]) - -// OMP-LABEL: define dso_local void @omp -// OMP: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* -// OMP: call void @llvm.va_start(i8* %[[AP1]]) -// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8 -// OMP: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 -// OMP: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 -// OMP: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 -// OMP: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* -// OMP: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128* -// OMP: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16 -// OMP: call void @foo_ld(fp128 %[[V4]]) -void omp(int n, ...) { - va_list ap; - va_start(ap, n); - foo_ld(va_arg(ap, long double)); - #pragma omp target parallel - for (int i = 1; i < n; ++i) { -foo_ld(va_arg(ap, long double)); - } - va_end(ap); -} - // IEEE-LABEL: define void @f128 // IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* // IEEE: call void @llvm.va_start(i8* %[[AP1]]) diff --git a/clang/test/Driver/ppc-openmp-f128.c b/clang/test/Driver/ppc-openmp-f128.c new file mode 100644 index ..bff6fe35e526 --- /dev/null +++ b/clang/test/Driver/ppc-openmp-f128.c @@ -0,0 +1,39 @@ +// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm \ +// RUN: -fopenmp-targets=ppc64le -mfloat128 -mabi=ieeelongdouble -mcpu=pwr9 \ +// RUN: -Xopenmp-target=ppc64le -mcpu=pwr9 -Xopenmp-target=ppc64le \ +// RUN: -mfloat128 -fopenmp=libomp -o - %s | FileCheck %s -check-prefix=OMP + +#include <stdarg.h> + +void foo_ld(long double); +void foo_fq(__float128); + +// Verify cases when OpenMP target's and host's long-double semantics differ. + +// OMP-LABEL: define internal void @.omp_outlined. 
+// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** +// OMP: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128* +// OMP: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8 +// OMP: call void @foo_ld(ppc_fp128 %[[V3]]) + +// OMP-LABEL: define dso_local void @omp +// OMP: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* +// OMP: call void @llvm.va_start(i8* %[[AP1]]) +// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8 +// OMP: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 +// OMP: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 +// OMP: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 +// OMP: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* +// OMP: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128* +// OMP: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16 +// OMP: call void @foo_ld(fp128 %[[V4]]) +void omp(int n, ...) { + va_list ap; + va_start(ap, n); + foo_ld(va_arg(ap, long double)); + #pragma omp target parallel + for (int i = 1; i < n; ++i) { +foo_ld(va_arg(ap, long double)); + } + va_end(ap); +} ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [llvm] ffa2dce - [PowerPC] Fix FLT_ROUNDS_ on little endian
Author: Qiu Chaofan Date: 2020-12-02T17:16:32+08:00 New Revision: ffa2dce59070636e0d83d2797fb80c4ca2d7ea2d URL: https://github.com/llvm/llvm-project/commit/ffa2dce59070636e0d83d2797fb80c4ca2d7ea2d DIFF: https://github.com/llvm/llvm-project/commit/ffa2dce59070636e0d83d2797fb80c4ca2d7ea2d.diff LOG: [PowerPC] Fix FLT_ROUNDS_ on little endian In lowering of FLT_ROUNDS_, FPSCR content will be moved into FP register and then GPR, and then truncated into word. For subtargets without direct move support, it will store and then load. The load address needs adjustment (+4) only on big-endian targets. This patch fixes it on using generic opcodes on little-endian and subtargets with direct-move. Reviewed By: steven.zhang Differential Revision: https://reviews.llvm.org/D91845 Added: Modified: llvm/lib/Target/PowerPC/PPCISelLowering.cpp llvm/test/CodeGen/PowerPC/frounds.ll Removed: diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 1864dc7f3113..f9f84aa668bc 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -8975,16 +8975,24 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SDValue MFFS = DAG.getNode(PPCISD::MFFS, dl, {MVT::f64, MVT::Other}, Chain); Chain = MFFS.getValue(1); - // Save FP register to stack slot - int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); - Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); - - // Load FP Control Word from low 32 bits of stack slot. 
- SDValue Four = DAG.getConstant(4, dl, PtrVT); - SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); - SDValue CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); - Chain = CWD.getValue(1); + SDValue CWD; + if (isTypeLegal(MVT::i64)) { +CWD = DAG.getNode(ISD::TRUNCATE, dl, MVT::i32, + DAG.getNode(ISD::BITCAST, dl, MVT::i64, MFFS)); + } else { +// Save FP register to stack slot +int SSFI = MF.getFrameInfo().CreateStackObject(8, Align(8), false); +SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT); +Chain = DAG.getStore(Chain, dl, MFFS, StackSlot, MachinePointerInfo()); + +// Load FP Control Word from low 32 bits of stack slot. +assert(hasBigEndianPartOrdering(MVT::i64, MF.getDataLayout()) && + "Stack slot adjustment is valid only on big endian subtargets!"); +SDValue Four = DAG.getConstant(4, dl, PtrVT); +SDValue Addr = DAG.getNode(ISD::ADD, dl, PtrVT, StackSlot, Four); +CWD = DAG.getLoad(MVT::i32, dl, Chain, Addr, MachinePointerInfo()); +Chain = CWD.getValue(1); + } // Transform as necessary SDValue CWD1 = diff --git a/llvm/test/CodeGen/PowerPC/frounds.ll b/llvm/test/CodeGen/PowerPC/frounds.ll index df339ceb3a09..277423f6e2b5 100644 --- a/llvm/test/CodeGen/PowerPC/frounds.ll +++ b/llvm/test/CodeGen/PowerPC/frounds.ll @@ -42,7 +42,7 @@ define i32 @foo() { ; PPC64LE: # %bb.0: # %entry ; PPC64LE-NEXT:mffs 0 ; PPC64LE-NEXT:stfd 0, -16(1) -; PPC64LE-NEXT:lwz 3, -12(1) +; PPC64LE-NEXT:lwz 3, -16(1) ; PPC64LE-NEXT:not 4, 3 ; PPC64LE-NEXT:clrlwi 3, 3, 30 ; PPC64LE-NEXT:rlwinm 4, 4, 31, 31, 31 @@ -54,8 +54,7 @@ define i32 @foo() { ; DM-LABEL: foo: ; DM: # %bb.0: # %entry ; DM-NEXT:mffs 0 -; DM-NEXT:stfd 0, -16(1) -; DM-NEXT:lwz 3, -12(1) +; DM-NEXT:mffprd 3, 0 ; DM-NEXT:not 4, 3 ; DM-NEXT:clrlwi 3, 3, 30 ; DM-NEXT:rlwinm 4, 4, 31, 31, 31 ___ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits
[llvm-branch-commits] [clang] 3fca6a7 - [Clang] Don't adjust align for IBM extended double
Author: Qiu Chaofan Date: 2020-12-02T17:02:26+08:00 New Revision: 3fca6a7844b515496446667a18a9703c29cf6e88 URL: https://github.com/llvm/llvm-project/commit/3fca6a7844b515496446667a18a9703c29cf6e88 DIFF: https://github.com/llvm/llvm-project/commit/3fca6a7844b515496446667a18a9703c29cf6e88.diff LOG: [Clang] Don't adjust align for IBM extended double Commit 6b1341eb fixed alignment for 128-bit FP types on PowerPC. However, the quadword alignment adjustment shouldn't be applied to IBM extended double (ppc_fp128 in IR) values. Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D92278 Added: Modified: clang/lib/CodeGen/TargetInfo.cpp clang/test/CodeGen/ppc64le-varargs-f128.c Removed: diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 3469bc6bf081..4815266371bc 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -5052,10 +5052,12 @@ CharUnits PPC64_SVR4_ABIInfo::getParamTypeAlignment(QualType Ty) const { return CharUnits::fromQuantity(16); } else if (Ty->isVectorType()) { return CharUnits::fromQuantity(getContext().getTypeSize(Ty) == 128 ? 16 : 8); - } else if (Ty->isRealFloatingType() && getContext().getTypeSize(Ty) == 128) { -// IEEE 128-bit floating numbers are also stored in vector registers. -// And both IEEE quad-precision and IBM extended double (ppc_fp128) should -// be quad-word aligned. + } else if (Ty->isRealFloatingType() && + &getContext().getFloatTypeSemantics(Ty) == + &llvm::APFloat::IEEEquad()) { +// According to ABI document section 'Optional Save Areas': If extended +// precision floating-point values in IEEE BINARY 128 QUADRUPLE PRECISION +// format are supported, map them to a single quadword, quadword aligned. 
return CharUnits::fromQuantity(16); } diff --git a/clang/test/CodeGen/ppc64le-varargs-f128.c b/clang/test/CodeGen/ppc64le-varargs-f128.c index 6562fe6f8fe4..5e9930ec716f 100644 --- a/clang/test/CodeGen/ppc64le-varargs-f128.c +++ b/clang/test/CodeGen/ppc64le-varargs-f128.c @@ -5,45 +5,92 @@ // RUN: -target-cpu pwr9 -target-feature +float128 \ // RUN: -o - %s | FileCheck %s -check-prefix=IBM +// RUN: %clang -target powerpc64le-unknown-linux-gnu -S -emit-llvm \ +// RUN: -fopenmp-targets=ppc64le -mfloat128 -mabi=ieeelongdouble -mcpu=pwr9 \ +// RUN: -Xopenmp-target=ppc64le -mcpu=pwr9 -Xopenmp-target=ppc64le \ +// RUN: -mfloat128 -fopenmp=libomp -o - %s | FileCheck %s -check-prefix=OMP + #include <stdarg.h> -// IEEE-LABEL: define fp128 @f128(i32 signext %n, ...) -// IEEE: call void @llvm.va_start(i8* %{{[0-9a-zA-Z_.]+}}) -// IEEE: %[[P1:[0-9a-zA-Z_.]+]] = add i64 %{{[0-9a-zA-Z_.]+}}, 15 -// IEEE: %[[P2:[0-9a-zA-Z_.]+]] = and i64 %[[P1]], -16 -// IEEE: %[[P3:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[P2]] to i8* -// IEEE: %[[P4:[0-9a-zA-Z_.]+]] = bitcast i8* %[[P3]] to fp128* -// IEEE: %{{[0-9a-zA-Z_.]+}} = load fp128, fp128* %[[P4]], align 16 -// IEEE: call void @llvm.va_end(i8* %{{[0-9a-zA-Z_.]+}}) -__float128 f128(int n, ...) { +void foo_ld(long double); +void foo_fq(__float128); + +// Verify cases when OpenMP target's and host's long-double semantics differ. + +// OMP-LABEL: define internal void @.omp_outlined. 
+// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** +// OMP: %[[V2:[0-9a-zA-Z_.]+]] = bitcast i8* %[[CUR]] to ppc_fp128* +// OMP: %[[V3:[0-9a-zA-Z_.]+]] = load ppc_fp128, ppc_fp128* %[[V2]], align 8 +// OMP: call void @foo_ld(ppc_fp128 %[[V3]]) + +// OMP-LABEL: define dso_local void @omp +// OMP: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* +// OMP: call void @llvm.va_start(i8* %[[AP1]]) +// OMP: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]], align 8 +// OMP: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 +// OMP: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 +// OMP: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 +// OMP: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* +// OMP: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to fp128* +// OMP: %[[V4:[0-9a-zA-Z_.]+]] = load fp128, fp128* %[[V3]], align 16 +// OMP: call void @foo_ld(fp128 %[[V4]]) +void omp(int n, ...) { + va_list ap; + va_start(ap, n); + foo_ld(va_arg(ap, long double)); + #pragma omp target parallel + for (int i = 1; i < n; ++i) { +foo_ld(va_arg(ap, long double)); + } + va_end(ap); +} + +// IEEE-LABEL: define void @f128 +// IEEE: %[[AP1:[0-9a-zA-Z_.]+]] = bitcast i8** %[[AP:[0-9a-zA-Z_.]+]] to i8* +// IEEE: call void @llvm.va_start(i8* %[[AP1]]) +// IEEE: %[[CUR:[0-9a-zA-Z_.]+]] = load i8*, i8** %[[AP]] +// IEEE: %[[V0:[0-9a-zA-Z_.]+]] = ptrtoint i8* %[[CUR]] to i64 +// IEEE: %[[V1:[0-9a-zA-Z_.]+]] = add i64 %[[V0]], 15 +// IEEE: %[[V2:[0-9a-zA-Z_.]+]] = and i64 %[[V1]], -16 +// IEEE: %[[ALIGN:[0-9a-zA-Z_.]+]] = inttoptr i64 %[[V2]] to i8* +// IEEE: %[[V3:[0-9a-zA-Z_.]+]] = bitcast i8* %[[ALIGN]] to