llvmorg-github-actions[bot] wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-powerpc Author: Tony Varghese (tonykuttai) <details> <summary>Changes</summary> This patch introduces experimental half-precision floating-point (_Float16) support for PowerPC Power8 and later architectures, enabled via the new -mfloat16 compiler flag. Key changes: - Add -mfloat16 flag to enable half-precision floating-point support - Register f16 as a legal type using VHFRCRegClass (VSX half-precision registers) - Implement f16 operations with promotion to f32 for arithmetic operations - Add hardware conversion support for Power9+ (xscvhpdp/xscvdphp instructions) - Use libcall conversions for Power8 (__extendhfsf2, __truncsfhf2, etc.) - Support direct load/store operations for f16 values - Handle f16 in calling conventions with proper register allocation - Add comprehensive test coverage for AIX and Linux platforms The implementation addresses correctness issues from GitHub issues #97975 (intermediate precision) and #97981 (NaN payload corruption) by treating f16 as a first-class type rather than using lossy conversion functions. Power9+ uses native hardware instructions for conversions, while Power8 falls back to compiler-rt library functions for type conversions. 
--- Patch is 172.85 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/196559.diff 15 Files Affected: - (modified) clang/include/clang/Options/Options.td (+2) - (modified) clang/lib/Basic/Targets/PPC.cpp (+11) - (modified) clang/lib/Basic/Targets/PPC.h (+5) - (modified) clang/lib/Driver/ToolChains/Arch/PPC.cpp (+5) - (added) clang/test/CodeGen/PowerPC/half-float16-ppc.c (+154) - (modified) llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp (+2-1) - (modified) llvm/lib/Target/PowerPC/PPC.td (+5) - (modified) llvm/lib/Target/PowerPC/PPCCallingConv.td (+10-3) - (modified) llvm/lib/Target/PowerPC/PPCISelLowering.cpp (+90-11) - (modified) llvm/lib/Target/PowerPC/PPCISelLowering.h (+5) - (modified) llvm/lib/Target/PowerPC/PPCInstrVSX.td (+82) - (modified) llvm/lib/Target/PowerPC/PPCRegisterInfo.td (+7) - (added) llvm/test/CodeGen/PowerPC/f16-aix-psa.ll (+96) - (added) llvm/test/CodeGen/PowerPC/half-float16-ppc.ll (+4198) - (modified) llvm/test/CodeGen/PowerPC/shrink-wrap.mir (+1-1) ``````````diff diff --git a/clang/include/clang/Options/Options.td b/clang/include/clang/Options/Options.td index 5eeabf4c33b76..3e4f189637e0a 100644 --- a/clang/include/clang/Options/Options.td +++ b/clang/include/clang/Options/Options.td @@ -5919,6 +5919,8 @@ def mcrbits : Flag<["-"], "mcrbits">, Group<m_ppc_Features_Group>, "(the enablement of CR-bit tracking support) is the default for " "POWER8 and above, as well as for all other CPUs when " "optimization is applied (-O2 and above).">; +def mfloat16 : Flag<["-"], "mfloat16">, Group<m_ppc_Features_Group>, + HelpText<"Enable half-precision floating point (experimental).">; def mno_crbits : Flag<["-"], "mno-crbits">, Group<m_ppc_Features_Group>; def minvariant_function_descriptors : Flag<["-"], "minvariant-function-descriptors">, Group<m_ppc_Features_Group>; diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index c9a41df806aff..f999372f31a94 100644 --- 
a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -82,6 +82,8 @@ bool PPCTargetInfo::handleTargetFeatures(std::vector<std::string> &Features, HasQuadwordAtomics = true; } else if (Feature == "+longcall") { UseLongCalls = true; + } else if (Feature == "+float16") { + HasFloat16 = true; } // TODO: Finish this list and add an assert that we've handled them // all. @@ -504,6 +506,7 @@ static bool ppcUserFeaturesCheck(DiagnosticsEngine &Diags, Found |= FindVSXSubfeature("+crypto", "-mcrypto", "-msoft-float"); Found |= FindVSXSubfeature("+power10-vector", "-mpower10-vector", "-msoft-float"); + Found |= FindVSXSubfeature("+float16", "-mfloat16", "-msoft-float"); } if (Found) return false; @@ -555,6 +558,14 @@ bool PPCTargetInfo::initFeatureMap( return false; } + if (!(ArchDefs & ArchDefinePwr8)) { + if (llvm::is_contained(FeaturesVec, "+float16")) { + // Reject -mfloat16 on pre-Power8 CPUs. + Diags.Report(diag::err_opt_not_valid_with_opt) << "-mfloat16" << CPU; + return false; + } + } + if (!(ArchDefs & ArchDefinePwr10)) { if (llvm::is_contained(FeaturesVec, "+mma")) { // MMA operations are not available pre-Power10. diff --git a/clang/lib/Basic/Targets/PPC.h b/clang/lib/Basic/Targets/PPC.h index a9f49aa3aebe1..df263eac0d6de 100644 --- a/clang/lib/Basic/Targets/PPC.h +++ b/clang/lib/Basic/Targets/PPC.h @@ -73,6 +73,7 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool HasPCRelativeMemops = false; bool HasQuadwordAtomics = false; bool UseLongCalls = false; + bool HasFloat16 = false; protected: std::string ABI; @@ -362,6 +363,10 @@ class LLVM_LIBRARY_VISIBILITY PPCTargetInfo : public TargetInfo { bool hasBitIntType() const override { return true; } + // Returns true when _Float16 is available as a native type. Requires the + // explicit opt-in (-mfloat16). 
+ bool hasFloat16Type() const override { return HasFloat16; } + bool isSPRegName(StringRef RegName) const override { return RegName == "r1" || RegName == "x1"; } diff --git a/clang/lib/Driver/ToolChains/Arch/PPC.cpp b/clang/lib/Driver/ToolChains/Arch/PPC.cpp index 17051980f34fb..f06645038cfca 100644 --- a/clang/lib/Driver/ToolChains/Arch/PPC.cpp +++ b/clang/lib/Driver/ToolChains/Arch/PPC.cpp @@ -77,6 +77,11 @@ void ppc::getPPCTargetFeatures(const Driver &D, const llvm::Triple &Triple, D.Diag(diag::err_opt_not_valid_on_target) << "-maix-shared-lib-tls-model-opt"; + // The _Float16 datatype is supported throguh the -mfloat16 flag. + if (Args.hasArg(options::OPT_mfloat16)) { + Features.push_back("+float16"); + } + // The integrated assembler counts as a "modern AIX assembler" for the // purposes of the modern-aix-as. if (Args.hasFlag(options::OPT_fintegrated_as, options::OPT_fno_integrated_as, diff --git a/clang/test/CodeGen/PowerPC/half-float16-ppc.c b/clang/test/CodeGen/PowerPC/half-float16-ppc.c new file mode 100644 index 0000000000000..45b336d03f7db --- /dev/null +++ b/clang/test/CodeGen/PowerPC/half-float16-ppc.c @@ -0,0 +1,154 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// RUN: %clang_cc1 -triple powerpc-unknown-unknown -O2 -emit-llvm %s -o - \ +// RUN: -target-cpu pwr8 -target-feature +float16 | FileCheck %s + +typedef __INT8_TYPE__ int8_t; +typedef __INT16_TYPE__ int16_t; +typedef __INT32_TYPE__ int32_t; +typedef __INT64_TYPE__ int64_t; +typedef __UINT8_TYPE__ uint8_t; +typedef __UINT16_TYPE__ uint16_t; +typedef __UINT32_TYPE__ uint32_t; +typedef __UINT64_TYPE__ uint64_t; + +// ============================================ +// Arithmetic Operations +// ============================================ +// CHECK-LABEL: define dso_local noundef half @c_add( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: 
[[UNPROMOTION:%.*]] = fadd half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_add(_Float16 a, _Float16 b) { return a + b; } + + +// CHECK-LABEL: define dso_local noundef half @c_sub( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fsub half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_sub(_Float16 a, _Float16 b) { return a - b; } + +// CHECK-LABEL: define dso_local noundef half @c_mul( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fmul half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_mul(_Float16 a, _Float16 b) { return a * b; } + +// CHECK-LABEL: define dso_local noundef half @c_div( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fdiv half [[A]], [[B]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_div(_Float16 a, _Float16 b) { return a / b; } + +// ============================================ +// Unary Operations +// ============================================ + +// CHECK-LABEL: define dso_local noundef half @c_neg( +// CHECK-SAME: half noundef [[A:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[UNPROMOTION:%.*]] = fneg half [[A]] +// CHECK-NEXT: ret half [[UNPROMOTION]] +// +_Float16 c_neg(_Float16 a) { return -a; } + +// ============================================ +// Compare Operations +// ============================================ + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_eq( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp oeq half [[A]], [[B]] +// CHECK-NEXT: 
[[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_eq(_Float16 a, _Float16 b){ return a == b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_ne( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp une half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_ne(_Float16 a, _Float16 b){ return a != b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_lt( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp olt half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_lt(_Float16 a, _Float16 b){ return a < b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_le( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp ole half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_le(_Float16 a, _Float16 b){ return a <= b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_gt( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp ogt half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: ret i32 [[CONV]] +// +int c_gt(_Float16 a, _Float16 b){ return a > b; } + +// CHECK-LABEL: define dso_local range(i32 0, 2) i32 @c_ge( +// CHECK-SAME: half noundef [[A:%.*]], half noundef [[B:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CMP:%.*]] = fcmp oge half [[A]], [[B]] +// CHECK-NEXT: [[CONV:%.*]] = zext i1 [[CMP]] to i32 +// CHECK-NEXT: 
ret i32 [[CONV]] +// +int c_ge(_Float16 a, _Float16 b){ return a >= b; } + +// ============================================ +// Conversion Operations: half <-> float/double +// ============================================ + +// CHECK-LABEL: define dso_local noundef float @to_f32( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[X]] to float +// CHECK-NEXT: ret float [[CONV]] +// +float to_f32(_Float16 x){ return (float)x; } + +// CHECK-LABEL: define dso_local noundef double @to_f64( +// CHECK-SAME: half noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fpext half [[X]] to double +// CHECK-NEXT: ret double [[CONV]] +// +double to_f64(_Float16 x){ return (double)x; } + +// CHECK-LABEL: define dso_local noundef half @from_f32( +// CHECK-SAME: float noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc float [[X]] to half +// CHECK-NEXT: ret half [[CONV]] +// +_Float16 from_f32(float x){ return (_Float16)x; } + +// CHECK-LABEL: define dso_local noundef half @from_f64( +// CHECK-SAME: double noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[CONV:%.*]] = fptrunc double [[X]] to half +// CHECK-NEXT: ret half [[CONV]] +// +_Float16 from_f64(double x){ return (_Float16)x; } diff --git a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp index 3370dedbecb16..fb6eccfe043b5 100644 --- a/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp +++ b/llvm/lib/Target/PowerPC/GISel/PPCRegisterBankInfo.cpp @@ -34,7 +34,8 @@ PPCRegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC, LLT Ty) const { switch (RC.getID()) { case PPC::VSFRCRegClassID: - case PPC::SPILLTOVSRRC_and_VSFRCRegClassID: + case PPC::VHFRCRegClassID: + case 
PPC::SPILLTOVSRRC_and_VHFRCRegClassID: case PPC::SPILLTOVSRRC_and_VFRCRegClassID: case PPC::SPILLTOVSRRC_and_F4RCRegClassID: case PPC::F8RCRegClassID: diff --git a/llvm/lib/Target/PowerPC/PPC.td b/llvm/lib/Target/PowerPC/PPC.td index 7b4bae60f7e74..43982c4c5c5fb 100644 --- a/llvm/lib/Target/PowerPC/PPC.td +++ b/llvm/lib/Target/PowerPC/PPC.td @@ -71,6 +71,10 @@ def FeatureModernAIXAs "AIX system assembler is modern enough to support new mnes">; def FeatureHardFloat : SubtargetFeature<"hard-float", "HasHardFloat", "true", "Enable floating-point instructions">; +def FeatureFloat16 : + SubtargetFeature<"float16", + "HasFloat16", "true", + "Enable half-precision floating point support (experimental)">; // Specifies that we are in 64-bit mode or that we should use 64-bit registers // in 32-bit mode when possible. Requires Feature64Bit to be enabled. @@ -405,6 +409,7 @@ def HasOnlySwappingMemOps : Predicate<"!Subtarget->hasP9Vector()">; def NoP10Vector : Predicate<"!Subtarget->hasP10Vector()">; def HasP10Vector : Predicate<"Subtarget->hasP10Vector()">; def HasFutureVector : Predicate<"Subtarget->hasFutureVector()">; +def HasFloat16 : Predicate<"Subtarget->hasFloat16()">; // Predicates used to differenciate between different ISAs. def IsISA2_06 : Predicate<"Subtarget->isISA2_06()">; diff --git a/llvm/lib/Target/PowerPC/PPCCallingConv.td b/llvm/lib/Target/PowerPC/PPCCallingConv.td index 5d4fe06ebdddd..961da883748dc 100644 --- a/llvm/lib/Target/PowerPC/PPCCallingConv.td +++ b/llvm/lib/Target/PowerPC/PPCCallingConv.td @@ -75,6 +75,9 @@ def RetCC_PPC : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + // Return f16 in FPR as 16 bit value. + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + // Floating point types returned as "direct" go into F1 .. F8; note that // only the ELFv2 ABI fully utilizes all these registers. 
CCIfNotSubtarget<"hasSPE()", @@ -125,8 +128,8 @@ def CC_PPC64_ELF : CallingConv<[ CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, // Handle fp types and shadow the corresponding registers as necessary. - CCIfType<[f32, f64], CCIfNotVarArg<CCCustom<"CC_PPC64_ELF_Shadow_GPR_Regs">>>, - CCIfType<[f32, f64], + CCIfType<[f16, f32, f64], CCIfNotVarArg<CCCustom<"CC_PPC64_ELF_Shadow_GPR_Regs">>>, + CCIfType<[f16, f32, f64], CCIfNotVarArg<CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13]>>>, @@ -161,7 +164,7 @@ def CC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i16], CCPromoteToType<i64>>, CCIfType<[i32], CCPromoteToType<i64>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6, X7, X8, X9, X10]>>, - CCIfType<[f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> + CCIfType<[f16, f32, f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>> ]>; // Simple return-value convention for 64-bit ELF PowerPC fast isel. @@ -178,6 +181,7 @@ def RetCC_PPC64_ELF_FIS : CallingConv<[ CCIfType<[i32], CCPromoteToType<i64>>, CCIfType<[i64], CCAssignToReg<[X3, X4, X5, X6]>>, CCIfType<[i128], CCAssignToReg<[X3, X4, X5, X6]>>, + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f32], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f64], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, CCIfType<[f128], @@ -218,6 +222,9 @@ def CC_PPC32_SVR4_Common : CallingConv<[ // The first 8 integer arguments are passed in integer registers. CCIfType<[i32], CCAssignToReg<[R3, R4, R5, R6, R7, R8, R9, R10]>>, + // Pass f16 in FPRs. + CCIfType<[f16], CCAssignToReg<[F1, F2, F3, F4, F5, F6, F7, F8]>>, + // Make sure the i64 words from a long double are either both passed in // registers or both passed on the stack. 
CCIfType<[f64], CCIfSplit<CCCustom<"CC_PPC32_SVR4_Custom_AlignFPArgRegs">>>, diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index 407093fd2b849..3a79e46e51bed 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -261,6 +261,68 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setTruncStoreAction(MVT::f32, MVT::f16, Expand); } + // Only active when -mfloat16 is passed and hard float is enabled. + // This block intentionally overrides some actions set above + // because when f16 is a first-class type we handle load/store + // directly rather than through extending loads. + if (Subtarget.hasFloat16() && Subtarget.hasHardFloat()) { + // Make f16 a legal type. + addRegisterClass(MVT::f16, &PPC::VHFRCRegClass); + + // PowerPC has no native f16 arithmetic instructions. All arithmetic, + // comparisons, rounding, transcendentals, and min/max must be promoted + // to f32 for computation. On P8 this means libcalls (__extendhfsf2 / + // __truncsfhf2); on P9 this means xscvhpdp/xsaddsp.../xscvdphp sequences. + static const unsigned F16PromoteOps[] = { + ISD::FMINNUM, ISD::FMAXNUM, ISD::FMAXIMUMNUM, ISD::FMINIMUMNUM, + ISD::FMAXIMUM, ISD::FMINIMUM, ISD::FADD, ISD::FSUB, + ISD::FMUL, ISD::FMA, ISD::FDIV, ISD::FSQRT, + ISD::FREM, ISD::FPOW, ISD::FLOG, ISD::FLOG2, + ISD::FLOG10, ISD::FEXP, ISD::FEXP2, ISD::FEXP10, + ISD::FCEIL, ISD::FFLOOR, ISD::FTRUNC, ISD::FRINT, + ISD::FNEARBYINT, ISD::FROUND, ISD::FROUNDEVEN, ISD::FCANONICALIZE, + ISD::FSIN, ISD::FCOS, ISD::SETCC, ISD::SELECT_CC, + ISD::SELECT}; + + // Promote all the arithmetic operations defined above to f32. + setOperationAction(F16PromoteOps, MVT::f16, Promote); + + setOperationAction(ISD::LOAD, MVT::f16, Legal); + setOperationAction(ISD::STORE, MVT::f16, Legal); + + // Legal handling for bit manipulation. 
+ setOperationAction(ISD::FABS, MVT::f16, Legal); + setOperationAction(ISD::FNEG, MVT::f16, Legal); + setOperationAction(ISD::FCOPYSIGN, MVT::f16, Legal); + + // Expand constant FP. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + + // Expand extending loads and truncating stores. + for (MVT VT : {MVT::f32, MVT::f64}) { + setLoadExtAction(ISD::EXTLOAD, VT, MVT::f16, Expand); + setTruncStoreAction(VT, MVT::f16, Expand); + } + + if (Subtarget.hasP9Vector()) { + // P9+: Hardware support for conversions. + setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Legal); + setOperationAction(ISD::FP_ROUND, MVT::f16, Legal); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Legal); + } else { + // P8: Conversions via libcalls + setOperationAction(ISD::FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::FP_EXTEND, MVT::f64, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Expand); + setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f64, Expand); + setOperationAction(ISD::FP_ROUND, MVT::f16, Expand); + setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Expand); + } + } + setTruncStoreAction(MVT::f64, MVT::f32, Expand); // PowerPC has pre-inc load and store's. @@ -1675,6 +1737,12 @@ bool PPCTargetLowering::hasSPE() const { return Subtarget.hasSPE(); } +/// Tell the ABI lowering infrastructure to use FPRs for _Float16 parameters +/// and return values rather than GPRs. Active only when -mfloat16 is enabled. 
+bool PPCTargetLowering::useFPRegsForHalfType() const { + return Subtarget.hasFloat16() && Subtarget.hasHardFloat(); +} + bool PPCTargetLowering::preferIncOfAddToSubOfNot(EVT VT) const { return VT.isScalarInteger(); } @@ -4630,6 +4698,7 @@ SDValue PPCTargetLowering::LowerFormalArguments_64SVR4( ArgOffset += 8; break; + case MVT::f16: case MVT::f32: case MVT::f64: // These can be scalar arguments or elements of a float array type @@ -6497,6 +6566,7 @@ SDValue PPCTargetLowering::LowerCall_64SVR4( if (!IsFastCall) ArgOffset += PtrByteSize; break; + case MVT::f16: case MVT::f32: case MVT::f64: { // These can be scalar arguments or elements of a float array type @@ -6843,6 +6913,7 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, return false; } + case MVT::f16: case MVT::f32: case MVT::f64: { // Parameter save area (PSA) is reserved even if the float passes in fpr. @@ -6986,10 +7057,9 @@ static bool CC_AIX(unsigned ValNo, MVT ValVT, MVT LocVT, } // So far, this function is only used by LowerFormalArguments_AIX() -static const TargetRegisterClass *getRegClassForSVT(MVT::SimpleValueType SVT, - bool IsPPC64, - bool HasP8Vector, - bool HasVSX) { +static const TargetRegisterClass * +getRegClassFo... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/196559 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
