ACLE 2.0 [1] allows __fp16 to be used as a function argument or return type.
This enables this for AArch64.
I have not enabled this for 32-bit ARM targets yet, as we are expecting to
release an updated version of the 32-bit AAPCS soon, which changes the handling
of __fp16 in a non backwards-compatible way.
This also fixes an existing bug that causes clang to not allow homogeneous
floating-point aggregates with a base type of __fp16. This is valid for
AAPCS64, but not for AAPCS-VFP.
[1]
http://infocenter.arm.com/help/topic/com.arm.doc.ihi0053c/IHI0053C_acle_2_0.pdf
http://reviews.llvm.org/D4456
Files:
include/clang/Basic/LangOptions.def
include/clang/Driver/CC1Options.td
lib/CodeGen/CGCall.cpp
lib/CodeGen/CGExprScalar.cpp
lib/CodeGen/CodeGenTypes.cpp
lib/CodeGen/CodeGenTypes.h
lib/CodeGen/TargetInfo.cpp
lib/Driver/Tools.cpp
lib/Frontend/CompilerInvocation.cpp
lib/Sema/SemaType.cpp
test/CodeGen/arm64-aapcs-arguments.c
Index: include/clang/Basic/LangOptions.def
===================================================================
--- include/clang/Basic/LangOptions.def
+++ include/clang/Basic/LangOptions.def
@@ -126,6 +126,7 @@
LANGOPT(OpenCL , 1, 0, "OpenCL")
LANGOPT(OpenCLVersion , 32, 0, "OpenCL version")
LANGOPT(NativeHalfType , 1, 0, "Native half type support")
+LANGOPT(HalfArgsAndReturns, 1, 0, "Allow function arguments and returns of type half")
LANGOPT(CUDA , 1, 0, "CUDA")
LANGOPT(OpenMP , 1, 0, "OpenMP support")
Index: include/clang/Driver/CC1Options.td
===================================================================
--- include/clang/Driver/CC1Options.td
+++ include/clang/Driver/CC1Options.td
@@ -486,6 +486,8 @@
HelpText<"Control vtordisp placement on win32 targets">;
def fno_rtti_data : Flag<["-"], "fno-rtti-data">,
HelpText<"Control emission of RTTI data">;
+def fallow_half_arguments_and_returns : Flag<["-"], "fallow-half-arguments-and-returns">,
+ HelpText<"Allow function arguments and returns of type half">;
//===----------------------------------------------------------------------===//
// Header Search Options
Index: lib/CodeGen/CGCall.cpp
===================================================================
--- lib/CodeGen/CGCall.cpp
+++ lib/CodeGen/CGCall.cpp
@@ -512,11 +512,12 @@
// default now.
ABIArgInfo &retInfo = FI->getReturnInfo();
if (retInfo.canHaveCoerceToType() && retInfo.getCoerceToType() == nullptr)
- retInfo.setCoerceToType(ConvertType(FI->getReturnType()));
+ retInfo.setCoerceToType(ConvertType(FI->getReturnType(), /* isFunctionArgOrReturn */ true));
for (auto &I : FI->arguments())
if (I.info.canHaveCoerceToType() && I.info.getCoerceToType() == nullptr)
- I.info.setCoerceToType(ConvertType(I.type));
+ I.info.setCoerceToType(
+ ConvertType(I.type, /* isFunctionArgOrReturn */ true));
bool erased = FunctionsBeingProcessed.erase(FI); (void)erased;
assert(erased && "Not in set?");
Index: lib/CodeGen/CGExprScalar.cpp
===================================================================
--- lib/CodeGen/CGExprScalar.cpp
+++ lib/CodeGen/CGExprScalar.cpp
@@ -799,8 +799,17 @@
}
if (DstTy != ResTy) {
- assert(ResTy->isIntegerTy(16) && "Only half FP requires extra conversion");
- Res = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), Res);
+ if (ResTy->isIntegerTy(16)) {
+ Res = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), Res);
+ } else if (ResTy->isHalfTy()) {
+ // This is required for ARM, where half is a storage and interchange
+ // format only, but may appear in the IR in order to get the calling
+ // convention correct.
+ Res = Builder.CreateCall(CGF.CGM.getIntrinsic(llvm::Intrinsic::convert_to_fp16), Res);
+ Res = Builder.CreateBitCast(Res, ResTy);
+ } else {
+ llvm_unreachable("Only half FP requires extra conversion");
+ }
}
return Res;
Index: lib/CodeGen/CodeGenTypes.cpp
===================================================================
--- lib/CodeGen/CodeGenTypes.cpp
+++ lib/CodeGen/CodeGenTypes.cpp
@@ -288,7 +288,7 @@
}
/// ConvertType - Convert the specified type to its LLVM form.
-llvm::Type *CodeGenTypes::ConvertType(QualType T) {
+llvm::Type *CodeGenTypes::ConvertType(QualType T, bool isFunctionArgOrReturn) {
T = Context.getCanonicalType(T);
const Type *Ty = T.getTypePtr();
@@ -351,12 +351,18 @@
static_cast<unsigned>(Context.getTypeSize(T)));
break;
- case BuiltinType::Half:
- // Half FP can either be storage-only (lowered to i16) or native.
- ResultType = getTypeForFormat(getLLVMContext(),
- Context.getFloatTypeSemantics(T),
- Context.getLangOpts().NativeHalfType);
+ case BuiltinType::Half: {
+ // Half FP can either be storage-only (lowered to i16) or native. However,
+ // the ARM C Language Extensions specify that __fp16 can be used as a
+ // function argument or return, so we need to emit IR which uses the half
+ // type so that the backend can get the calling convention correct.
+ ResultType =
+ getTypeForFormat(getLLVMContext(), Context.getFloatTypeSemantics(T),
+ Context.getLangOpts().NativeHalfType ||
+ (Context.getLangOpts().HalfArgsAndReturns &&
+ isFunctionArgOrReturn));
break;
+ }
case BuiltinType::Float:
case BuiltinType::Double:
case BuiltinType::LongDouble:
@@ -398,7 +404,8 @@
case Type::Auto:
llvm_unreachable("Unexpected undeduced auto type!");
case Type::Complex: {
- llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType());
+ llvm::Type *EltTy = ConvertType(cast<ComplexType>(Ty)->getElementType(),
+ isFunctionArgOrReturn);
ResultType = llvm::StructType::get(EltTy, EltTy, NULL);
break;
}
@@ -462,8 +469,9 @@
case Type::ExtVector:
case Type::Vector: {
const VectorType *VT = cast<VectorType>(Ty);
- ResultType = llvm::VectorType::get(ConvertType(VT->getElementType()),
- VT->getNumElements());
+ ResultType = llvm::VectorType::get(
+ ConvertType(VT->getElementType(), false),
+ VT->getNumElements());
break;
}
case Type::FunctionNoProto:
@@ -537,7 +545,8 @@
}
case Type::ObjCObject:
- ResultType = ConvertType(cast<ObjCObjectType>(Ty)->getBaseType());
+ ResultType = ConvertType(cast<ObjCObjectType>(Ty)->getBaseType(),
+ isFunctionArgOrReturn);
break;
case Type::ObjCInterface: {
@@ -564,7 +573,7 @@
case Type::Enum: {
const EnumDecl *ED = cast<EnumType>(Ty)->getDecl();
if (ED->isCompleteDefinition() || ED->isFixed())
- return ConvertType(ED->getIntegerType());
+ return ConvertType(ED->getIntegerType(), isFunctionArgOrReturn);
// Return a placeholder 'i32' type. This can be changed later when the
// type is defined (see UpdateCompletedType), but is likely to be the
// "right" answer.
Index: lib/CodeGen/CodeGenTypes.h
===================================================================
--- lib/CodeGen/CodeGenTypes.h
+++ lib/CodeGen/CodeGenTypes.h
@@ -118,8 +118,8 @@
CGCXXABI &getCXXABI() const { return TheCXXABI; }
llvm::LLVMContext &getLLVMContext() { return TheModule.getContext(); }
- /// ConvertType - Convert type T into a llvm::Type.
- llvm::Type *ConvertType(QualType T);
+ /// ConvertType - Convert type T into an llvm::Type.
+ llvm::Type *ConvertType(QualType T, bool isFunctionArgOrReturn = false);
/// ConvertTypeForMem - Convert type T into a llvm::Type. This differs from
/// ConvertType in that it is used to convert to the memory representation for
Index: lib/CodeGen/TargetInfo.cpp
===================================================================
--- lib/CodeGen/TargetInfo.cpp
+++ lib/CodeGen/TargetInfo.cpp
@@ -3233,6 +3233,7 @@
static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
ASTContext &Context,
+ bool isAArch64,
uint64_t *HAMembers = nullptr);
ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty,
@@ -3314,7 +3315,7 @@
// Homogeneous Floating-point Aggregates (HFAs) need to be expanded.
const Type *Base = nullptr;
uint64_t Members = 0;
- if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
+ if (isHomogeneousAggregate(Ty, Base, getContext(), true, &Members)) {
IsHA = true;
if (!IsNamedArg && isDarwinPCS()) {
// With the Darwin ABI, variadic arguments are always passed on the stack
@@ -3372,7 +3373,7 @@
return ABIArgInfo::getIgnore();
const Type *Base = nullptr;
- if (isHomogeneousAggregate(RetTy, Base, getContext()))
+ if (isHomogeneousAggregate(RetTy, Base, getContext(), true))
// Homogeneous Floating-point Aggregates (HFAs) are returned directly.
return ABIArgInfo::getDirect();
@@ -3509,7 +3510,7 @@
const Type *Base = nullptr;
uint64_t NumMembers;
- bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, &NumMembers);
+ bool IsHFA = isHomogeneousAggregate(Ty, Base, Ctx, true, &NumMembers);
if (IsHFA && NumMembers > 1) {
// Homogeneous aggregates passed in registers will have their elements split
// and stored 16-bytes apart regardless of size (they're notionally in qN,
@@ -3652,7 +3653,7 @@
uint64_t Align = CGF.getContext().getTypeAlign(Ty) / 8;
const Type *Base = nullptr;
- bool isHA = isHomogeneousAggregate(Ty, Base, getContext());
+ bool isHA = isHomogeneousAggregate(Ty, Base, getContext(), true);
bool isIndirect = false;
// Arguments bigger than 16 bytes which aren't homogeneous aggregates should
@@ -3936,10 +3937,11 @@
/// contained in the type is returned through it; this is used for the
/// recursive calls that check aggregate component types.
static bool isHomogeneousAggregate(QualType Ty, const Type *&Base,
- ASTContext &Context, uint64_t *HAMembers) {
+ ASTContext &Context, bool isAArch64,
+ uint64_t *HAMembers) {
uint64_t Members = 0;
if (const ConstantArrayType *AT = Context.getAsConstantArrayType(Ty)) {
- if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, &Members))
+ if (!isHomogeneousAggregate(AT->getElementType(), Base, Context, isAArch64, &Members))
return false;
Members *= AT->getSize().getZExtValue();
} else if (const RecordType *RT = Ty->getAs<RecordType>()) {
@@ -3950,7 +3952,7 @@
Members = 0;
for (const auto *FD : RD->fields()) {
uint64_t FldMembers;
- if (!isHomogeneousAggregate(FD->getType(), Base, Context, &FldMembers))
+ if (!isHomogeneousAggregate(FD->getType(), Base, Context, isAArch64, &FldMembers))
return false;
Members = (RD->isUnion() ?
@@ -3964,12 +3966,22 @@
}
// Homogeneous aggregates for AAPCS-VFP must have base types of float,
- // double, or 64-bit or 128-bit vectors.
+ // double, or 64-bit or 128-bit vectors. "long double" has the same machine
+ // type as double, so it is also allowed as a base type.
+ // Homogeneous aggregates for AAPCS64 must have base types of a floating
+ // point type or a short-vector type. This is the same as the 32-bit ABI,
+ // but with the difference that any floating-point type is allowed,
+ // including __fp16.
if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
- if (BT->getKind() != BuiltinType::Float &&
- BT->getKind() != BuiltinType::Double &&
- BT->getKind() != BuiltinType::LongDouble)
- return false;
+ if (isAArch64) {
+ if (!BT->isFloatingPoint())
+ return false;
+ } else {
+ if (BT->getKind() != BuiltinType::Float &&
+ BT->getKind() != BuiltinType::Double &&
+ BT->getKind() != BuiltinType::LongDouble)
+ return false;
+ }
} else if (const VectorType *VT = Ty->getAs<VectorType>()) {
unsigned VecSize = Context.getTypeSize(VT);
if (VecSize != 64 && VecSize != 128)
@@ -4167,7 +4179,7 @@
// into VFP registers.
const Type *Base = nullptr;
uint64_t Members = 0;
- if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) {
+ if (isHomogeneousAggregate(Ty, Base, getContext(), false, &Members)) {
assert(Base && "Base class should be set for homogeneous aggregate");
// Base can be a floating-point or a vector.
if (Base->isVectorType()) {
@@ -4368,7 +4380,7 @@
// Check for homogeneous aggregates with AAPCS-VFP.
if (getABIKind() == AAPCS_VFP && !isVariadic) {
const Type *Base = nullptr;
- if (isHomogeneousAggregate(RetTy, Base, getContext())) {
+ if (isHomogeneousAggregate(RetTy, Base, getContext(), false)) {
assert(Base && "Base class should be set for homogeneous aggregate");
// Homogeneous Aggregates are returned directly.
return ABIArgInfo::getDirect();
Index: lib/Driver/Tools.cpp
===================================================================
--- lib/Driver/Tools.cpp
+++ lib/Driver/Tools.cpp
@@ -3481,6 +3481,12 @@
}
}
+ if (getToolChain().getTriple().getArch() == llvm::Triple::aarch64 ||
+ getToolChain().getTriple().getArch() == llvm::Triple::aarch64_be ||
+ getToolChain().getTriple().getArch() == llvm::Triple::arm64 ||
+ getToolChain().getTriple().getArch() == llvm::Triple::arm64_be)
+ CmdArgs.push_back("-fallow-half-arguments-and-returns");
+
if (Arg *A = Args.getLastArg(options::OPT_mrestrict_it,
options::OPT_mno_restrict_it)) {
if (A->getOption().matches(options::OPT_mrestrict_it)) {
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -1512,6 +1512,7 @@
Opts.DebuggerObjCLiteral = Args.hasArg(OPT_fdebugger_objc_literal);
Opts.ApplePragmaPack = Args.hasArg(OPT_fapple_pragma_pack);
Opts.CurrentModule = Args.getLastArgValue(OPT_fmodule_name);
+ Opts.HalfArgsAndReturns = Args.hasArg(OPT_fallow_half_arguments_and_returns);
if (Arg *A = Args.getLastArg(OPT_faddress_space_map_mangling_EQ)) {
switch (llvm::StringSwitch<unsigned>(A->getValue())
Index: lib/Sema/SemaType.cpp
===================================================================
--- lib/Sema/SemaType.cpp
+++ lib/Sema/SemaType.cpp
@@ -1746,7 +1746,7 @@
}
// Functions cannot return half FP.
- if (T->isHalfType()) {
+ if (T->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 1 <<
FixItHint::CreateInsertion(Loc, "*");
return true;
@@ -1776,7 +1776,7 @@
if (ParamType->isVoidType()) {
Diag(Loc, diag::err_param_with_void_type);
Invalid = true;
- } else if (ParamType->isHalfType()) {
+ } else if (ParamType->isHalfType() && !getLangOpts().HalfArgsAndReturns) {
// Disallow half FP arguments.
Diag(Loc, diag::err_parameters_retval_cannot_have_fp16_type) << 0 <<
FixItHint::CreateInsertion(Loc, "*");
@@ -2749,7 +2749,7 @@
S.Diag(D.getIdentifierLoc(), diag::err_opencl_half_return) << T;
D.setInvalidType(true);
}
- } else {
+ } else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(D.getIdentifierLoc(),
diag::err_parameters_retval_cannot_have_fp16_type) << 1;
D.setInvalidType(true);
@@ -2939,7 +2939,7 @@
D.setInvalidType();
Param->setInvalidDecl();
}
- } else {
+ } else if (!S.getLangOpts().HalfArgsAndReturns) {
S.Diag(Param->getLocation(),
diag::err_parameters_retval_cannot_have_fp16_type) << 0;
D.setInvalidType();
Index: test/CodeGen/arm64-aapcs-arguments.c
===================================================================
--- test/CodeGen/arm64-aapcs-arguments.c
+++ test/CodeGen/arm64-aapcs-arguments.c
@@ -1,4 +1,4 @@
-// RUN: %clang_cc1 -triple arm64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -emit-llvm -w -o - %s | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-linux-gnu -target-feature +neon -target-abi aapcs -ffreestanding -fallow-half-arguments-and-returns -emit-llvm -w -o - %s | FileCheck %s
// AAPCS clause C.8 says: If the argument has an alignment of 16 then the NGRN
// is rounded up to the next even number.
@@ -40,3 +40,12 @@
// CHECK: define i8 @test5(i8 %a, i16 %b)
unsigned char test5(unsigned char a, signed short b) {
}
+
+// __fp16 can be used as a function argument or return type (ACLE 2.0)
+// CHECK: define half @test_half(half %{{.*}})
+__fp16 test_half(__fp16 A) { }
+
+// __fp16 is a base type for homogeneous floating-point aggregates for AArch64 (but not 32-bit ARM).
+// CHECK: define %struct.HFA_half @test_half_hfa(half %{{.*}}, half %{{.*}}, half %{{.*}}, half %{{.*}})
+struct HFA_half { __fp16 a[4]; };
+struct HFA_half test_half_hfa(struct HFA_half A) { }
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits