On Oct 30, 2012, at 4:21 PM, manman ren <[email protected]> wrote: > Author: mren > Date: Tue Oct 30 18:21:41 2012 > New Revision: 167058 > > URL: http://llvm.org/viewvc/llvm-project?rev=167058&view=rev > Log: > ARM AAPCS-VFP: fix handling of homogeneous aggregate. > > If HA can only partially fit into VFP registers, we add padding to make sure > HA will be on stack and later VFP CPRCs will be on stack as well.
Thanks for working on this, Manman. I noticed while reviewing this that the AAPCS-VFP ABI is even trickier than I remembered. The VFP registers aren't simply allocated in order. If you skip over some registers due to alignment constraints, you may need to "backfill" those registers for later arguments. See test/CodeGen/ARM/arguments_f64_backfill.ll for an example of this. That means that the front-end is going to have to keep track of the registers available for backfilling so you can have an accurate count of how many remain available for homogeneous aggregates. > > Modified: > cfe/trunk/lib/CodeGen/TargetInfo.cpp > cfe/trunk/test/CodeGen/arm-homogenous.c > > Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167058&r1=167057&r2=167058&view=diff > ============================================================================== > --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original) > +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Tue Oct 30 18:21:41 2012 > @@ -2863,7 +2863,8 @@ > ABIKind getABIKind() const { return Kind; } > > ABIArgInfo classifyReturnType(QualType RetTy) const; > - ABIArgInfo classifyArgumentType(QualType RetTy) const; > + ABIArgInfo classifyArgumentType(QualType RetTy, unsigned &AllocatedVFP, > + bool &IsHA) const; > bool isIllegalVectorType(QualType Ty) const; > > virtual void computeInfo(CGFunctionInfo &FI) const; > @@ -2907,10 +2908,32 @@ > } > > void ARMABIInfo::computeInfo(CGFunctionInfo &FI) const { > + // To correctly handle Homogeneous Aggregate, we need to keep track of the > + // number of VFP registers allocated so far. > + // C.1.vfp If the argument is a VFP CPRC and there are sufficient > consecutive > + // VFP registers of the appropriate type unallocated then the argument is > + // allocated to the lowest-numbered sequence of such registers. 
> + // C.2.vfp If the argument is a VFP CPRC then any VFP registers that are > + // unallocated are marked as unavailable. > + unsigned AllocatedVFP = 0; > FI.getReturnInfo() = classifyReturnType(FI.getReturnType()); > for (CGFunctionInfo::arg_iterator it = FI.arg_begin(), ie = FI.arg_end(); > - it != ie; ++it) > - it->info = classifyArgumentType(it->type); > + it != ie; ++it) { > + unsigned PreAllocation = AllocatedVFP; > + bool IsHA = false; > + // 6.1.2.3 There is one VFP co-processor register class using registers > + // s0-s15 (d0-d7) for passing arguments. > + const unsigned NumVFPs = 16; > + it->info = classifyArgumentType(it->type, AllocatedVFP, IsHA); > + // If we do not have enough VFP registers for the HA, any VFP registers > + // that are unallocated are marked as unavailable. To achieve this, we > add > + // padding of (NumVFPs - PreAllocation) floats. > + if (IsHA && AllocatedVFP > NumVFPs && PreAllocation < NumVFPs) { > + llvm::Type *PaddingTy = llvm::ArrayType::get( > + llvm::Type::getFloatTy(getVMContext()), NumVFPs - PreAllocation); > + it->info = ABIArgInfo::getExpandWithPadding(false, PaddingTy); > + } > + } > > // Always honor user-specified calling convention. 
> if (FI.getCallingConvention() != llvm::CallingConv::C) > @@ -3012,7 +3035,17 @@ > return (Members > 0 && Members <= 4); > } > > -ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty) const { > +ABIArgInfo ARMABIInfo::classifyArgumentType(QualType Ty, unsigned > &AllocatedVFP, > + bool &IsHA) const { > + // We update number of allocated VFPs according to > + // 6.1.2.1 The following argument types are VFP CPRCs: > + // A single-precision floating-point type (including promoted > + // half-precision types); A double-precision floating-point type; > + // A 64-bit or 128-bit containerized vector type; Homogeneous Aggregate > + // with a Base Type of a single- or double-precision floating-point type, > + // 64-bit containerized vectors or 128-bit containerized vectors with one > + // to four Elements. > + > // Handle illegal vector types here. > if (isIllegalVectorType(Ty)) { > uint64_t Size = getContext().getTypeSize(Ty); > @@ -3024,15 +3057,38 @@ > if (Size == 64) { > llvm::Type *ResType = llvm::VectorType::get( > llvm::Type::getInt32Ty(getVMContext()), 2); > + // Align AllocatedVFP to an even number to use a D register. > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); > + AllocatedVFP += 2; // 1 D register = 2 S registers > return ABIArgInfo::getDirect(ResType); > } > if (Size == 128) { > llvm::Type *ResType = llvm::VectorType::get( > llvm::Type::getInt32Ty(getVMContext()), 4); > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 4); > + AllocatedVFP += 4; // 1 Q register = 4 S registers > return ABIArgInfo::getDirect(ResType); > } > return ABIArgInfo::getIndirect(0, /*ByVal=*/false); > } > + // Update AllocatedVFP for legal vector types. > + if (const VectorType *VT = Ty->getAs<VectorType>()) { > + uint64_t Size = getContext().getTypeSize(VT); > + // Size of a legal vector should be power of 2 and above 64. > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, Size >= 128 ? 
4 : > 2); > + AllocatedVFP += (Size / 32); > + } > + // Update AllocatedVFP for floating point types. > + if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { > + if (BT->getKind() == BuiltinType::Half || > + BT->getKind() == BuiltinType::Float) > + AllocatedVFP += 1; > + if (BT->getKind() == BuiltinType::Double || > + BT->getKind() == BuiltinType::LongDouble) { > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); > + AllocatedVFP += 2; > + } > + } > > if (!isAggregateTypeForABI(Ty)) { > // Treat an enum type as its underlying type. > @@ -3053,10 +3109,28 @@ > return ABIArgInfo::getIndirect(0, /*ByVal=*/false); > > if (getABIKind() == ARMABIInfo::AAPCS_VFP) { > - // Homogeneous Aggregates need to be expanded. > + // Homogeneous Aggregates need to be expanded when we can fit the > aggregate > + // into VFP registers. > const Type *Base = 0; > - if (isHomogeneousAggregate(Ty, Base, getContext())) { > + uint64_t Members = 0; > + if (isHomogeneousAggregate(Ty, Base, getContext(), &Members)) { > assert(Base && "Base class should be set for homogeneous aggregate"); > + // Base can be a floating-point or a vector. > + if (Base->isVectorType()) { > + // ElementSize is in number of floats. > + unsigned ElementSize = getContext().getTypeSize(Base) == 64 ? 2 : 4; > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, > + ElementSize); > + AllocatedVFP += Members * ElementSize; > + } else if (Base->isSpecificBuiltinType(BuiltinType::Float)) > + AllocatedVFP += Members; > + else { > + assert(Base->isSpecificBuiltinType(BuiltinType::Double) || > + Base->isSpecificBuiltinType(BuiltinType::LongDouble)); > + AllocatedVFP = llvm::RoundUpToAlignment(AllocatedVFP, 2); > + AllocatedVFP += Members * 2; // Base type is double. 
> + } > + IsHA = true; > return ABIArgInfo::getExpand(); > } > } > > Modified: cfe/trunk/test/CodeGen/arm-homogenous.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-homogenous.c?rev=167058&r1=167057&r2=167058&view=diff > ============================================================================== > --- cfe/trunk/test/CodeGen/arm-homogenous.c (original) > +++ cfe/trunk/test/CodeGen/arm-homogenous.c Tue Oct 30 18:21:41 2012 > @@ -156,6 +156,40 @@ > } > // CHECK: declare arm_aapcs_vfpcc > %union.union_with_struct_with_fundamental_elems > @returns_union_with_struct_with_fundamental_elems() > > +// Make sure HAs that can be partially fit into VFP registers will be > allocated > +// on stack and that later VFP candidates will go on stack as well. > +typedef struct { > + double x; > + double a2; > + double a3; > + double a4; > +} struct_of_four_doubles; > +extern void takes_struct_of_four_doubles(double a, struct_of_four_doubles b, > struct_of_four_doubles c, double d); > +struct_of_four_doubles g_s4d; > + > +void test_struct_of_four_doubles(void) { > +// CHECK: test_struct_of_four_doubles > +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_four_doubles(double > {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, [6 x > float] undef, double {{.*}}, double {{.*}}, double {{.*}}, double {{.*}}, > double {{.*}}) > + takes_struct_of_four_doubles(3.0, g_s4d, g_s4d, 4.0); > +} > + > +typedef __attribute__(( ext_vector_type(8) )) char __char8; > +typedef __attribute__(( ext_vector_type(4) )) short __short4; > +typedef struct { > + __char8 a1; > + __short4 a2; > + __char8 a3; > + __short4 a4; > +} struct_of_vecs; > +extern void takes_struct_of_vecs(double a, struct_of_vecs b, struct_of_vecs > c, double d); > +struct_of_vecs g_vec; > + > +void test_struct_of_vecs(void) { > +// CHECK: test_struct_of_vecs > +// CHECK: call arm_aapcs_vfpcc void @takes_struct_of_vecs(double {{.*}}, <8 > x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 
x i16> {{.*}}, [6 x > float] undef, <8 x i8> {{.*}}, <4 x i16> {{.*}}, <8 x i8> {{.*}}, <4 x i16> > {{.*}}, double {{.*}}) > + takes_struct_of_vecs(3.0, g_vec, g_vec, 4.0); > +} > + > // FIXME: Tests necessary: > // - Vectors > // - C++ stuff > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits _______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
