https://github.com/vortex73 updated https://github.com/llvm/llvm-project/pull/194718
>From ff5977ac640a9d59c13353484c700603fae25f87 Mon Sep 17 00:00:00 2001 From: Narayan Sreekumar <[email protected]> Date: Wed, 29 Apr 2026 01:56:09 +0530 Subject: [PATCH 1/2] feat: Implement the SysV ABI for the LLVM ABI Library. --- clang/lib/CodeGen/CodeGenModule.cpp | 41 +- clang/test/CodeGen/X86/avx512fp16-abi.c | 2 + clang/test/CodeGen/X86/bfloat-half-abi.c | 1 + clang/test/CodeGen/X86/fp128-abi.c | 1 + llvm/include/llvm/ABI/TargetInfo.h | 15 + llvm/include/llvm/ABI/Types.h | 5 + llvm/lib/ABI/CMakeLists.txt | 1 + llvm/lib/ABI/Targets/X86.cpp | 1428 ++++++++++++++++++++++ llvm/lib/ABI/Types.cpp | 48 + 9 files changed, 1537 insertions(+), 5 deletions(-) create mode 100644 llvm/lib/ABI/Targets/X86.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 2d91b7eaa52dc..685d496139acc 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -344,7 +344,8 @@ bool CodeGenModule::shouldUseLLVMABILowering() const { return false; // Only opt in for targets that have an LLVMABI implementation; others // continue through the legacy ABIInfo path. - return getTriple().isBPF(); + const llvm::Triple &T = getTriple(); + return T.isBPF() || (T.getArch() == llvm::Triple::x86_64 && T.isOSLinux()); } const llvm::abi::TargetInfo & @@ -352,10 +353,40 @@ CodeGenModule::getLLVMABITargetInfo(llvm::abi::TypeBuilder &TB) { if (TheLLVMABITargetInfo) return *TheLLVMABITargetInfo; - assert(getTriple().isBPF() && - "LLVMABI lowering requested for an unsupported target"); - TheLLVMABITargetInfo = llvm::abi::createBPFTargetInfo(TB); - return *TheLLVMABITargetInfo; + const llvm::Triple &T = getTriple(); + if (T.isBPF()) { + TheLLVMABITargetInfo = llvm::abi::createBPFTargetInfo(TB); + return *TheLLVMABITargetInfo; + } + + if (T.getArch() == llvm::Triple::x86_64) { + StringRef ABI = getTarget().getABI(); + llvm::abi::X86AVXABILevel AVXLevel = + ABI == "avx512" ? llvm::abi::X86AVXABILevel::AVX512 + : ABI == "avx" ? llvm::abi::X86AVXABILevel::AVX + : llvm::abi::X86AVXABILevel::None; + + llvm::abi::ABICompatInfo CompatInfo; + LangOptions::ClangABI Compat = getLangOpts().getClangABICompat(); + CompatInfo.ClassifyIntegerMMXAsSSE = + Compat > LangOptions::ClangABI::Ver3_8 && !T.isOSDarwin() && + !T.isPS() && !T.isOSFreeBSD(); + CompatInfo.HonorsRevision98 = !T.isOSDarwin(); + CompatInfo.PassInt128VectorsInMem = Compat > LangOptions::ClangABI::Ver9 && + (T.isOSLinux() || T.isOSNetBSD()); + CompatInfo.ReturnCXXRecordGreaterThan128InMem = + Compat > LangOptions::ClangABI::Ver20; + CompatInfo.Clang11Compat = + Compat <= LangOptions::ClangABI::Ver11 || T.isPS(); + + bool Has64BitPointers = getTarget().getPointerWidth(LangAS::Default) == 64; + + TheLLVMABITargetInfo = llvm::abi::createX8664TargetInfo( + TB, T, AVXLevel, Has64BitPointers, CompatInfo); + return *TheLLVMABITargetInfo; + } + + llvm_unreachable("LLVMABI lowering requested for an unsupported target"); } static void checkDataLayoutConsistency(const TargetInfo &Target, diff --git a/clang/test/CodeGen/X86/avx512fp16-abi.c b/clang/test/CodeGen/X86/avx512fp16-abi.c index 1856f1e22715c..5daecd3c6a76e 100644 --- a/clang/test/CodeGen/X86/avx512fp16-abi.c +++ b/clang/test/CodeGen/X86/avx512fp16-abi.c @@ -1,5 +1,7 @@ // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +avx512fp16 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-C // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +avx512fp16 -x c++ -std=c++11 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CPP +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +avx512fp16 -fexperimental-abi-lowering < %s | FileCheck %s --check-prefixes=CHECK,CHECK-C +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +avx512fp16 -x c++ -std=c++11 -fexperimental-abi-lowering < %s | FileCheck %s --check-prefixes=CHECK,CHECK-CPP struct half1 { _Float16 a; diff --git a/clang/test/CodeGen/X86/bfloat-half-abi.c b/clang/test/CodeGen/X86/bfloat-half-abi.c index 42250791848ac..59ce742f5181a 100644 --- a/clang/test/CodeGen/X86/bfloat-half-abi.c +++ b/clang/test/CodeGen/X86/bfloat-half-abi.c @@ -1,4 +1,5 @@ // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 -fexperimental-abi-lowering < %s | FileCheck %s --check-prefixes=CHECK struct bfloat1 { __bf16 a; diff --git a/clang/test/CodeGen/X86/fp128-abi.c b/clang/test/CodeGen/X86/fp128-abi.c index 2a0ae5009338a..564a0b5912f60 100644 --- a/clang/test/CodeGen/X86/fp128-abi.c +++ b/clang/test/CodeGen/X86/fp128-abi.c @@ -1,5 +1,6 @@ // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 < %s | FileCheck %s --check-prefixes=CHECK // RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature -sse2 < %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang_cc1 -triple x86_64-linux -emit-llvm -target-feature +sse2 -fexperimental-abi-lowering < %s | FileCheck %s --check-prefixes=CHECK struct st1 { __float128 a; diff --git a/llvm/include/llvm/ABI/TargetInfo.h b/llvm/include/llvm/ABI/TargetInfo.h index f4feb3b164fbb..c09f4a153aaf5 100644 --- a/llvm/include/llvm/ABI/TargetInfo.h +++ b/llvm/include/llvm/ABI/TargetInfo.h @@ -16,7 +16,9 @@ #include "llvm/ABI/FunctionInfo.h" #include "llvm/ABI/Types.h" +#include "llvm/TargetParser/Triple.h" #include <cassert> +#include <memory> namespace llvm { namespace abi { @@ -83,6 +85,19 @@ class TargetInfo { std::unique_ptr<TargetInfo> createBPFTargetInfo(TypeBuilder &TB); +/// The AVX ABI level for X86 targets. +enum class X86AVXABILevel { + None, + AVX, + AVX512, +}; + +std::unique_ptr<TargetInfo> createX8664TargetInfo(TypeBuilder &TB, + const Triple &TargetTriple, + X86AVXABILevel AVXLevel, + bool Has64BitPointers, + const ABICompatInfo &Compat); + } // namespace abi } // namespace llvm diff --git a/llvm/include/llvm/ABI/Types.h b/llvm/include/llvm/ABI/Types.h index b66f28212fa4f..0a13af84891f4 100644 --- a/llvm/include/llvm/ABI/Types.h +++ b/llvm/include/llvm/ABI/Types.h @@ -310,6 +310,11 @@ class RecordType : public Type { bool isEmpty() const; + /// Returns the field, base, or virtual base whose extent contains + /// \p OffsetInBits, or nullptr if no such element exists. Empty bases and + /// unnamed bitfields are skipped. + const FieldInfo *getElementContainingOffset(unsigned OffsetInBits) const; + static bool classof(const Type *T) { return T->getKind() == TypeKind::Record; } diff --git a/llvm/lib/ABI/CMakeLists.txt b/llvm/lib/ABI/CMakeLists.txt index a5c102b57d074..47ce5926e6039 100644 --- a/llvm/lib/ABI/CMakeLists.txt +++ b/llvm/lib/ABI/CMakeLists.txt @@ -4,6 +4,7 @@ add_llvm_component_library(LLVMABI TargetInfo.cpp IRTypeMapper.cpp Targets/BPF.cpp + Targets/X86.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/ABI diff --git a/llvm/lib/ABI/Targets/X86.cpp b/llvm/lib/ABI/Targets/X86.cpp new file mode 100644 index 0000000000000..2255390a68cd1 --- /dev/null +++ b/llvm/lib/ABI/Targets/X86.cpp @@ -0,0 +1,1428 @@ +//===- X86.cpp ------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ABI/FunctionInfo.h" +#include "llvm/ABI/TargetInfo.h" +#include "llvm/ABI/Types.h" +#include "llvm/Support/Alignment.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/TypeSize.h" +#include "llvm/TargetParser/Triple.h" +#include <algorithm> +#include <cassert> +#include <cstdint> + +namespace llvm { +namespace abi { + +static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { + switch (AVXLevel) { + case X86AVXABILevel::AVX512: + return 512; + case X86AVXABILevel::AVX: + return 256; + case X86AVXABILevel::None: + return 128; + } + llvm_unreachable("Unknown AVXLevel"); +} + +class X86_64TargetInfo : public TargetInfo { +public: + enum Class { + Integer = 0, + SSE, + SSEUp, + X87, + X87UP, + Complex_X87, + NoClass, + Memory + }; + +private: + TypeBuilder &TB; + X86AVXABILevel AVXLevel; + bool Has64BitPointers; + + static Class merge(Class Accum, Class Field); + + void postMerge(unsigned AggregateSize, Class &Lo, Class &Hi) const; + + void classify(const Type *T, uint64_t OffsetBase, Class &Lo, Class &Hi, + bool IsNamedArg, bool IsRegCall = false) const; + + const Type *getIntegerTypeAtOffset(const Type *IRType, unsigned IROffset, + const Type *SourceTy, + unsigned SourceOffset, + bool InMemory = false) const; + + const Type *getSSETypeAtOffset(const Type *ABIType, unsigned ABIOffset, + const Type *SourceTy, + unsigned SourceOffset) const; + bool isIllegalVectorType(const Type *Ty) const; + bool containsMatrixField(const RecordType *RT) const; + + void computeInfo(FunctionInfo &FI) const override; + ArgInfo getIndirectReturnResult(const Type *Ty) const; + const Type *getFPTypeAtOffset(const Type *Ty, unsigned Offset) const; + + const Type *isSingleElementStruct(const Type *Ty) const; + const Type *getByteVectorType(const Type *Ty) const; + + const Type *createPairType(const Type *Lo, const Type *Hi) const; + ArgInfo getIndirectResult(const Type *Ty, unsigned FreeIntRegs) const; + + ArgInfo classifyReturnType(const Type *RetTy) const; + + ArgInfo classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, + unsigned &NeededInt, unsigned &NeededSse, + bool IsNamedArg, bool IsRegCall = false) const; + const Type *useFirstFieldIfTransparentUnion(const Type *Ty) const; + +public: + X86_64TargetInfo(TypeBuilder &TypeBuilder, const Triple & /*Triple*/, + X86AVXABILevel AVXABILevel, bool Has64BitPtrs, + const ABICompatInfo &Compat) + : TargetInfo(Compat), TB(TypeBuilder), AVXLevel(AVXABILevel), + Has64BitPointers(Has64BitPtrs) {} + + bool has64BitPointers() const { return Has64BitPointers; } +}; + +// Gets the "best" type to represent the union. +static const Type *reduceUnionForX8664(const RecordType *UnionType, + TypeBuilder &TB) { + assert(UnionType->isUnion() && "Expected union type"); + + ArrayRef<FieldInfo> Fields = UnionType->getFields(); + if (Fields.empty()) { + return nullptr; + } + + const Type *StorageType = nullptr; + + for (const auto &Field : Fields) { + if (Field.IsBitField && Field.IsUnnamedBitfield && + Field.BitFieldWidth == 0) { + continue; + } + + const Type *FieldType = Field.FieldType; + + if (UnionType->isTransparentUnion() && !StorageType) { + StorageType = FieldType; + break; + } + + if (!StorageType || + FieldType->getAlignment() > StorageType->getAlignment() || + (FieldType->getAlignment() == StorageType->getAlignment() && + TypeSize::isKnownGT(FieldType->getSizeInBits(), + StorageType->getSizeInBits()))) { + StorageType = FieldType; + } + } + return StorageType; +} + +void X86_64TargetInfo::postMerge(unsigned AggregateSize, Class &Lo, + Class &Hi) const { + // AMD64-ABI 3.2.3p2: Rule 5. Then a post merger cleanup is done: + // + // (a) If one of the classes is Memory, the whole argument is passed in + // memory. + // + // (b) If X87UP is not preceded by X87, the whole argument is passed in + // memory. + // + // (c) If the size of the aggregate exceeds two eightbytes and the first + // eightbyte isn't SSE or any other eightbyte isn't SSEUP, the whole + // argument is passed in memory. NOTE: This is necessary to keep the + // ABI working for processors that don't support the __m256 type. + // + // (d) If SSEUP is not preceded by SSE or SSEUP, it is converted to SSE. + // + // Some of these are enforced by the merging logic. Others can arise + // only with unions; for example: + // union { _Complex double; unsigned; } + // + // Note that clauses (b) and (c) were added in 0.98. + + if (Hi == Memory) + Lo = Memory; + if (Hi == X87UP && Lo != X87 && getABICompatInfo().HonorsRevision98) + Lo = Memory; + if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) + Lo = Memory; + if (Hi == SSEUp && Lo != SSE) + Hi = SSE; +} +X86_64TargetInfo::Class X86_64TargetInfo::merge(Class Accum, Class Field) { + // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is + // classified recursively so that always two fields are + // considered. The resulting class is calculated according to + // the classes of the fields in the eightbyte: + // + // (a) If both classes are equal, this is the resulting class. + // + // (b) If one of the classes is NO_CLASS, the resulting class is + // the other class. + // + // (c) If one of the classes is MEMORY, the result is the MEMORY + // class. + // + // (d) If one of the classes is INTEGER, the result is the + // INTEGER. + // + // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, + // MEMORY is used as class. + // + // (f) Otherwise class SSE is used. + + // Accum should never be memory (we should have returned) or + // ComplexX87 (because this cannot be passed in a structure). + assert((Accum != Memory && Accum != Complex_X87) && + "Invalid accumulated classification during merge."); + + if (Accum == Field || Field == NoClass) + return Accum; + if (Field == Memory) + return Memory; + if (Accum == NoClass) + return Field; + if (Accum == Integer || Field == Integer) + return Integer; + if (Field == X87 || Field == X87UP || Field == Complex_X87 || Accum == X87 || + Accum == X87UP) + return Memory; + + return SSE; +} + +bool X86_64TargetInfo::containsMatrixField(const RecordType *RT) const { + for (const auto &Field : RT->getFields()) { + const Type *FieldType = Field.FieldType; + + if (const auto *AT = dyn_cast<ArrayType>(FieldType)) { + if (AT->isMatrixType()) + return true; + continue; + } + + if (const auto *NestedRT = dyn_cast<RecordType>(FieldType)) + if (containsMatrixField(NestedRT)) + return true; + } + return false; +} + +void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, + Class &Hi, bool IsNamedArg, + bool IsRegCall) const { + Lo = Hi = NoClass; + Class &Current = OffsetBase < 64 ? Lo : Hi; + Current = Memory; + + if (T->isVoid()) { + Current = NoClass; + return; + } + + if (const auto *IT = dyn_cast<IntegerType>(T)) { + auto BitWidth = IT->getSizeInBits().getFixedValue(); + + if (BitWidth == 128 || + (IT->isBitInt() && BitWidth > 64 && BitWidth <= 128)) { + Lo = Integer; + Hi = Integer; + } else if (BitWidth <= 64) + Current = Integer; + + return; + } + + if (const auto *FT = dyn_cast<FloatType>(T)) { + const auto *FltSem = FT->getSemantics(); + + if (FltSem == &llvm::APFloat::IEEEsingle() || + FltSem == &llvm::APFloat::IEEEdouble() || + FltSem == &llvm::APFloat::IEEEhalf() || + FltSem == &llvm::APFloat::BFloat()) { + Current = SSE; + } else if (FltSem == &llvm::APFloat::IEEEquad()) { + Lo = SSE; + Hi = SSEUp; + } else if (FltSem == &llvm::APFloat::x87DoubleExtended()) { + Lo = X87; + Hi = X87UP; + } else + Current = SSE; + return; + } + if (T->isPointer()) { + Current = Integer; + return; + } + + if (const auto *MPT = dyn_cast<MemberPointerType>(T)) { + if (MPT->isFunctionPointer()) { + if (Has64BitPointers) { + Lo = Hi = Integer; + } else { + uint64_t EbFuncPtr = OffsetBase / 64; + uint64_t EbThisAdj = (OffsetBase + 64 - 1) / 64; + if (EbFuncPtr != EbThisAdj) { + Lo = Hi = Integer; + } else + Current = Integer; + } + } else + Current = Integer; + return; + } + + if (const auto *VT = dyn_cast<VectorType>(T)) { + auto Size = VT->getSizeInBits().getFixedValue(); + const Type *ElementType = VT->getElementType(); + + if (Size == 1 || Size == 8 || Size == 16 || Size == 32) { + // gcc passes the following as integer: + // 4 bytes - <4 x char>, <2 x short>, <1 x int>, <1 x float> + // 2 bytes - <2 x char>, <1 x short> + // 1 byte - <1 x char> + Current = Integer; + // If this type crosses an eightbyte boundary, it should be + // split. + uint64_t EbLo = (OffsetBase) / 64; + uint64_t EbHi = (OffsetBase + Size - 1) / 64; + if (EbLo != EbHi) + Hi = Lo; + } else if (Size == 64) { + if (const auto *FT = dyn_cast<FloatType>(ElementType)) { + // gcc passes <1 x double> in memory. :( + if (FT->getSemantics() == &llvm::APFloat::IEEEdouble()) + return; + } + + // gcc passes <1 x long long> as SSE but clang used to unconditionally + // pass them as integer. For platforms where clang is the de facto + // platform compiler, we must continue to use integer. + if (const auto *IT = dyn_cast<IntegerType>(ElementType)) { + uint64_t ElemBits = IT->getSizeInBits().getFixedValue(); + if (!getABICompatInfo().ClassifyIntegerMMXAsSSE && + (ElemBits == 64 || ElemBits == 32)) { + Current = Integer; + } else + Current = SSE; + } else + Current = SSE; + // If this type crosses an eightbyte boundary, it should be + // split. + if (OffsetBase && OffsetBase != 64) + Hi = Lo; + } else if (Size == 128 || + (IsNamedArg && Size <= getNativeVectorSizeForAVXABI(AVXLevel))) { + if (const auto *IT = dyn_cast<IntegerType>(ElementType)) { + uint64_t ElemBits = IT->getSizeInBits().getFixedValue(); + // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( + if (getABICompatInfo().PassInt128VectorsInMem && Size != 128 && + ElemBits == 128) + return; + } + + // Arguments of 256-bits are split into four eightbyte chunks. The + // least significant one belongs to class SSE and all the others to class + // SSEUP. The original Lo and Hi design considers that types can't be + // greater than 128-bits, so a 64-bit split in Hi and Lo makes sense. + // This design isn't correct for 256-bits, but since there're no cases + // where the upper parts would need to be inspected, avoid adding + // complexity and just consider Hi to match the 64-256 part. + // + // Note that per 3.5.7 of AMD64-ABI, 256-bit args are only passed in + // registers if they are "named", i.e. not part of the "..." of a + // variadic function. + // + // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are + // split into eight eightbyte chunks, one SSE and seven SSEUP. + Lo = SSE; + Hi = SSEUp; + } + return; + } + + if (const auto *CT = dyn_cast<ComplexType>(T)) { + const Type *ElementType = CT->getElementType(); + uint64_t Size = T->getSizeInBits().getFixedValue(); + + if (isa<IntegerType>(ElementType)) { + if (Size <= 64) + Current = Integer; + else if (Size <= 128) + Lo = Hi = Integer; + } else if (const auto *EFT = dyn_cast<FloatType>(ElementType)) { + const auto *FltSem = EFT->getSemantics(); + if (FltSem == &llvm::APFloat::IEEEhalf() || + FltSem == &llvm::APFloat::IEEEsingle() || + FltSem == &llvm::APFloat::BFloat()) + Current = SSE; + else if (FltSem == &llvm::APFloat::IEEEquad()) + Current = Memory; + else if (FltSem == &llvm::APFloat::x87DoubleExtended()) + Current = Complex_X87; + else if (FltSem == &llvm::APFloat::IEEEdouble()) + Lo = Hi = SSE; + else + llvm_unreachable("Unexpected long double representation!"); + } + + uint64_t ElementSize = ElementType->getSizeInBits().getFixedValue(); + // If this complex type crosses an eightbyte boundary then it + // should be split. + uint64_t EbReal = OffsetBase / 64; + uint64_t EbImag = (OffsetBase + ElementSize) / 64; + if (Hi == NoClass && EbReal != EbImag) + Hi = Lo; + + return; + } + + if (const auto *AT = dyn_cast<ArrayType>(T)) { + uint64_t Size = AT->getSizeInBits().getFixedValue(); + + if (!IsRegCall && Size > 512) + return; + + const Type *ElementType = AT->getElementType(); + uint64_t ElemAlign = ElementType->getAlignment().value() * 8; + if (OffsetBase % ElemAlign) + return; + + Current = NoClass; + uint64_t EltSize = ElementType->getSizeInBits().getFixedValue(); + uint64_t ArraySize = AT->getNumElements(); + + if (Size > 128 && + (Size != EltSize || Size > getNativeVectorSizeForAVXABI(AVXLevel))) + return; + + for (uint64_t I = 0, Offset = OffsetBase; I < ArraySize; + ++I, Offset += EltSize) { + Class FieldLo, FieldHi; + classify(ElementType, Offset, FieldLo, FieldHi, IsNamedArg); + Lo = merge(Lo, FieldLo); + Hi = merge(Hi, FieldHi); + if (Lo == Memory || Hi == Memory) + break; + } + postMerge(Size, Lo, Hi); + assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); + return; + } + if (const auto *RT = dyn_cast<RecordType>(T)) { + uint64_t Size = RT->getSizeInBits().getFixedValue(); + + if (containsMatrixField(RT)) { + Lo = Memory; + return; + } + + // AMD64-ABI 3.2.3p2: Rule 1. If the size of an object is larger + // than eight eightbytes, ..., it has class MEMORY. + if (Size > 512) + return; + + // AMD64-ABI 3.2.3p2: Rule 2. If a C++ object has either a non-trivial + // copy constructor or a non-trivial destructor, it is passed by invisible + // reference. + if (getRecordArgABI(RT)) + return; + + // Assume variable sized types are passed in memory. + if (RT->hasFlexibleArrayMember()) + return; + + // Reset Lo class, this will be recomputed. + Current = NoClass; + + // If this is a C++ record, classify the bases first. + if (RT->isCXXRecord()) { + for (const auto &Base : RT->getBaseClasses()) { + + // Classify this field. + // + // AMD64-ABI 3.2.3p2: Rule 3. If the size of the aggregate exceeds a + // single eightbyte, each is classified separately. Each eightbyte gets + // initialized to class NO_CLASS. + Class FieldLo, FieldHi; + uint64_t Offset = OffsetBase + Base.OffsetInBits; + classify(Base.FieldType, Offset, FieldLo, FieldHi, IsNamedArg); + Lo = merge(Lo, FieldLo); + Hi = merge(Hi, FieldHi); + + if (getABICompatInfo().ReturnCXXRecordGreaterThan128InMem && + (Size > 128 && + (Size != Base.FieldType->getSizeInBits().getFixedValue() || + Size > getNativeVectorSizeForAVXABI(AVXLevel)))) + Lo = Memory; + + if (Lo == Memory || Hi == Memory) { + postMerge(Size, Lo, Hi); + return; + } + } + } + + // Classify the fields one at a time, merging the results. + + bool IsUnion = RT->isUnion() && !getABICompatInfo().Clang11Compat; + for (const auto &Field : RT->getFields()) { + uint64_t Offset = OffsetBase + Field.OffsetInBits; + bool BitField = Field.IsBitField; + + if (BitField && Field.IsUnnamedBitfield) + continue; + + if (Size > 128 && + ((!IsUnion && + Size != Field.FieldType->getSizeInBits().getFixedValue()) || + Size > getNativeVectorSizeForAVXABI(AVXLevel))) { + Lo = Memory; + postMerge(Size, Lo, Hi); + return; + } + + bool IsInMemory = Offset % (Field.FieldType->getAlignment().value() * 8); + if (!BitField && IsInMemory) { + Lo = Memory; + postMerge(Size, Lo, Hi); + return; + } + + Class FieldLo, FieldHi; + + if (BitField) { + uint64_t BitFieldSize = Field.BitFieldWidth; + uint64_t EbLo = Offset / 64; + uint64_t EbHi = (Offset + BitFieldSize - 1) / 64; + + if (EbLo) { + assert(EbHi == EbLo && "Invalid classification, type > 16 bytes."); + FieldLo = NoClass; + FieldHi = Integer; + } else { + FieldLo = Integer; + FieldHi = EbHi ? Integer : NoClass; + } + } else + classify(Field.FieldType, Offset, FieldLo, FieldHi, IsNamedArg); + + Lo = merge(Lo, FieldLo); + Hi = merge(Hi, FieldHi); + if (Lo == Memory || Hi == Memory) + break; + } + postMerge(Size, Lo, Hi); + return; + } + + Lo = Memory; + Hi = NoClass; +} + +const Type * +X86_64TargetInfo::useFirstFieldIfTransparentUnion(const Type *Ty) const { + if (const auto *RT = dyn_cast<RecordType>(Ty)) { + if (RT->isUnion() && RT->isTransparentUnion()) { + auto Fields = RT->getFields(); + assert(!Fields.empty() && "sema created an empty transparent union"); + return Fields.front().FieldType; + } + } + return Ty; +} + +ArgInfo +X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, + unsigned &NeededInt, unsigned &NeededSSE, + bool IsNamedArg, bool IsRegCall) const { + + Ty = useFirstFieldIfTransparentUnion(Ty); + + X86_64TargetInfo::Class Lo, Hi; + classify(Ty, 0, Lo, Hi, IsNamedArg, IsRegCall); + + // Check some invariants + assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); + assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + + NeededInt = 0; + NeededSSE = 0; + const Type *ResType = nullptr; + + switch (Lo) { + case NoClass: + if (Hi == NoClass) + return ArgInfo::getIgnore(); + // If the low part is just padding, it takes no register, leave ResType + // null. + assert((Hi == SSE || Hi == Integer || Hi == X87UP) && + "Unknown missing lo part"); + break; + + // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument + // on the stack. + case Memory: + // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or + // COMPLEX_X87, it is passed in memory. + case X87: + case Complex_X87: + if (getRecordArgABI(Ty) == RAA_Indirect) + ++NeededInt; + return getIndirectResult(Ty, FreeIntRegs); + + case SSEUp: + case X87UP: + llvm_unreachable("Invalid classification for lo word."); + + // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next + // available register of the sequence %rdi, %rsi, %rdx, %rcx, %r8 + // and %r9 is used. + case Integer: + ++NeededInt; + + // Pick an 8-byte type based on the preferred type. + ResType = getIntegerTypeAtOffset(Ty, 0, Ty, 0); + + // If we have a sign or zero extended integer, make sure to return Extend + // so that the parameter gets the right LLVM IR attributes. + if (Hi == NoClass && ResType->isInteger()) { + if (Ty->isInteger() && isPromotableInteger(cast<IntegerType>(Ty))) + return ArgInfo::getExtend(Ty); + } + + if (ResType->isInteger() && ResType->getSizeInBits() == 128) { + assert(Hi == Integer); + ++NeededInt; + return ArgInfo::getDirect(ResType); + } + break; + + // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next + // available SSE register is used, the registers are taken in the + // order from %xmm0 to %xmm7. + case SSE: + ResType = getSSETypeAtOffset(Ty, 0, Ty, 0); + ++NeededSSE; + break; + } + + const Type *HighPart = nullptr; + switch (Hi) { + // Memory was handled previously, Complex_X87 and X87 should + // never occur as hi classes, and X87UP must be preceded by X87, + // which is passed in memory. + case Memory: + case X87: + case Complex_X87: + llvm_unreachable("Invalid classification for hi word."); + + case NoClass: + break; + + case Integer: + ++NeededInt; + // Pick an 8-byte type based on the preferred type. + HighPart = getIntegerTypeAtOffset(Ty, 8, Ty, 8); + + if (Lo == NoClass) // Pass HighPart at offset 8 in memory. + return ArgInfo::getDirect(HighPart, 8); + break; + + // X87UP generally doesn't occur here (long double is passed in + // memory), except in situations involving unions. + case X87UP: + case SSE: + ++NeededSSE; + HighPart = getSSETypeAtOffset(Ty, 8, Ty, 8); + + if (Lo == NoClass) // Pass HighPart at offset 8 in memory. + return ArgInfo::getDirect(HighPart, 8); + break; + + // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the + // eightbyte is passed in the upper half of the last used SSE + // register. This only happens when 128-bit vectors are passed. + case SSEUp: + assert(Lo == SSE && "Unexpected SSEUp classification"); + ResType = getByteVectorType(Ty); + break; + } + + // If a high part was specified, merge it together with the low part. It is + // known to pass in the high eightbyte of the result. We do this by forming a + // first class struct aggregate with the high and low part: {low, high} + if (HighPart) + ResType = createPairType(ResType, HighPart); + + return ArgInfo::getDirect(ResType); +} + +ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { + // AMD64-ABI 3.2.3p4: Rule 1. Classify the return type with the + // classification algorithm. + + X86_64TargetInfo::Class Lo, Hi; + classify(RetTy, 0, Lo, Hi, /*isNamedArg*/ true); + + // Check some invariants + assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); + assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + + const Type *ResType = nullptr; + switch (Lo) { + case NoClass: + if (Hi == NoClass) + return ArgInfo::getIgnore(); + // If the low part is just padding, it takes no register, leave ResType + // null. + assert((Hi == SSE || Hi == Integer || Hi == X87UP) && + "Unknown missing lo part"); + break; + case SSEUp: + case X87UP: + llvm_unreachable("Invalid classification for lo word."); + + // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via + // hidden argument. + case Memory: + return getIndirectReturnResult(RetTy); + + // AMD64-ABI 3.2.3p4: Rule 3. If the class is INTEGER, the next + // available register of the sequence %rax, %rdx is used. + case Integer: + ResType = getIntegerTypeAtOffset(RetTy, 0, RetTy, 0); + // If we have a sign or zero extended integer, make sure to return Extend + // so that the parameter gets the right LLVM IR attributes. + if (Hi == NoClass && ResType->isInteger()) { + if (const IntegerType *IntTy = dyn_cast<IntegerType>(RetTy)) { + if (isPromotableInteger(IntTy)) + return ArgInfo::getExtend(RetTy); + } + } + if (ResType->isInteger() && ResType->getSizeInBits() == 128) { + assert(Hi == Integer); + return ArgInfo::getDirect(ResType); + } + break; + + // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next + // available SSE register of the sequence %xmm0, %xmm1 is used. + case SSE: + ResType = getSSETypeAtOffset(RetTy, 0, RetTy, 0); + break; + + // AMD64-ABI 3.2.3p4: Rule 6. If the class is X87, the value is + // returned on the X87 stack in %st0 as 80-bit x87 number. + case X87: + ResType = TB.getFloatType(APFloat::x87DoubleExtended(), Align(16)); + break; + + // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real + // part of the value is returned in %st0 and the imaginary part in + // %st1. + case Complex_X87: + assert(Hi == Complex_X87 && "Unexpected ComplexX87 classification."); + { + const Type *X87Type = + TB.getFloatType(APFloat::x87DoubleExtended(), Align(16)); + FieldInfo Fields[] = {FieldInfo(X87Type, 0), FieldInfo(X87Type, 128)}; + ResType = TB.getRecordType(Fields, TypeSize::getFixed(256), Align(16)); + } + break; + } + + const Type *HighPart = nullptr; + switch (Hi) { + // Memory was handled previously and X87 should + // never occur as a hi class. + case Memory: + case X87: + llvm_unreachable("Invalid classification for hi word."); + + case Complex_X87: + case NoClass: + break; + + case Integer: + HighPart = getIntegerTypeAtOffset(RetTy, 8, RetTy, 8); + if (Lo == NoClass) + return ArgInfo::getDirect(HighPart, 8); + break; + + case SSE: + HighPart = getSSETypeAtOffset(RetTy, 8, RetTy, 8); + if (Lo == NoClass) + return ArgInfo::getDirect(HighPart, 8); + break; + + // AMD64-ABI 3.2.3p4: Rule 5. If the class is SSEUP, the eightbyte + // is passed in the next available eightbyte chunk if the last used + // vector register. + // + // SSEUP should always be preceded by SSE, just widen. + case SSEUp: + assert(Lo == SSE && "Unexpected SSEUp classification."); + ResType = getByteVectorType(RetTy); + break; + + // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is + // returned together with the previous X87 value in %st0. + case X87UP: + // If X87Up is preceded by X87, we don't need to do + // anything. However, in some cases with unions it may not be + // preceded by X87. In such situations we follow gcc and pass the + // extra bits in an SSE reg. + if (Lo != X87) { + HighPart = getSSETypeAtOffset(RetTy, 8, RetTy, 8); + if (Lo == NoClass) // Return HighPart at offset 8 in memory. + return ArgInfo::getDirect(HighPart, 8); + } + break; + } + + // If a high part was specified, merge it together with the low part. It is + // known to pass in the high eightbyte of the result. We do this by forming a + // first class struct aggregate with the high and low part: {low, high} + if (HighPart) + ResType = createPairType(ResType, HighPart); + + return ArgInfo::getDirect(ResType); +} + +/// Given a high and low type that can ideally +/// be used as elements of a two register pair to pass or return, return a +/// first class aggregate to represent them. For example, if the low part of +/// a by-value argument should be passed as i32* and the high part as float, +/// return {i32*, float}. +const Type *X86_64TargetInfo::createPairType(const Type *Lo, + const Type *Hi) const { + // In order to correctly satisfy the ABI, we need to the high part to start + // at offset 8. If the high and low parts we inferred are both 4-byte types + // (e.g. i32 and i32) then the resultant struct type ({i32,i32}) won't have + // the second element at offset 8. Check for this: + unsigned LoSize = (unsigned)Lo->getTypeAllocSize(); + llvm::Align HiAlign = Hi->getAlignment(); + unsigned HiStart = alignTo(LoSize, HiAlign); + + assert(HiStart != 0 && HiStart <= 8 && "Invalid x86-64 argument pair!"); + + // To handle this, we have to increase the size of the low part so that the + // second element will start at an 8 byte offset. We can't increase the size + // of the second element because it might make us access off the end of the + // struct. + const Type *AdjustedLo = Lo; + if (HiStart != 8) { + // There are usually two sorts of types the ABI generation code can produce + // for the low part of a pair that aren't 8 bytes in size: half, float or + // i8/i16/i32. This can also include pointers when they are 32-bit (X32 and + // NaCl). + // Promote these to a larger type. + if (Lo->isFloat()) { + const FloatType *FT = cast<FloatType>(Lo); + if (FT->getSemantics() == &APFloat::IEEEhalf() || + FT->getSemantics() == &APFloat::IEEEsingle() || + FT->getSemantics() == &APFloat::BFloat()) + AdjustedLo = TB.getFloatType(APFloat::IEEEdouble(), Align(8)); + } + // Promote integers and pointers to i64 + else if (Lo->isInteger() || Lo->isPointer()) + AdjustedLo = TB.getIntegerType(64, Align(8), /*Signed=*/false); + else + assert((Lo->isInteger() || Lo->isPointer()) && + "Invalid/unknown low type in pair"); + unsigned AdjustedLoSize = AdjustedLo->getSizeInBits().getFixedValue() / 8; + HiStart = alignTo(AdjustedLoSize, HiAlign); + } + + // Create the pair struct + FieldInfo Fields[] = {FieldInfo(AdjustedLo, 0), FieldInfo(Hi, HiStart * 8)}; + + // Verify the high part is at offset 8 + assert((8 * 8) == Fields[1].OffsetInBits && + "High part must be at offset 8 bytes"); + + return TB.getRecordType(Fields, TypeSize::getFixed(128), Align(8), + StructPacking::Default); +} + +static bool bitsContainNoUserData(const Type *Ty, unsigned StartBit, + unsigned EndBit) { + // If range is completely beyond type size, it's definitely padding + unsigned TySize = Ty->getSizeInBits().getFixedValue(); + if (TySize <= StartBit) + return true; + + // Handle arrays - check each element + if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = AT->getElementType(); + unsigned EltSize = EltTy->getSizeInBits().getFixedValue(); + + for (unsigned I = 0; I < AT->getNumElements(); ++I) { + unsigned EltOffset = I * EltSize; + if (EltOffset >= EndBit) + break; + + unsigned EltStart = (EltOffset < StartBit) ? StartBit - EltOffset : 0; + if (!bitsContainNoUserData(EltTy, EltStart, EndBit - EltOffset)) + return false; + } + return true; + } + + // Handle structs - check all fields and base classes + if (const RecordType *RT = dyn_cast<RecordType>(Ty)) { + if (RT->isUnion()) { + for (const auto &Field : RT->getFields()) { + if (Field.IsUnnamedBitfield) + continue; + + unsigned FieldStart = + (Field.OffsetInBits < StartBit) ? StartBit - Field.OffsetInBits : 0; + unsigned FieldEnd = + FieldStart + Field.FieldType->getSizeInBits().getFixedValue(); + + // Check if field overlaps with the queried range + if (FieldStart < EndBit && FieldEnd > StartBit) { + // There's an overlap, so there is user data + unsigned RelativeStart = + (StartBit > FieldStart) ? StartBit - FieldStart : 0; + unsigned RelativeEnd = + (EndBit < FieldEnd) + ? EndBit - FieldStart + : Field.FieldType->getSizeInBits().getFixedValue(); + + if (!bitsContainNoUserData(Field.FieldType, RelativeStart, + RelativeEnd)) { + return false; + } + } + } + return true; + } + // Check base classes first (for C++ records) + if (RT->isCXXRecord()) { + for (unsigned I = 0; I < RT->getNumBaseClasses(); ++I) { + const FieldInfo &Base = RT->getBaseClasses()[I]; + if (Base.OffsetInBits >= EndBit) + continue; + + unsigned BaseStart = + (Base.OffsetInBits < StartBit) ? StartBit - Base.OffsetInBits : 0; + if (!bitsContainNoUserData(Base.FieldType, BaseStart, + EndBit - Base.OffsetInBits)) + return false; + } + } + + for (unsigned I = 0; I < RT->getNumFields(); ++I) { + const FieldInfo &Field = RT->getFields()[I]; + if (Field.OffsetInBits >= EndBit) + break; + + unsigned FieldStart = + (Field.OffsetInBits < StartBit) ? StartBit - Field.OffsetInBits : 0; + if (!bitsContainNoUserData(Field.FieldType, FieldStart, + EndBit - Field.OffsetInBits)) + return false; + } + return true; + } + + // For unions, vectors, and primitives - assume all bits are user data + return false; +} + +const Type *X86_64TargetInfo::getIntegerTypeAtOffset(const Type *ABIType, + unsigned ABIOffset, + const Type *SourceTy, + unsigned SourceOffset, + bool InMemory) const { + + const Type *WorkingType = ABIType; + if (InMemory && ABIType->isInteger()) { + const auto *IT = cast<IntegerType>(ABIType); + unsigned OriginalBitWidth = IT->getSizeInBits().getFixedValue(); + + unsigned WidenedBitWidth = OriginalBitWidth; + if (OriginalBitWidth <= 8) { + WidenedBitWidth = 8; + } else { + WidenedBitWidth = llvm::bit_ceil(OriginalBitWidth); + } + + if (WidenedBitWidth != OriginalBitWidth) { + WorkingType = TB.getIntegerType(WidenedBitWidth, ABIType->getAlignment(), + IT->isSigned()); + } + } + // If we're dealing with an un-offset ABI type, then it means that we're + // returning an 8-byte unit starting with it. See if we can safely use it. + if (ABIOffset == 0) { + // Pointers and int64's always fill the 8-byte unit. + if ((WorkingType->isPointer() && Has64BitPointers) || + (WorkingType->isInteger() && + cast<IntegerType>(WorkingType)->getSizeInBits() == 64)) + return ABIType; + + // If we have a 1/2/4-byte integer, we can use it only if the rest of the + // goodness in the source type is just tail padding. This is allowed to + // kick in for struct {double,int} on the int, but not on + // struct{double,int,int} because we wouldn't return the second int. We + // have to do this analysis on the source type because we can't depend on + // unions being lowered a specific way etc. + if ((WorkingType->isInteger() && + (cast<IntegerType>(WorkingType)->getSizeInBits() == 1 || + cast<IntegerType>(WorkingType)->getSizeInBits() == 8 || + cast<IntegerType>(WorkingType)->getSizeInBits() == 16 || + cast<IntegerType>(WorkingType)->getSizeInBits() == 32)) || + (WorkingType->isPointer() && !Has64BitPointers)) { + + unsigned BitWidth = WorkingType->isPointer() + ? 32 + : cast<IntegerType>(WorkingType)->getSizeInBits(); + + if (bitsContainNoUserData(SourceTy, SourceOffset * 8 + BitWidth, + SourceOffset * 8 + 64)) + return WorkingType; + } + } + + if (const auto *RTy = dyn_cast<RecordType>(ABIType)) { + if (RTy->isUnion()) { + const Type *ReducedType = reduceUnionForX8664(RTy, TB); + if (ReducedType) + return getIntegerTypeAtOffset(ReducedType, ABIOffset, SourceTy, + SourceOffset, true); + } + if (const FieldInfo *Element = + RTy->getElementContainingOffset(ABIOffset * 8)) { + + unsigned ElementOffsetBytes = Element->OffsetInBits / 8; + return getIntegerTypeAtOffset(Element->FieldType, + ABIOffset - ElementOffsetBytes, SourceTy, + SourceOffset, true); + } + } + + if (const auto *ATy = dyn_cast<ArrayType>(ABIType)) { + const Type *EltTy = ATy->getElementType(); + unsigned EltSize = EltTy->getSizeInBits() / 8; + if (EltSize > 0) { + unsigned EltOffset = (ABIOffset / EltSize) * EltSize; + return getIntegerTypeAtOffset(EltTy, ABIOffset - EltOffset, SourceTy, + SourceOffset, true); + } + } + + // If we have a 128-bit integer, we can pass it safely using an i128 + // so we return that + if (ABIType->isInteger() && ABIType->getSizeInBits() == 128) { + assert(ABIOffset == 0); + return ABIType; + } + + unsigned TySizeInBytes = + llvm::divideCeil(SourceTy->getSizeInBits().getFixedValue(), 8); + if (auto *IT = dyn_cast<IntegerType>(SourceTy)) { + if (IT->isBitInt()) + TySizeInBytes = + alignTo(SourceTy->getSizeInBits().getFixedValue(), 64) / 8; + } + assert(TySizeInBytes != SourceOffset && "Empty field?"); + unsigned AvailableSize = TySizeInBytes - SourceOffset; + return TB.getIntegerType(std::min(AvailableSize, 8U) * 8, Align(1), false); +} +/// Returns the floating point type at the specified offset within a type, or +/// nullptr if no floating point type is found at that offset. +const Type *X86_64TargetInfo::getFPTypeAtOffset(const Type *Ty, + unsigned Offset) const { + // Check for direct match at offset 0 + if (Offset == 0 && Ty->isFloat()) + return Ty; + + if (const ComplexType *CT = dyn_cast<ComplexType>(Ty)) { + const Type *ElementType = CT->getElementType(); + unsigned ElementSize = ElementType->getSizeInBits().getFixedValue() / 8; + + if (Offset == 0 || Offset == ElementSize) + return ElementType; + return nullptr; + } + + // Handle struct types by checking each field + if (const RecordType *RT = dyn_cast<RecordType>(Ty)) { + if (const FieldInfo *Element = RT->getElementContainingOffset(Offset * 8)) { + unsigned ElementOffsetBytes = Element->OffsetInBits / 8; + return getFPTypeAtOffset(Element->FieldType, Offset - ElementOffsetBytes); + } + } + + // Handle array types + if (const ArrayType *AT = dyn_cast<ArrayType>(Ty)) { + const Type *EltTy = AT->getElementType(); + unsigned EltSize = EltTy->getSizeInBits() / 8; + unsigned EltIndex = Offset / EltSize; + + return getFPTypeAtOffset(EltTy, Offset - (EltIndex * EltSize)); + } + + // No floating point type found at this offset + return nullptr; +} + +/// Helper to check if a floating point type matches specific semantics +static bool isFloatTypeWithSemantics(const Type *Ty, + const fltSemantics &Semantics) { + if (!Ty->isFloat()) + return false; + const FloatType *FT = cast<FloatType>(Ty); + return FT->getSemantics() == &Semantics; +} + +/// GetSSETypeAtOffset - Return a type that will be passed by the backend in the +/// low 8 bytes of an XMM register, corresponding to the SSE class. +const Type *X86_64TargetInfo::getSSETypeAtOffset(const Type *ABIType, + unsigned ABIOffset, + const Type *SourceTy, + unsigned SourceOffset) const { + + if (const auto *RTy = dyn_cast<RecordType>(ABIType)) { + if (RTy->isUnion()) { + const Type *ReducedType = reduceUnionForX8664(RTy, TB); + if (ReducedType) { + return getSSETypeAtOffset(ReducedType, ABIOffset, SourceTy, + SourceOffset); + } + } + } + + auto Is16bitFpTy = [](const Type *T) { + return isFloatTypeWithSemantics(T, APFloat::IEEEhalf()) || + isFloatTypeWithSemantics(T, APFloat::BFloat()); + }; + + // Get the floating point type at the requested offset + const Type *T0 = getFPTypeAtOffset(ABIType, ABIOffset); + if (!T0 || isFloatTypeWithSemantics(T0, APFloat::IEEEdouble())) + return TB.getFloatType(APFloat::IEEEdouble(), Align(8)); + + // Calculate remaining source size in bytes + unsigned SourceSize = + (SourceTy->getSizeInBits().getFixedValue() / 8) - SourceOffset; + + // Try to get adjacent FP type + const Type *T1 = nullptr; + unsigned T0Size = + alignTo(T0->getSizeInBits().getFixedValue(), T0->getAlignment().value()) / + 8; + if (SourceSize > T0Size) + T1 = getFPTypeAtOffset(ABIType, ABIOffset + T0Size); + + if (T1 == nullptr) { + if (Is16bitFpTy(T0) && SourceSize > 4) + T1 = getFPTypeAtOffset(ABIType, ABIOffset + 4); + + if (T1 == nullptr) + return T0; + } + // Handle vector cases + if (isFloatTypeWithSemantics(T0, APFloat::IEEEsingle()) && + isFloatTypeWithSemantics(T1, APFloat::IEEEsingle())) + return TB.getVectorType(T0, ElementCount::getFixed(2), Align(8)); + + if (Is16bitFpTy(T0) && Is16bitFpTy(T1)) { + const Type *T2 = nullptr; + if (SourceSize > 4) + T2 = getFPTypeAtOffset(ABIType, ABIOffset + 4); + if (!T2) + return TB.getVectorType(T0, ElementCount::getFixed(2), Align(8)); + return TB.getVectorType(T0, ElementCount::getFixed(4), Align(8)); + } + + // Mixed half-float cases + if (Is16bitFpTy(T0) || Is16bitFpTy(T1)) + return TB.getVectorType(TB.getFloatType(APFloat::IEEEhalf(), Align(2)), + ElementCount::getFixed(4), Align(8)); + + // Default to double + return TB.getFloatType(APFloat::IEEEdouble(), Align(8)); +} + +/// The ABI specifies that a value should be passed in a full vector XMM/YMM +/// register. Pick an LLVM IR type that will be passed as a vector register. +const Type *X86_64TargetInfo::getByteVectorType(const Type *Ty) const { + // Wrapper structs/arrays that only contain vectors are passed just like + // vectors; strip them off if present. + if (const Type *InnerTy = isSingleElementStruct(Ty)) + Ty = InnerTy; + + // Handle vector types + if (const VectorType *VT = dyn_cast<VectorType>(Ty)) { + // Don't pass vXi128 vectors in their native type, the backend can't + // legalize them. + if (getABICompatInfo().PassInt128VectorsInMem && + VT->getElementType()->isInteger() && + cast<IntegerType>(VT->getElementType())->getSizeInBits() == 128) { + unsigned Size = VT->getSizeInBits().getFixedValue(); + return TB.getVectorType(TB.getIntegerType(64, Align(8), /*Signed=*/false), + ElementCount::getFixed(Size / 64), + Align(Size / 8)); + } + return VT; + } + + // Handle fp128 + if (isFloatTypeWithSemantics(Ty, APFloat::IEEEquad())) + return Ty; + + // We couldn't find the preferred IR vector type for 'Ty'. + unsigned Size = Ty->getSizeInBits().getFixedValue(); + assert((Size == 128 || Size == 256 || Size == 512) && "Invalid vector size"); + + return TB.getVectorType(TB.getFloatType(APFloat::IEEEdouble(), Align(8)), + ElementCount::getFixed(Size / 64), Align(Size / 8)); +} + +// Returns the single element if this is a single-element struct wrapper +const Type *X86_64TargetInfo::isSingleElementStruct(const Type *Ty) const { + const auto *RT = dyn_cast<RecordType>(Ty); + if (!RT) + return nullptr; + + if (RT->hasFlexibleArrayMember()) + return nullptr; + + const Type *Found = nullptr; + + for (const auto &Base : RT->getBaseClasses()) { + const Type *BaseTy = Base.FieldType; + auto *BaseRT = dyn_cast<RecordType>(BaseTy); + + if (!BaseRT || BaseRT->isEmpty()) + continue; + + const Type *Elem = isSingleElementStruct(BaseTy); + if (!Elem || Found) + return nullptr; + Found = Elem; + } + + for (const auto &FI : RT->getFields()) { + if (FI.isEmpty()) + continue; + + const Type *FTy = FI.FieldType; + + while (auto *AT = dyn_cast<ArrayType>(FTy)) { + if (AT->getNumElements() != 1) + break; + FTy = AT->getElementType(); + } + + const Type *Elem; + if (auto *InnerRT = dyn_cast<RecordType>(FTy)) + Elem = isSingleElementStruct(InnerRT); + else + Elem = FTy; + if (!Elem || Found) + return nullptr; + Found = Elem; + } + + if (!Found) + return nullptr; + if (Found->getSizeInBits() != Ty->getSizeInBits()) + return nullptr; + + return Found; +} + +bool X86_64TargetInfo::isIllegalVectorType(const Type *Ty) const { + if (const auto *VecTy = dyn_cast<VectorType>(Ty)) { + uint64_t Size = VecTy->getSizeInBits().getFixedValue(); + unsigned LargestVector = getNativeVectorSizeForAVXABI(AVXLevel); + + // Vectors <= 64 bits or > largest supported vector size are illegal + if (Size <= 64 || Size > LargestVector) + return true; + + // Check for 128-bit integer element vectors that should be passed in memory + const Type *EltTy = VecTy->getElementType(); + if (getABICompatInfo().PassInt128VectorsInMem && EltTy->isInteger()) { + const auto *IntTy = cast<IntegerType>(EltTy); + if (IntTy->getSizeInBits().getFixedValue() == 128) + return true; + } + } + return false; +} + +ArgInfo X86_64TargetInfo::getIndirectResult(const Type *Ty, + unsigned FreeIntRegs) const { + // If this is a scalar LLVM value then assume LLVM will pass it in the right + // place naturally. + // + // This assumption is optimistic, as there could be free registers available + // when we need to pass this argument in memory, and LLVM could try to pass + // the argument in the free register. This does not seem to happen currently, + // but this code would be much safer if we could mark the argument with + // 'onstack'. See PR12193. + if (!isAggregateTypeForABI(Ty) && !isIllegalVectorType(Ty) && + !(Ty->isInteger() && cast<IntegerType>(Ty)->isBitInt())) { + return (Ty->isInteger() && isPromotableInteger(cast<IntegerType>(Ty)) + ? ArgInfo::getExtend(Ty) + : ArgInfo::getDirect()); + } + + // Check if this is a record type that needs special handling + if (auto RecordRAA = getRecordArgABI(Ty)) + return getNaturalAlignIndirect(Ty, RecordRAA == + RecordArgABI::RAA_DirectInMemory); + + // Compute the byval alignment. We specify the alignment of the byval in all + // cases so that the mid-level optimizer knows the alignment of the byval. + uint64_t AlignVal = std::max<uint64_t>(Ty->getAlignment().value(), 8u); + + // Attempt to avoid passing indirect results using byval when possible. This + // is important for good codegen. + // + // We do this by coercing the value into a scalar type which the backend can + // handle naturally (i.e., without using byval). + // + // For simplicity, we currently only do this when we have exhausted all of the + // free integer registers. Doing this when there are free integer registers + // would require more care, as we would have to ensure that the coerced value + // did not claim the unused register. That would require either reording the + // arguments to the function (so that any subsequent inreg values came first), + // or only doing this optimization when there were no following arguments that + // might be inreg. + // + // We currently expect it to be rare (particularly in well written code) for + // arguments to be passed on the stack when there are still free integer + // registers available (this would typically imply large structs being passed + // by value), so this seems like a fair tradeoff for now. + // + // We can revisit this if the backend grows support for 'onstack' parameter + // attributes. See PR12193. + if (FreeIntRegs == 0) { + uint64_t Size = Ty->getSizeInBits().getFixedValue(); + + // If this type fits in an eightbyte, coerce it into the matching integral + // type, which will end up on the stack (with alignment 8). + if (AlignVal == 8 && Size <= 64) { + const Type *IntTy = + TB.getIntegerType(Size, llvm::Align(8), /*Signed=*/false); + return ArgInfo::getDirect(IntTy); + } + } + + return ArgInfo::getIndirect(llvm::Align(AlignVal), /*ByVal=*/true); +} + +ArgInfo X86_64TargetInfo::getIndirectReturnResult(const Type *Ty) const { + // If this is a scalar value, handle it specially. + if (!isAggregateTypeForABI(Ty)) { + // Bit-precise integers are returned indirectly regardless of size. + if (const auto *IntTy = dyn_cast<IntegerType>(Ty)) { + if (IntTy->isBitInt()) + return getNaturalAlignIndirect(IntTy, /*ByVal=*/false); + if (isPromotableInteger(IntTy)) + return ArgInfo::getExtend(Ty); + } + return ArgInfo::getDirect(); + } + + return getNaturalAlignIndirect(Ty, /*ByVal=*/false); +} + +static bool classifyCXXReturnType(FunctionInfo &FI) { + const abi::Type *Ty = FI.getReturnType(); + + if (const auto *RT = llvm::dyn_cast<abi::RecordType>(Ty)) { + if (!RT->canPassInRegisters()) { + FI.getReturnInfo() = + ArgInfo::getIndirect(RT->getAlignment(), /*ByVal=*/false); + return true; + } + } + + return false; +} + +void X86_64TargetInfo::computeInfo(FunctionInfo &FI) const { + CallingConv::ID CallingConv = FI.getCallingConvention(); + + if (CallingConv == CallingConv::Win64 || + CallingConv == CallingConv::X86_RegCall) + return; + + bool IsRegCall = false; + + unsigned FreeIntRegs = 6; + unsigned FreeSSERegs = 8; + unsigned NeededInt = 0, NeededSSE = 0; + + if (!classifyCXXReturnType(FI)) { + const Type *RetTy = FI.getReturnType(); + FI.getReturnInfo() = classifyReturnType(RetTy); + } + + if (FI.getReturnInfo().isIndirect()) + --FreeIntRegs; + + unsigned NumRequiredArgs = FI.getNumRequiredArgs(); + + unsigned ArgNo = 0; + for (auto IT = FI.arg_begin(), IE = FI.arg_end(); IT != IE; ++IT, ++ArgNo) { + bool IsNamedArg = ArgNo < NumRequiredArgs; + const Type *ArgTy = IT->ABIType; + NeededInt = 0; + NeededSSE = 0; + + ArgInfo AI = classifyArgumentType(ArgTy, FreeIntRegs, NeededInt, NeededSSE, + IsNamedArg, IsRegCall); + + // AMD64-ABI 3.2.3p3: If there are no registers available for any + // eightbyte of an argument, the whole argument is passed on the + // stack. If registers have already been assigned for some + // eightbytes of such an argument, the assignments get reverted. + if (FreeIntRegs >= NeededInt && FreeSSERegs >= NeededSSE) { + FreeIntRegs -= NeededInt; + FreeSSERegs -= NeededSSE; + IT->Info = AI; + } else { + // Not enough registers, pass on stack + IT->Info = getIndirectResult(ArgTy, FreeIntRegs); + } + } +} + +std::unique_ptr<TargetInfo> createX8664TargetInfo(TypeBuilder &TB, + const Triple &TargetTriple, + X86AVXABILevel AVXLevel, + bool Has64BitPointers, + const ABICompatInfo &Compat) { + return std::make_unique<X86_64TargetInfo>(TB, TargetTriple, AVXLevel, + Has64BitPointers, Compat); +} + +} // namespace abi +} // namespace llvm diff --git a/llvm/lib/ABI/Types.cpp b/llvm/lib/ABI/Types.cpp index 0a6080de3923b..6f2eb7225364a 100644 --- a/llvm/lib/ABI/Types.cpp +++ b/llvm/lib/ABI/Types.cpp @@ -7,6 +7,8 @@ //===----------------------------------------------------------------------===// #include "llvm/ABI/Types.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" using namespace llvm; @@ -30,6 +32,52 @@ bool RecordType::isEmpty() const { return true; } +const FieldInfo * +RecordType::getElementContainingOffset(unsigned OffsetInBits) const { + SmallVector<std::pair<unsigned, const FieldInfo *>, 16> AllElements; + + for (const FieldInfo &Base : getBaseClasses()) { + const auto *BaseRT = dyn_cast<RecordType>(Base.FieldType); + if (!BaseRT || !BaseRT->isEmpty()) + AllElements.emplace_back(Base.OffsetInBits, &Base); + } + + for (const FieldInfo &VBase : getVirtualBaseClasses()) { + const auto *VBaseRT = dyn_cast<RecordType>(VBase.FieldType); + if (!VBaseRT || !VBaseRT->isEmpty()) + AllElements.emplace_back(VBase.OffsetInBits, &VBase); + } + + for (const FieldInfo &Field : getFields()) { + if (Field.IsUnnamedBitfield) + continue; + AllElements.emplace_back(Field.OffsetInBits, &Field); + } + + llvm::stable_sort(AllElements, [](const auto &A, const auto &B) { + return A.first < B.first; + }); + + auto *It = llvm::upper_bound(AllElements, OffsetInBits, + [](unsigned Offset, const auto &Element) { + return Offset < Element.first; + }); + + if (It == AllElements.begin()) + return nullptr; + + --It; + + const FieldInfo *Candidate = It->second; + unsigned ElementStart = It->first; + unsigned ElementSize = Candidate->FieldType->getSizeInBits().getFixedValue(); + + if (OffsetInBits >= ElementStart && OffsetInBits < ElementStart + ElementSize) + return Candidate; + + return nullptr; +} + bool FieldInfo::isEmpty() const { if (IsUnnamedBitfield) return true; >From 47c8e85f3b974975c18d6db61f0a47a11f9d3b49 Mon Sep 17 00:00:00 2001 From: Narayan Sreekumar <[email protected]> Date: Tue, 19 May 2026 00:20:24 +0530 Subject: [PATCH 2/2] refactor: fix failing tests --- clang/lib/CodeGen/CGCall.cpp | 2 +- clang/lib/CodeGen/CodeGenModule.cpp | 31 ++++- clang/lib/CodeGen/CodeGenModule.h | 5 +- llvm/include/llvm/ABI/TargetInfo.h | 10 +- llvm/lib/ABI/IRTypeMapper.cpp | 16 ++- llvm/lib/ABI/Targets/X86.cpp | 195 ++++++++++++++-------------- llvm/lib/ABI/Types.cpp | 40 ++---- 7 files changed, 154 insertions(+), 145 deletions(-) diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp index a2b9c945788ee..26088197365da 100644 --- a/clang/lib/CodeGen/CGCall.cpp +++ b/clang/lib/CodeGen/CGCall.cpp @@ -942,7 +942,7 @@ const CGFunctionInfo &CodeGenTypes::arrangeLLVMFunctionInfo( computeSPIRKernelABIInfo(CGM, *FI); } else if (info.getCC() == CC_Swift || info.getCC() == CC_SwiftAsync) { swiftcall::computeABIInfo(CGM, *FI); - } else if (CGM.shouldUseLLVMABILowering()) { + } else if (CGM.shouldUseLLVMABILowering(CC)) { CGM.computeABIInfoUsingLib(*FI); } else { CGM.getABIInfo().computeInfo(*FI); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 685d496139acc..84c4fb6bd3982 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -339,13 +339,29 @@ const TargetCodeGenInfo &CodeGenModule::getTargetCodeGenInfo() { return *TheTargetCodeGenInfo; } -bool CodeGenModule::shouldUseLLVMABILowering() const { +bool CodeGenModule::shouldUseLLVMABILowering(unsigned CallingConv) const { if (!CodeGenOpts.ExperimentalABILowering) return false; - // Only opt in for targets that have an LLVMABI implementation; others - // continue through the legacy ABIInfo path. + const llvm::Triple &T = getTriple(); - return T.isBPF() || (T.getArch() == llvm::Triple::x86_64 && T.isOSLinux()); + if (T.isBPF()) + return true; + + if (T.getArch() == llvm::Triple::x86_64 && !T.isOSWindows() && + !T.isOSDarwin() && !T.isOSCygMing()) { + switch (CallingConv) { + case llvm::CallingConv::Win64: + case llvm::CallingConv::X86_RegCall: + case llvm::CallingConv::X86_FastCall: + case llvm::CallingConv::X86_VectorCall: + case llvm::CallingConv::X86_StdCall: + case llvm::CallingConv::X86_ThisCall: + return false; + default: + return true; + } + } + return false; } const llvm::abi::TargetInfo & @@ -374,15 +390,16 @@ CodeGenModule::getLLVMABITargetInfo(llvm::abi::TypeBuilder &TB) { CompatInfo.HonorsRevision98 = !T.isOSDarwin(); CompatInfo.PassInt128VectorsInMem = Compat > LangOptions::ClangABI::Ver9 && (T.isOSLinux() || T.isOSNetBSD()); + // Clang <= 20.0 did not do this, and PlayStation does not do this. CompatInfo.ReturnCXXRecordGreaterThan128InMem = - Compat > LangOptions::ClangABI::Ver20; + Compat > LangOptions::ClangABI::Ver20 && !T.isPS(); CompatInfo.Clang11Compat = Compat <= LangOptions::ClangABI::Ver11 || T.isPS(); bool Has64BitPointers = getTarget().getPointerWidth(LangAS::Default) == 64; - TheLLVMABITargetInfo = llvm::abi::createX8664TargetInfo( - TB, T, AVXLevel, Has64BitPointers, CompatInfo); + TheLLVMABITargetInfo = llvm::abi::createX86_64TargetInfo( + TB, AVXLevel, Has64BitPointers, CompatInfo); return *TheLLVMABITargetInfo; } diff --git a/clang/lib/CodeGen/CodeGenModule.h b/clang/lib/CodeGen/CodeGenModule.h index b7dab4ef583ff..1148fcd752abb 100644 --- a/clang/lib/CodeGen/CodeGenModule.h +++ b/clang/lib/CodeGen/CodeGenModule.h @@ -906,8 +906,9 @@ class CodeGenModule : public CodeGenTypeCache { const llvm::abi::TargetInfo &getLLVMABITargetInfo(llvm::abi::TypeBuilder &TB); /// True when -fexperimental-abi-lowering is in effect AND the active target - /// has an LLVMABI implementation we can route to. - bool shouldUseLLVMABILowering() const; + /// has an LLVMABI implementation that supports the given LLVM calling + /// convention. Unsupported CCs fall back to the legacy ABIInfo path. + bool shouldUseLLVMABILowering(unsigned CallingConv) const; /// Drive the experimental LLVMABI-based lowering path: map argument and /// return types into the LLVMABI library, ask its target lowering to fill diff --git a/llvm/include/llvm/ABI/TargetInfo.h b/llvm/include/llvm/ABI/TargetInfo.h index c09f4a153aaf5..a1c9a67b57db7 100644 --- a/llvm/include/llvm/ABI/TargetInfo.h +++ b/llvm/include/llvm/ABI/TargetInfo.h @@ -16,7 +16,6 @@ #include "llvm/ABI/FunctionInfo.h" #include "llvm/ABI/Types.h" -#include "llvm/TargetParser/Triple.h" #include <cassert> #include <memory> @@ -92,11 +91,10 @@ enum class X86AVXABILevel { AVX512, }; -std::unique_ptr<TargetInfo> createX8664TargetInfo(TypeBuilder &TB, - const Triple &TargetTriple, - X86AVXABILevel AVXLevel, - bool Has64BitPointers, - const ABICompatInfo &Compat); +std::unique_ptr<TargetInfo> createX86_64TargetInfo(TypeBuilder &TB, + X86AVXABILevel AVXLevel, + bool Has64BitPointers, + const ABICompatInfo &Compat); } // namespace abi } // namespace llvm diff --git a/llvm/lib/ABI/IRTypeMapper.cpp b/llvm/lib/ABI/IRTypeMapper.cpp index 3ba4553420375..bcd133ae30c41 100644 --- a/llvm/lib/ABI/IRTypeMapper.cpp +++ b/llvm/lib/ABI/IRTypeMapper.cpp @@ -13,6 +13,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Type.h" +#include "llvm/Support/MathExtras.h" using namespace llvm::abi; @@ -139,14 +140,19 @@ IRTypeMapper::createStructFromFields(ArrayRef<abi::FieldInfo> Fields, } else { uint64_t CurrentOffset = 0; for (const auto &Field : Fields) { + assert(!Field.IsBitField && "bitfields should not reach IR type mapping"); + llvm::Type *FieldType = convertType(Field.FieldType); if (Field.OffsetInBits > CurrentOffset) { - if (llvm::Type *PaddingType = - createPaddingType(Field.OffsetInBits - CurrentOffset)) - FieldTypes.push_back(PaddingType); + uint64_t AlignBits = DL.getABITypeAlign(FieldType).value() * 8; + uint64_t NaturalNextOffset = + AlignBits ? alignTo(CurrentOffset, AlignBits) : CurrentOffset; + if (NaturalNextOffset != Field.OffsetInBits) { + if (llvm::Type *PaddingType = + createPaddingType(Field.OffsetInBits - CurrentOffset)) + FieldTypes.push_back(PaddingType); + } CurrentOffset = Field.OffsetInBits; } - assert(!Field.IsBitField && "bitfields should not reach IR type mapping"); - llvm::Type *FieldType = convertType(Field.FieldType); FieldTypes.push_back(FieldType); CurrentOffset += Field.FieldType->getSizeInBits().getFixedValue(); } diff --git a/llvm/lib/ABI/Targets/X86.cpp b/llvm/lib/ABI/Targets/X86.cpp index 2255390a68cd1..f5a4188d185ab 100644 --- a/llvm/lib/ABI/Targets/X86.cpp +++ b/llvm/lib/ABI/Targets/X86.cpp @@ -14,7 +14,6 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/TypeSize.h" -#include "llvm/TargetParser/Triple.h" #include <algorithm> #include <cassert> #include <cstdint> @@ -36,16 +35,7 @@ static unsigned getNativeVectorSizeForAVXABI(X86AVXABILevel AVXLevel) { class X86_64TargetInfo : public TargetInfo { public: - enum Class { - Integer = 0, - SSE, - SSEUp, - X87, - X87UP, - Complex_X87, - NoClass, - Memory - }; + enum Class { Integer, Sse, SseUp, X87, X87Up, ComplexX87, NoClass, Memory }; private: TypeBuilder &TB; @@ -88,9 +78,8 @@ class X86_64TargetInfo : public TargetInfo { const Type *useFirstFieldIfTransparentUnion(const Type *Ty) const; public: - X86_64TargetInfo(TypeBuilder &TypeBuilder, const Triple & /*Triple*/, - X86AVXABILevel AVXABILevel, bool Has64BitPtrs, - const ABICompatInfo &Compat) + X86_64TargetInfo(TypeBuilder &TypeBuilder, X86AVXABILevel AVXABILevel, + bool Has64BitPtrs, const ABICompatInfo &Compat) : TargetInfo(Compat), TB(TypeBuilder), AVXLevel(AVXABILevel), Has64BitPointers(Has64BitPtrs) {} @@ -140,7 +129,7 @@ void X86_64TargetInfo::postMerge(unsigned AggregateSize, Class &Lo, // (a) If one of the classes is Memory, the whole argument is passed in // memory. // - // (b) If X87UP is not preceded by X87, the whole argument is passed in + // (b) If X87Up is not preceded by X87, the whole argument is passed in // memory. // // (c) If the size of the aggregate exceeds two eightbytes and the first @@ -158,12 +147,12 @@ void X86_64TargetInfo::postMerge(unsigned AggregateSize, Class &Lo, if (Hi == Memory) Lo = Memory; - if (Hi == X87UP && Lo != X87 && getABICompatInfo().HonorsRevision98) + if (Hi == X87Up && Lo != X87 && getABICompatInfo().HonorsRevision98) Lo = Memory; - if (AggregateSize > 128 && (Lo != SSE || Hi != SSEUp)) + if (AggregateSize > 128 && (Lo != Sse || Hi != SseUp)) Lo = Memory; - if (Hi == SSEUp && Lo != SSE) - Hi = SSE; + if (Hi == SseUp && Lo != Sse) + Hi = Sse; } X86_64TargetInfo::Class X86_64TargetInfo::merge(Class Accum, Class Field) { // AMD64-ABI 3.2.3p2: Rule 4. Each field of an object is @@ -182,14 +171,14 @@ X86_64TargetInfo::Class X86_64TargetInfo::merge(Class Accum, Class Field) { // (d) If one of the classes is INTEGER, the result is the // INTEGER. // - // (e) If one of the classes is X87, X87UP, COMPLEX_X87 class, + // (e) If one of the classes is X87, X87Up, COMPLEX_X87 class, // MEMORY is used as class. // // (f) Otherwise class SSE is used. // Accum should never be memory (we should have returned) or // ComplexX87 (because this cannot be passed in a structure). - assert((Accum != Memory && Accum != Complex_X87) && + assert((Accum != Memory && Accum != ComplexX87) && "Invalid accumulated classification during merge."); if (Accum == Field || Field == NoClass) @@ -200,11 +189,11 @@ X86_64TargetInfo::Class X86_64TargetInfo::merge(Class Accum, Class Field) { return Field; if (Accum == Integer || Field == Integer) return Integer; - if (Field == X87 || Field == X87UP || Field == Complex_X87 || Accum == X87 || - Accum == X87UP) + if (Field == X87 || Field == X87Up || Field == ComplexX87 || Accum == X87 || + Accum == X87Up) return Memory; - return SSE; + return Sse; } bool X86_64TargetInfo::containsMatrixField(const RecordType *RT) const { @@ -243,8 +232,9 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, (IT->isBitInt() && BitWidth > 64 && BitWidth <= 128)) { Lo = Integer; Hi = Integer; - } else if (BitWidth <= 64) + } else if (BitWidth <= 64) { Current = Integer; + } return; } @@ -256,15 +246,16 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, FltSem == &llvm::APFloat::IEEEdouble() || FltSem == &llvm::APFloat::IEEEhalf() || FltSem == &llvm::APFloat::BFloat()) { - Current = SSE; + Current = Sse; } else if (FltSem == &llvm::APFloat::IEEEquad()) { - Lo = SSE; - Hi = SSEUp; + Lo = Sse; + Hi = SseUp; } else if (FltSem == &llvm::APFloat::x87DoubleExtended()) { Lo = X87; - Hi = X87UP; - } else - Current = SSE; + Hi = X87Up; + } else { + Current = Sse; + } return; } if (T->isPointer()) { @@ -281,11 +272,13 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, uint64_t EbThisAdj = (OffsetBase + 64 - 1) / 64; if (EbFuncPtr != EbThisAdj) { Lo = Hi = Integer; - } else + } else { Current = Integer; + } } - } else + } else { Current = Integer; + } return; } @@ -317,13 +310,15 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, // platform compiler, we must continue to use integer. if (const auto *IT = dyn_cast<IntegerType>(ElementType)) { uint64_t ElemBits = IT->getSizeInBits().getFixedValue(); - if (!getABICompatInfo().ClassifyIntegerMMXAsSSE && - (ElemBits == 64 || ElemBits == 32)) { + if (!getABICompatInfo().ClassifyIntegerMMXAsSSE && ElemBits == 64 && + !IT->isBitInt()) { Current = Integer; - } else - Current = SSE; - } else - Current = SSE; + } else { + Current = Sse; + } + } else { + Current = Sse; + } // If this type crosses an eightbyte boundary, it should be // split. if (OffsetBase && OffsetBase != 64) @@ -334,7 +329,7 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, uint64_t ElemBits = IT->getSizeInBits().getFixedValue(); // gcc passes 256 and 512 bit <X x __int128> vectors in memory. :( if (getABICompatInfo().PassInt128VectorsInMem && Size != 128 && - ElemBits == 128) + ElemBits == 128 && !IT->isBitInt()) return; } @@ -352,8 +347,8 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, // // Similarly, per 3.2.3. of the AVX512 draft, 512-bits ("named") args are // split into eight eightbyte chunks, one SSE and seven SSEUP. - Lo = SSE; - Hi = SSEUp; + Lo = Sse; + Hi = SseUp; } return; } @@ -372,13 +367,13 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, if (FltSem == &llvm::APFloat::IEEEhalf() || FltSem == &llvm::APFloat::IEEEsingle() || FltSem == &llvm::APFloat::BFloat()) - Current = SSE; + Current = Sse; else if (FltSem == &llvm::APFloat::IEEEquad()) Current = Memory; else if (FltSem == &llvm::APFloat::x87DoubleExtended()) - Current = Complex_X87; + Current = ComplexX87; else if (FltSem == &llvm::APFloat::IEEEdouble()) - Lo = Hi = SSE; + Lo = Hi = Sse; else llvm_unreachable("Unexpected long double representation!"); } @@ -423,9 +418,10 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, break; } postMerge(Size, Lo, Hi); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp array classification."); + assert((Hi != SseUp || Lo == Sse) && "Invalid SseUp array classification."); return; } + if (const auto *RT = dyn_cast<RecordType>(T)) { uint64_t Size = RT->getSizeInBits().getFixedValue(); @@ -521,8 +517,9 @@ void X86_64TargetInfo::classify(const Type *T, uint64_t OffsetBase, Class &Lo, FieldLo = Integer; FieldHi = EbHi ? Integer : NoClass; } - } else + } else { classify(Field.FieldType, Offset, FieldLo, FieldHi, IsNamedArg); + } Lo = merge(Lo, FieldLo); Hi = merge(Hi, FieldHi); @@ -542,7 +539,7 @@ X86_64TargetInfo::useFirstFieldIfTransparentUnion(const Type *Ty) const { if (const auto *RT = dyn_cast<RecordType>(Ty)) { if (RT->isUnion() && RT->isTransparentUnion()) { auto Fields = RT->getFields(); - assert(!Fields.empty() && "sema created an empty transparent union"); + assert(!Fields.empty() && "transparent union cannot be empty"); return Fields.front().FieldType; } } @@ -561,7 +558,7 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, // Check some invariants assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + assert((Hi != SseUp || Lo == Sse) && "Invalid SseUp classification."); NeededInt = 0; NeededSSE = 0; @@ -573,23 +570,23 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, return ArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. - assert((Hi == SSE || Hi == Integer || Hi == X87UP) && + assert((Hi == Sse || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; // AMD64-ABI 3.2.3p3: Rule 1. If the class is MEMORY, pass the argument // on the stack. case Memory: - // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87UP or + // AMD64-ABI 3.2.3p3: Rule 5. If the class is X87, X87Up or // COMPLEX_X87, it is passed in memory. case X87: - case Complex_X87: + case ComplexX87: if (getRecordArgABI(Ty) == RAA_Indirect) ++NeededInt; return getIndirectResult(Ty, FreeIntRegs); - case SSEUp: - case X87UP: + case SseUp: + case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p3: Rule 2. If the class is INTEGER, the next @@ -618,7 +615,7 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, // AMD64-ABI 3.2.3p3: Rule 3. If the class is SSE, the next // available SSE register is used, the registers are taken in the // order from %xmm0 to %xmm7. - case SSE: + case Sse: ResType = getSSETypeAtOffset(Ty, 0, Ty, 0); ++NeededSSE; break; @@ -626,12 +623,12 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, const Type *HighPart = nullptr; switch (Hi) { - // Memory was handled previously, Complex_X87 and X87 should - // never occur as hi classes, and X87UP must be preceded by X87, + // Memory was handled previously, ComplexX87 and X87 should + // never occur as hi classes, and X87Up must be preceded by X87, // which is passed in memory. case Memory: case X87: - case Complex_X87: + case ComplexX87: llvm_unreachable("Invalid classification for hi word."); case NoClass: @@ -646,10 +643,10 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, return ArgInfo::getDirect(HighPart, 8); break; - // X87UP generally doesn't occur here (long double is passed in + // X87Up generally doesn't occur here (long double is passed in // memory), except in situations involving unions. - case X87UP: - case SSE: + case X87Up: + case Sse: ++NeededSSE; HighPart = getSSETypeAtOffset(Ty, 8, Ty, 8); @@ -660,8 +657,8 @@ X86_64TargetInfo::classifyArgumentType(const Type *Ty, unsigned FreeIntRegs, // AMD64-ABI 3.2.3p3: Rule 4. If the class is SSEUP, the // eightbyte is passed in the upper half of the last used SSE // register. This only happens when 128-bit vectors are passed. - case SSEUp: - assert(Lo == SSE && "Unexpected SSEUp classification"); + case SseUp: + assert(Lo == Sse && "Unexpected SseUp classification"); ResType = getByteVectorType(Ty); break; } @@ -684,7 +681,7 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { // Check some invariants assert((Hi != Memory || Lo == Memory) && "Invalid memory classification."); - assert((Hi != SSEUp || Lo == SSE) && "Invalid SSEUp classification."); + assert((Hi != SseUp || Lo == Sse) && "Invalid SseUp classification."); const Type *ResType = nullptr; switch (Lo) { @@ -693,11 +690,11 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { return ArgInfo::getIgnore(); // If the low part is just padding, it takes no register, leave ResType // null. - assert((Hi == SSE || Hi == Integer || Hi == X87UP) && + assert((Hi == Sse || Hi == Integer || Hi == X87Up) && "Unknown missing lo part"); break; - case SSEUp: - case X87UP: + case SseUp: + case X87Up: llvm_unreachable("Invalid classification for lo word."); // AMD64-ABI 3.2.3p4: Rule 2. Types of class memory are returned via @@ -725,7 +722,7 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { // AMD64-ABI 3.2.3p4: Rule 4. If the class is SSE, the next // available SSE register of the sequence %xmm0, %xmm1 is used. - case SSE: + case Sse: ResType = getSSETypeAtOffset(RetTy, 0, RetTy, 0); break; @@ -738,13 +735,13 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { // AMD64-ABI 3.2.3p4: Rule 8. If the class is COMPLEX_X87, the real // part of the value is returned in %st0 and the imaginary part in // %st1. - case Complex_X87: - assert(Hi == Complex_X87 && "Unexpected ComplexX87 classification."); + case ComplexX87: + assert(Hi == ComplexX87 && "Unexpected ComplexX87 classification."); { const Type *X87Type = TB.getFloatType(APFloat::x87DoubleExtended(), Align(16)); - FieldInfo Fields[] = {FieldInfo(X87Type, 0), FieldInfo(X87Type, 128)}; - ResType = TB.getRecordType(Fields, TypeSize::getFixed(256), Align(16)); + FieldInfo Fields[] = {FieldInfo(X87Type, 0), FieldInfo(X87Type, 80)}; + ResType = TB.getRecordType(Fields, TypeSize::getFixed(160), Align(16)); } break; } @@ -757,7 +754,7 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { case X87: llvm_unreachable("Invalid classification for hi word."); - case Complex_X87: + case ComplexX87: case NoClass: break; @@ -767,7 +764,7 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { return ArgInfo::getDirect(HighPart, 8); break; - case SSE: + case Sse: HighPart = getSSETypeAtOffset(RetTy, 8, RetTy, 8); if (Lo == NoClass) return ArgInfo::getDirect(HighPart, 8); @@ -778,14 +775,14 @@ ArgInfo X86_64TargetInfo::classifyReturnType(const Type *RetTy) const { // vector register. // // SSEUP should always be preceded by SSE, just widen. - case SSEUp: - assert(Lo == SSE && "Unexpected SSEUp classification."); + case SseUp: + assert(Lo == Sse && "Unexpected SseUp classification."); ResType = getByteVectorType(RetTy); break; - // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87UP, the value is + // AMD64-ABI 3.2.3p4: Rule 7. If the class is X87Up, the value is // returned together with the previous X87 value in %st0. - case X87UP: + case X87Up: // If X87Up is preceded by X87, we don't need to do // anything. However, in some cases with unions it may not be // preceded by X87. In such situations we follow gcc and pass the @@ -859,7 +856,9 @@ const Type *X86_64TargetInfo::createPairType(const Type *Lo, assert((8 * 8) == Fields[1].OffsetInBits && "High part must be at offset 8 bytes"); - return TB.getRecordType(Fields, TypeSize::getFixed(128), Align(8), + uint64_t PairSizeInBits = + Fields[1].OffsetInBits + Hi->getSizeInBits().getFixedValue(); + return TB.getRecordType(Fields, TypeSize::getFixed(PairSizeInBits), Align(8), StructPacking::Default); } @@ -1338,19 +1337,18 @@ ArgInfo X86_64TargetInfo::getIndirectResult(const Type *Ty, } ArgInfo X86_64TargetInfo::getIndirectReturnResult(const Type *Ty) const { - // If this is a scalar value, handle it specially. if (!isAggregateTypeForABI(Ty)) { // Bit-precise integers are returned indirectly regardless of size. if (const auto *IntTy = dyn_cast<IntegerType>(Ty)) { if (IntTy->isBitInt()) - return getNaturalAlignIndirect(IntTy, /*ByVal=*/false); + return getNaturalAlignIndirect(IntTy, /*ByVal=*/true); if (isPromotableInteger(IntTy)) return ArgInfo::getExtend(Ty); } return ArgInfo::getDirect(); } - return getNaturalAlignIndirect(Ty, /*ByVal=*/false); + return getNaturalAlignIndirect(Ty, /*ByVal=*/true); } static bool classifyCXXReturnType(FunctionInfo &FI) { @@ -1358,6 +1356,10 @@ static bool classifyCXXReturnType(FunctionInfo &FI) { if (const auto *RT = llvm::dyn_cast<abi::RecordType>(Ty)) { if (!RT->canPassInRegisters()) { + // A C++ record that cannot pass in registers (non-trivial copy/dtor) + // is returned indirectly with ByVal=false, matching + // ItaniumCXXABI::classifyReturnType. This is the RAA path and is distinct + // from getIndirectReturnResult (plain aggregates), which uses ByVal=true. FI.getReturnInfo() = ArgInfo::getIndirect(RT->getAlignment(), /*ByVal=*/false); return true; @@ -1370,11 +1372,16 @@ static bool classifyCXXReturnType(FunctionInfo &FI) { void X86_64TargetInfo::computeInfo(FunctionInfo &FI) const { CallingConv::ID CallingConv = FI.getCallingConvention(); - if (CallingConv == CallingConv::Win64 || - CallingConv == CallingConv::X86_RegCall) - return; - - bool IsRegCall = false; + // Only the standard SysV (C) calling convention is classified here. Any other + // convention must be added explicitly once it has been verified against this + // classifier rather than silently taking the SysV path. + switch (CallingConv) { + case CallingConv::C: + break; + default: + llvm_unreachable( + "calling convention not supported by the LLVMABI X86_64 classifier"); + } unsigned FreeIntRegs = 6; unsigned FreeSSERegs = 8; @@ -1398,7 +1405,7 @@ void X86_64TargetInfo::computeInfo(FunctionInfo &FI) const { NeededSSE = 0; ArgInfo AI = classifyArgumentType(ArgTy, FreeIntRegs, NeededInt, NeededSSE, - IsNamedArg, IsRegCall); + IsNamedArg); // AMD64-ABI 3.2.3p3: If there are no registers available for any // eightbyte of an argument, the whole argument is passed on the @@ -1415,13 +1422,11 @@ void X86_64TargetInfo::computeInfo(FunctionInfo &FI) const { } } -std::unique_ptr<TargetInfo> createX8664TargetInfo(TypeBuilder &TB, - const Triple &TargetTriple, - X86AVXABILevel AVXLevel, - bool Has64BitPointers, - const ABICompatInfo &Compat) { - return std::make_unique<X86_64TargetInfo>(TB, TargetTriple, AVXLevel, - Has64BitPointers, Compat); +std::unique_ptr<TargetInfo> +createX86_64TargetInfo(TypeBuilder &TB, X86AVXABILevel AVXLevel, + bool Has64BitPointers, const ABICompatInfo &Compat) { + return std::make_unique<X86_64TargetInfo>(TB, AVXLevel, Has64BitPointers, + Compat); } } // namespace abi diff --git a/llvm/lib/ABI/Types.cpp b/llvm/lib/ABI/Types.cpp index 6f2eb7225364a..78132aa71fa97 100644 --- a/llvm/lib/ABI/Types.cpp +++ b/llvm/lib/ABI/Types.cpp @@ -7,8 +7,6 @@ //===----------------------------------------------------------------------===// #include "llvm/ABI/Types.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" using namespace llvm; @@ -34,47 +32,31 @@ bool RecordType::isEmpty() const { const FieldInfo * RecordType::getElementContainingOffset(unsigned OffsetInBits) const { - SmallVector<std::pair<unsigned, const FieldInfo *>, 16> AllElements; + auto Contains = [&](const FieldInfo &Element) { + unsigned Start = Element.OffsetInBits; + unsigned Size = Element.FieldType->getSizeInBits().getFixedValue(); + return OffsetInBits >= Start && OffsetInBits < Start + Size; + }; for (const FieldInfo &Base : getBaseClasses()) { const auto *BaseRT = dyn_cast<RecordType>(Base.FieldType); - if (!BaseRT || !BaseRT->isEmpty()) - AllElements.emplace_back(Base.OffsetInBits, &Base); + if ((!BaseRT || !BaseRT->isEmpty()) && Contains(Base)) + return &Base; } for (const FieldInfo &VBase : getVirtualBaseClasses()) { const auto *VBaseRT = dyn_cast<RecordType>(VBase.FieldType); - if (!VBaseRT || !VBaseRT->isEmpty()) - AllElements.emplace_back(VBase.OffsetInBits, &VBase); + if ((!VBaseRT || !VBaseRT->isEmpty()) && Contains(VBase)) + return &VBase; } for (const FieldInfo &Field : getFields()) { if (Field.IsUnnamedBitfield) continue; - AllElements.emplace_back(Field.OffsetInBits, &Field); + if (Contains(Field)) + return &Field; } - llvm::stable_sort(AllElements, [](const auto &A, const auto &B) { - return A.first < B.first; - }); - - auto *It = llvm::upper_bound(AllElements, OffsetInBits, - [](unsigned Offset, const auto &Element) { - return Offset < Element.first; - }); - - if (It == AllElements.begin()) - return nullptr; - - --It; - - const FieldInfo *Candidate = It->second; - unsigned ElementStart = It->first; - unsigned ElementSize = Candidate->FieldType->getSizeInBits().getFixedValue(); - - if (OffsetInBits >= ElementStart && OffsetInBits < ElementStart + ElementSize) - return Candidate; - return nullptr; } _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
