llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) <details> <summary>Changes</summary> Reimplement Neon FP8 vector types using attribute `neon_vector_type` instead of having them as builtin types. This allows to implement FP8 Neon intrinsics without the need to add special cases for these types when using `__builtin_shufflevector` or bitcast (using C-style cast operator) between vectors, both extensively used in the generated code in `arm_neon.h`. --- Patch is 47.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123603.diff 19 Files Affected: - (modified) clang/include/clang/AST/Type.h (+5) - (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (-2) - (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) - (modified) clang/lib/AST/ItaniumMangle.cpp (+5) - (modified) clang/lib/CodeGen/CGBuiltin.cpp (+1) - (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) - (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+3-1) - (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+1-6) - (modified) clang/lib/Sema/SemaARM.cpp (+2) - (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) - (modified) clang/lib/Sema/SemaType.cpp (+2-1) - (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) - (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) - (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) - (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) - (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) - (added) clang/test/Sema/aarch64-fp8-cast.c (+104) - (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) - (modified) clang/utils/TableGen/NeonEmitter.cpp (+3-8) ``````````diff diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index 3457d524c63aaa..1d9743520654eb 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isFloat32Type() const; bool isDoubleType() const; bool isBFloat16Type() const; + bool isMFloat8Type() const; bool isFloat128Type() const; bool isIbm128Type() const; bool isRealType() const; // C99 6.2.5p17 (real floating + integer) @@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const { return isSpecificBuiltinType(BuiltinType::BFloat16); } +inline bool Type::isMFloat8Type() const { + return isSpecificBuiltinType(BuiltinType::MFloat8); +} + inline bool Type::isFloat128Type() const { return isSpecificBuiltinType(BuiltinType::Float128); } diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def b/clang/include/clang/Basic/AArch64SVEACLETypes.def index 063cac1f4a58ee..2dd2754e778d60 100644 --- a/clang/include/clang/Basic/AArch64SVEACLETypes.def +++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def @@ -201,8 +201,6 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", SveBoolx4, SveBoolx4T SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy) AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1) -AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, MFloat8x8Ty, 8, 8, 1) -AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, MFloat8x16Ty, 16, 8, 1) #undef SVE_VECTOR_TYPE #undef SVE_VECTOR_TYPE_BFLOAT diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index 4dc8b24ed8ae6c..83ef015018f1a1 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -208,7 +208,8 @@ namespace clang { Float16, Float32, Float64, - BFloat16 + BFloat16, + MFloat8 }; NeonTypeFlags(unsigned F) : Flags(F) {} @@ -230,6 +231,7 @@ namespace clang { switch (getEltType()) { case Int8: case Poly8: + case MFloat8: return 8; case Int16: case Float16: diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp index 1dd936cf4fb518..9948963d7f44b3 100644 --- a/clang/lib/AST/ItaniumMangle.cpp +++ b/clang/lib/AST/ItaniumMangle.cpp @@ -3919,6 +3919,9 @@ void CXXNameMangler::mangleNeonVectorType(const VectorType *T) { case BuiltinType::Float: EltName = "float32_t"; break; case BuiltinType::Half: EltName = "float16_t"; break; case BuiltinType::BFloat16: EltName = "bfloat16_t"; break; + case BuiltinType::MFloat8: + EltName = "mfloat8_t"; + break; default: llvm_unreachable("unexpected Neon vector element type"); } @@ -3972,6 +3975,8 @@ static StringRef mangleAArch64VectorBase(const BuiltinType *EltType) { return "Float64"; case BuiltinType::BFloat16: return "Bfloat16"; + case BuiltinType::MFloat8: + return "Mfloat8"; default: llvm_unreachable("Unexpected vector element base type"); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index b80833fd91884d..5fd4c3e34ebeba 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -6819,6 +6819,7 @@ static llvm::FixedVectorType *GetNeonType(CodeGenFunction *CGF, switch (TypeFlags.getEltType()) { case NeonTypeFlags::Int8: case NeonTypeFlags::Poly8: + case NeonTypeFlags::MFloat8: return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad)); case NeonTypeFlags::Int16: case NeonTypeFlags::Poly16: diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 9a9a8c7f6eae09..eeff77db2d2684 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -2414,8 +2414,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, LValue Dst, Vec = Builder.CreateBitCast(Vec, IRVecTy); // iN --> <N x i1>. } - Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(), - Dst.getVectorIdx(), "vecins"); + llvm::Value *SrcVal = Src.getScalarVal(); + // Allow inserting `<1 x T>` into an `<N x T>`. It can happen with scalar + // types which are mapped to vector LLVM IR types (e.g. for implementing + // an ABI). + if (auto *EltTy = dyn_cast<llvm::FixedVectorType>(SrcVal->getType()); + EltTy && EltTy->getNumElements() == 1) + SrcVal = Builder.CreateBitCast(SrcVal, EltTy->getElementType()); + Vec = Builder.CreateInsertElement(Vec, SrcVal, Dst.getVectorIdx(), + "vecins"); if (IRStoreTy) { // <N x i1> --> <iN>. Vec = Builder.CreateBitCast(Vec, IRStoreTy); diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp b/clang/lib/CodeGen/CodeGenTypes.cpp index 09191a4901f493..950b23f4e13b99 100644 --- a/clang/lib/CodeGen/CodeGenTypes.cpp +++ b/clang/lib/CodeGen/CodeGenTypes.cpp @@ -650,7 +650,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) { // An ext_vector_type of Bool is really a vector of bits. llvm::Type *IRElemTy = VT->isExtVectorBoolType() ? llvm::Type::getInt1Ty(getLLVMContext()) - : ConvertType(VT->getElementType()); + : (VT->getElementType()->isMFloat8Type() + ? llvm::Type::getInt8Ty(getLLVMContext()) + : ConvertType(VT->getElementType())); ResultType = llvm::FixedVectorType::get(IRElemTy, VT->getNumElements()); break; } diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp b/clang/lib/CodeGen/Targets/AArch64.cpp index 7db67ecba07c8f..c702e79ff8eb98 100644 --- a/clang/lib/CodeGen/Targets/AArch64.cpp +++ b/clang/lib/CodeGen/Targets/AArch64.cpp @@ -383,10 +383,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType Ty, bool IsVariadicFn, NSRN = std::min(NSRN + 1, 8u); else { switch (BT->getKind()) { - case BuiltinType::MFloat8x8: - case BuiltinType::MFloat8x16: - NSRN = std::min(NSRN + 1, 8u); - break; case BuiltinType::SveBool: case BuiltinType::SveCount: NPRN = std::min(NPRN + 1, 4u); @@ -629,8 +625,7 @@ bool AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const { // but with the difference that any floating-point type is allowed, // including __fp16. if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) { - if (BT->isFloatingPoint() || BT->getKind() == BuiltinType::MFloat8x16 || - BT->getKind() == BuiltinType::MFloat8x8) + if (BT->isFloatingPoint()) return true; } else if (const VectorType *VT = Ty->getAs<VectorType>()) { if (auto Kind = VT->getVectorKind(); diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp index db418d80e0e09c..2620bbc97ba02a 100644 --- a/clang/lib/Sema/SemaARM.cpp +++ b/clang/lib/Sema/SemaARM.cpp @@ -352,6 +352,8 @@ static QualType getNeonEltType(NeonTypeFlags Flags, ASTContext &Context, return Context.DoubleTy; case NeonTypeFlags::BFloat16: return Context.BFloat16Ty; + case NeonTypeFlags::MFloat8: + return Context.MFloat8Ty; } llvm_unreachable("Invalid NeonTypeFlag!"); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index ae40895980d90a..2087e8b251d8c6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -7502,7 +7502,7 @@ static bool breakDownVectorType(QualType type, uint64_t &len, if (const VectorType *vecType = type->getAs<VectorType>()) { len = vecType->getNumElements(); eltType = vecType->getElementType(); - assert(eltType->isScalarType()); + assert(eltType->isScalarType() || eltType->isMFloat8Type()); return true; } @@ -10168,6 +10168,11 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, ExprResult &RHS, return HLSL().handleVectorBinOpConversion(LHS, RHS, LHSType, RHSType, IsCompAssign); + // Any operation with MFloat8 type is only possible with C intrinsics + if ((LHSVecType && LHSVecType->getElementType()->isMFloat8Type()) || + (RHSVecType && RHSVecType->getElementType()->isMFloat8Type())) + return InvalidOperands(Loc, LHS, RHS); + // AltiVec-style "vector bool op vector bool" combinations are allowed // for some operators but not others. if (!AllowBothBool && LHSVecType && diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 2ccf5a8e1d6f31..33d5378944ddbf 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -8306,7 +8306,8 @@ static bool isPermittedNeonBaseType(QualType &Ty, VectorKind VecKind, Sema &S) { BTy->getKind() == BuiltinType::ULongLong || BTy->getKind() == BuiltinType::Float || BTy->getKind() == BuiltinType::Half || - BTy->getKind() == BuiltinType::BFloat16; + BTy->getKind() == BuiltinType::BFloat16 || + BTy->getKind() == BuiltinType::MFloat8; } static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr, diff --git a/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c new file mode 100644 index 00000000000000..147ca1d1becc15 --- /dev/null +++ b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c @@ -0,0 +1,123 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-linux -target-feature +neon -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s + +// REQUIRES: aarch64-registered-target + +typedef __attribute__((neon_vector_type(8))) signed char int8x8_t; +typedef __attribute__((neon_vector_type(16))) signed char int8x16_t; + +typedef __attribute__((neon_vector_type(8))) __mfp8 mfloat8x8_t; +typedef __attribute__((neon_vector_type(16))) __mfp8 mfloat8x16_t; + +// CHECK-LABEL: define dso_local <8 x i8> @test_8x8( +// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> [[X]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0> +// CHECK-NEXT: ret <8 x i8> [[SHUFFLE]] +// +mfloat8x8_t test_8x8(mfloat8x8_t x) { + return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0); +} + +// CHECK-LABEL: define dso_local <8 x i8> @test_8x8_v( +// CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i8> noundef [[P:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = and <8 x i8> [[P]], splat (i8 7) +// CHECK-NEXT: [[SHUF_IDX:%.*]] = extractelement <8 x i8> [[MASK]], i64 0 +// CHECK-NEXT: [[SHUF_ELT:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX]] +// CHECK-NEXT: [[SHUF_INS:%.*]] = insertelement <8 x i8> poison, i8 [[SHUF_ELT]], i64 0 +// CHECK-NEXT: [[SHUF_IDX1:%.*]] = extractelement <8 x i8> [[MASK]], i64 1 +// CHECK-NEXT: [[SHUF_ELT2:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX1]] +// CHECK-NEXT: [[SHUF_INS3:%.*]] = insertelement <8 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1 +// CHECK-NEXT: [[SHUF_IDX4:%.*]] = extractelement <8 x i8> [[MASK]], i64 2 +// CHECK-NEXT: [[SHUF_ELT5:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX4]] +// CHECK-NEXT: [[SHUF_INS6:%.*]] = insertelement <8 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2 +// CHECK-NEXT: [[SHUF_IDX7:%.*]] = extractelement <8 x i8> [[MASK]], i64 3 +// CHECK-NEXT: [[SHUF_ELT8:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX7]] +// CHECK-NEXT: [[SHUF_INS9:%.*]] = insertelement <8 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3 +// CHECK-NEXT: [[SHUF_IDX10:%.*]] = extractelement <8 x i8> [[MASK]], i64 4 +// CHECK-NEXT: [[SHUF_ELT11:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX10]] +// CHECK-NEXT: [[SHUF_INS12:%.*]] = insertelement <8 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4 +// CHECK-NEXT: [[SHUF_IDX13:%.*]] = extractelement <8 x i8> [[MASK]], i64 5 +// CHECK-NEXT: [[SHUF_ELT14:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX13]] +// CHECK-NEXT: [[SHUF_INS15:%.*]] = insertelement <8 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5 +// CHECK-NEXT: [[SHUF_IDX16:%.*]] = extractelement <8 x i8> [[MASK]], i64 6 +// CHECK-NEXT: [[SHUF_ELT17:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX16]] +// CHECK-NEXT: [[SHUF_INS18:%.*]] = insertelement <8 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6 +// CHECK-NEXT: [[SHUF_IDX19:%.*]] = extractelement <8 x i8> [[MASK]], i64 7 +// CHECK-NEXT: [[SHUF_ELT20:%.*]] = extractelement <8 x i8> [[X]], i8 [[SHUF_IDX19]] +// CHECK-NEXT: [[SHUF_INS21:%.*]] = insertelement <8 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7 +// CHECK-NEXT: ret <8 x i8> [[SHUF_INS21]] +// +mfloat8x8_t test_8x8_v(mfloat8x8_t x, int8x8_t p) { + return __builtin_shufflevector(x, p); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_8x16( +// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[X]], <16 x i8> [[X]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0> +// CHECK-NEXT: ret <16 x i8> [[SHUFFLE]] +// +mfloat8x16_t test_8x16(mfloat8x16_t x) { + return __builtin_shufflevector(x, x, 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, + 1, 0); +} + +// CHECK-LABEL: define dso_local <16 x i8> @test_8x16_v( +// CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> noundef [[P:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: [[MASK:%.*]] = and <16 x i8> [[P]], splat (i8 15) +// CHECK-NEXT: [[SHUF_IDX:%.*]] = extractelement <16 x i8> [[MASK]], i64 0 +// CHECK-NEXT: [[SHUF_ELT:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX]] +// CHECK-NEXT: [[SHUF_INS:%.*]] = insertelement <16 x i8> poison, i8 [[SHUF_ELT]], i64 0 +// CHECK-NEXT: [[SHUF_IDX1:%.*]] = extractelement <16 x i8> [[MASK]], i64 1 +// CHECK-NEXT: [[SHUF_ELT2:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX1]] +// CHECK-NEXT: [[SHUF_INS3:%.*]] = insertelement <16 x i8> [[SHUF_INS]], i8 [[SHUF_ELT2]], i64 1 +// CHECK-NEXT: [[SHUF_IDX4:%.*]] = extractelement <16 x i8> [[MASK]], i64 2 +// CHECK-NEXT: [[SHUF_ELT5:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX4]] +// CHECK-NEXT: [[SHUF_INS6:%.*]] = insertelement <16 x i8> [[SHUF_INS3]], i8 [[SHUF_ELT5]], i64 2 +// CHECK-NEXT: [[SHUF_IDX7:%.*]] = extractelement <16 x i8> [[MASK]], i64 3 +// CHECK-NEXT: [[SHUF_ELT8:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX7]] +// CHECK-NEXT: [[SHUF_INS9:%.*]] = insertelement <16 x i8> [[SHUF_INS6]], i8 [[SHUF_ELT8]], i64 3 +// CHECK-NEXT: [[SHUF_IDX10:%.*]] = extractelement <16 x i8> [[MASK]], i64 4 +// CHECK-NEXT: [[SHUF_ELT11:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX10]] +// CHECK-NEXT: [[SHUF_INS12:%.*]] = insertelement <16 x i8> [[SHUF_INS9]], i8 [[SHUF_ELT11]], i64 4 +// CHECK-NEXT: [[SHUF_IDX13:%.*]] = extractelement <16 x i8> [[MASK]], i64 5 +// CHECK-NEXT: [[SHUF_ELT14:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX13]] +// CHECK-NEXT: [[SHUF_INS15:%.*]] = insertelement <16 x i8> [[SHUF_INS12]], i8 [[SHUF_ELT14]], i64 5 +// CHECK-NEXT: [[SHUF_IDX16:%.*]] = extractelement <16 x i8> [[MASK]], i64 6 +// CHECK-NEXT: [[SHUF_ELT17:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX16]] +// CHECK-NEXT: [[SHUF_INS18:%.*]] = insertelement <16 x i8> [[SHUF_INS15]], i8 [[SHUF_ELT17]], i64 6 +// CHECK-NEXT: [[SHUF_IDX19:%.*]] = extractelement <16 x i8> [[MASK]], i64 7 +// CHECK-NEXT: [[SHUF_ELT20:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX19]] +// CHECK-NEXT: [[SHUF_INS21:%.*]] = insertelement <16 x i8> [[SHUF_INS18]], i8 [[SHUF_ELT20]], i64 7 +// CHECK-NEXT: [[SHUF_IDX22:%.*]] = extractelement <16 x i8> [[MASK]], i64 8 +// CHECK-NEXT: [[SHUF_ELT23:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX22]] +// CHECK-NEXT: [[SHUF_INS24:%.*]] = insertelement <16 x i8> [[SHUF_INS21]], i8 [[SHUF_ELT23]], i64 8 +// CHECK-NEXT: [[SHUF_IDX25:%.*]] = extractelement <16 x i8> [[MASK]], i64 9 +// CHECK-NEXT: [[SHUF_ELT26:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX25]] +// CHECK-NEXT: [[SHUF_INS27:%.*]] = insertelement <16 x i8> [[SHUF_INS24]], i8 [[SHUF_ELT26]], i64 9 +// CHECK-NEXT: [[SHUF_IDX28:%.*]] = extractelement <16 x i8> [[MASK]], i64 10 +// CHECK-NEXT: [[SHUF_ELT29:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX28]] +// CHECK-NEXT: [[SHUF_INS30:%.*]] = insertelement <16 x i8> [[SHUF_INS27]], i8 [[SHUF_ELT29]], i64 10 +// CHECK-NEXT: [[SHUF_IDX31:%.*]] = extractelement <16 x i8> [[MASK]], i64 11 +// CHECK-NEXT: [[SHUF_ELT32:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX31]] +// CHECK-NEXT: [[SHUF_INS33:%.*]] = insertelement <16 x i8> [[SHUF_INS30]], i8 [[SHUF_ELT32]], i64 11 +// CHECK-NEXT: [[SHUF_IDX34:%.*]] = extractelement <16 x i8> [[MASK]], i64 12 +// CHECK-NEXT: [[SHUF_ELT35:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX34]] +// CHECK-NEXT: [[SHUF_INS36:%.*]] = insertelement <16 x i8> [[SHUF_INS33]], i8 [[SHUF_ELT35]], i64 12 +// CHECK-NEXT: [[SHUF_IDX37:%.*]] = extractelement <16 x i8> [[MASK]], i64 13 +// CHECK-NEXT: [[SHUF_ELT38:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX37]] +// CHECK-NEXT: [[SHUF_INS39:%.*]] = insertelement <16 x i8> [[SHUF_INS36]], i8 [[SHUF_ELT38]], i64 13 +// CHECK-NEXT: [[SHUF_IDX40:%.*]] = extractelement <16 x i8> [[MASK]], i64 14 +// CHECK-NEXT: [[SHUF_ELT41:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX40]] +// CHECK-NEXT: [[SHUF_INS42:%.*]] = insertelement <16 x i8> [[SHUF_INS39]], i8 [[SHUF_ELT41]], i64 14 +// CHECK-NEXT: [[SHUF_IDX43:%.*]] = extractelement <16 x i8> [[MASK]], i64 15 +// CHECK-NEXT: [[SHUF_ELT44:%.*]] = extractelement <16 x i8> [[X]], i8 [[SHUF_IDX43]] +// CHECK-NEXT: [[SHUF_INS45:%.*]] = insertelement <16 x i8> [[SHUF_INS42]], i8 [[SHUF_ELT44]], i64 15 +// CHECK-NEXT: ret <16 x i8> [[SHUF_INS45]] +// +mfloat8x16_t test_8x16_v(mfloat8x16_t x, int8x16_t p) { + return __builtin_shufflevector(x, p); +} diff --git a/clang/test/CodeGen/AArch64/fp8-cast.c b/clang/test/CodeGen/AArch64/fp8-cast.c new file mode 100644 index 00000000000000..a9ce31b9e6beab --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-cast.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -disable-O0-optnone -Werror -Wall -S -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#include <arm_neon.h> + +// Bitcast between FP8 Neon vectors +// CHECK-LABEL: define dso_local <8 x i8> @test_f8_f8( +// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <8 x i8> [[X]] +// +// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z10test_f8_f813__Mfloat8x8_t( +// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: ret <8 x i8> [[X]] +// +mfloat8x8_t test_f8_f8(mfloat8x8_t x) { + return (mfloat8x8_t) x; +} + +// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_f8( +// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT: ret <16 x i8> [[X]] +// +// CHECK-CXX-LABEL: define dso_local <16 x i8> @_Z11testq_f8_f814__Mfloat8x16_t( +// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT: ret <16 x i8> [[X]] +// +mfloat8x16_t testq_f8_f8(mfloat8x16_t x) { + return (mfloat8x16_t) x; +} + +// Bitcast between FP8 and int8 Neon vectors +// CHECK-LABEL: define dso_local <... [truncated] `````````` </details> https://github.com/llvm/llvm-project/pull/123603 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits