[clang] [AArch64] Implement NEON FP8 vectors as VectorType (PR #123603)

via cfe-commits Mon, 20 Jan 2025 04:30:44 -0800

llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)

<details>
<summary>Changes</summary>

Reimplement the Neon FP8 vector types using the `neon_vector_type` attribute
instead of defining them as builtin types.
This allows the FP8 Neon intrinsics to be implemented without adding special
cases for these types when using `__builtin_shufflevector` or a bitcast (via
the C-style cast operator) between vectors, both of which are used extensively
in the generated code in `arm_neon.h`.

---

Patch is 47.17 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/123603.diff


19 Files Affected:

- (modified) clang/include/clang/AST/Type.h (+5) 
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (-2) 
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1) 
- (modified) clang/lib/AST/ItaniumMangle.cpp (+5) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+1) 
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2) 
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+3-1) 
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+1-6) 
- (modified) clang/lib/Sema/SemaARM.cpp (+2) 
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1) 
- (modified) clang/lib/Sema/SemaType.cpp (+2-1) 
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123) 
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193) 
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35) 
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7) 
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11) 
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104) 
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12) 
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+3-8) 


``````````diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public 
ExtQualsTypeCommonBase {
   bool isFloat32Type() const;
   bool isDoubleType() const;
   bool isBFloat16Type() const;
+  bool isMFloat8Type() const;
   bool isFloat128Type() const;
   bool isIbm128Type() const;
   bool isRealType() const;         // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
   return isSpecificBuiltinType(BuiltinType::BFloat16);
 }
 
+inline bool Type::isMFloat8Type() const {
+  return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
 inline bool Type::isFloat128Type() const {
   return isSpecificBuiltinType(BuiltinType::Float128);
 }
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def 
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..2dd2754e778d60 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -201,8 +201,6 @@ SVE_PREDICATE_TYPE_ALL("__clang_svboolx4_t", "svboolx4_t", 
SveBoolx4, SveBoolx4T
 SVE_OPAQUE_TYPE("__SVCount_t", "__SVCount_t", SveCount, SveCountTy)
 
 AARCH64_VECTOR_TYPE_MFLOAT("__mfp8", "__mfp8", MFloat8, MFloat8Ty, 1, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x8_t", "__MFloat8x8_t", MFloat8x8, 
MFloat8x8Ty, 8, 8, 1)
-AARCH64_VECTOR_TYPE_MFLOAT("__MFloat8x16_t", "__MFloat8x16_t", MFloat8x16, 
MFloat8x16Ty, 16, 8, 1)
 
 #undef SVE_VECTOR_TYPE
 #undef SVE_VECTOR_TYPE_BFLOAT
diff --git a/clang/include/clang/Basic/TargetBuiltins.h 
b/clang/include/clang/Basic/TargetBuiltins.h
index 4dc8b24ed8ae6c..83ef015018f1a1 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -208,7 +208,8 @@ namespace clang {
       Float16,
       Float32,
       Float64,
-      BFloat16
+      BFloat16,
+      MFloat8
     };
 
     NeonTypeFlags(unsigned F) : Flags(F) {}
@@ -230,6 +231,7 @@ namespace clang {
       switch (getEltType()) {
       case Int8:
       case Poly8:
+      case MFloat8:
         return 8;
       case Int16:
       case Float16:
diff --git a/clang/lib/AST/ItaniumMangle.cpp b/clang/lib/AST/ItaniumMangle.cpp
index 1dd936cf4fb518..9948963d7f44b3 100644
--- a/clang/lib/AST/ItaniumMangle.cpp
+++ b/clang/lib/AST/ItaniumMangle.cpp
@@ -3919,6 +3919,9 @@ void CXXNameMangler::mangleNeonVectorType(const 
VectorType *T) {
     case BuiltinType::Float:     EltName = "float32_t"; break;
     case BuiltinType::Half:      EltName = "float16_t"; break;
     case BuiltinType::BFloat16:  EltName = "bfloat16_t"; break;
+    case BuiltinType::MFloat8:
+      EltName = "mfloat8_t";
+      break;
     default:
       llvm_unreachable("unexpected Neon vector element type");
     }
@@ -3972,6 +3975,8 @@ static StringRef mangleAArch64VectorBase(const 
BuiltinType *EltType) {
     return "Float64";
   case BuiltinType::BFloat16:
     return "Bfloat16";
+  case BuiltinType::MFloat8:
+    return "Mfloat8";
   default:
     llvm_unreachable("Unexpected vector element base type");
   }
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b80833fd91884d..5fd4c3e34ebeba 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6819,6 +6819,7 @@ static llvm::FixedVectorType *GetNeonType(CodeGenFunction 
*CGF,
   switch (TypeFlags.getEltType()) {
   case NeonTypeFlags::Int8:
   case NeonTypeFlags::Poly8:
+  case NeonTypeFlags::MFloat8:
     return llvm::FixedVectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
   case NeonTypeFlags::Int16:
   case NeonTypeFlags::Poly16:
diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp
index 9a9a8c7f6eae09..eeff77db2d2684 100644
--- a/clang/lib/CodeGen/CGExpr.cpp
+++ b/clang/lib/CodeGen/CGExpr.cpp
@@ -2414,8 +2414,15 @@ void CodeGenFunction::EmitStoreThroughLValue(RValue Src, 
LValue Dst,
         Vec = Builder.CreateBitCast(Vec, IRVecTy);
         // iN --> <N x i1>.
       }
-      Vec = Builder.CreateInsertElement(Vec, Src.getScalarVal(),
-                                        Dst.getVectorIdx(), "vecins");
+      llvm::Value *SrcVal = Src.getScalarVal();
+      // Allow inserting `<1 x T>` into an `<N x T>`. It can happen with scalar
+      // types which are mapped to vector LLVM IR types (e.g. for implementing
+      // an ABI).
+      if (auto *EltTy = dyn_cast<llvm::FixedVectorType>(SrcVal->getType());
+          EltTy && EltTy->getNumElements() == 1)
+        SrcVal = Builder.CreateBitCast(SrcVal, EltTy->getElementType());
+      Vec = Builder.CreateInsertElement(Vec, SrcVal, Dst.getVectorIdx(),
+                                        "vecins");
       if (IRStoreTy) {
         // <N x i1> --> <iN>.
         Vec = Builder.CreateBitCast(Vec, IRStoreTy);
diff --git a/clang/lib/CodeGen/CodeGenTypes.cpp 
b/clang/lib/CodeGen/CodeGenTypes.cpp
index 09191a4901f493..950b23f4e13b99 100644
--- a/clang/lib/CodeGen/CodeGenTypes.cpp
+++ b/clang/lib/CodeGen/CodeGenTypes.cpp
@@ -650,7 +650,9 @@ llvm::Type *CodeGenTypes::ConvertType(QualType T) {
     // An ext_vector_type of Bool is really a vector of bits.
     llvm::Type *IRElemTy = VT->isExtVectorBoolType()
                                ? llvm::Type::getInt1Ty(getLLVMContext())
-                               : ConvertType(VT->getElementType());
+                               : (VT->getElementType()->isMFloat8Type()
+                                      ? llvm::Type::getInt8Ty(getLLVMContext())
+                                      : ConvertType(VT->getElementType()));
     ResultType = llvm::FixedVectorType::get(IRElemTy, VT->getNumElements());
     break;
   }
diff --git a/clang/lib/CodeGen/Targets/AArch64.cpp 
b/clang/lib/CodeGen/Targets/AArch64.cpp
index 7db67ecba07c8f..c702e79ff8eb98 100644
--- a/clang/lib/CodeGen/Targets/AArch64.cpp
+++ b/clang/lib/CodeGen/Targets/AArch64.cpp
@@ -383,10 +383,6 @@ ABIArgInfo AArch64ABIInfo::classifyArgumentType(QualType 
Ty, bool IsVariadicFn,
         NSRN = std::min(NSRN + 1, 8u);
       else {
         switch (BT->getKind()) {
-        case BuiltinType::MFloat8x8:
-        case BuiltinType::MFloat8x16:
-          NSRN = std::min(NSRN + 1, 8u);
-          break;
         case BuiltinType::SveBool:
         case BuiltinType::SveCount:
           NPRN = std::min(NPRN + 1, 4u);
@@ -629,8 +625,7 @@ bool 
AArch64ABIInfo::isHomogeneousAggregateBaseType(QualType Ty) const {
   // but with the difference that any floating-point type is allowed,
   // including __fp16.
   if (const BuiltinType *BT = Ty->getAs<BuiltinType>()) {
-    if (BT->isFloatingPoint() || BT->getKind() == BuiltinType::MFloat8x16 ||
-        BT->getKind() == BuiltinType::MFloat8x8)
+    if (BT->isFloatingPoint())
       return true;
   } else if (const VectorType *VT = Ty->getAs<VectorType>()) {
     if (auto Kind = VT->getVectorKind();
diff --git a/clang/lib/Sema/SemaARM.cpp b/clang/lib/Sema/SemaARM.cpp
index db418d80e0e09c..2620bbc97ba02a 100644
--- a/clang/lib/Sema/SemaARM.cpp
+++ b/clang/lib/Sema/SemaARM.cpp
@@ -352,6 +352,8 @@ static QualType getNeonEltType(NeonTypeFlags Flags, 
ASTContext &Context,
     return Context.DoubleTy;
   case NeonTypeFlags::BFloat16:
     return Context.BFloat16Ty;
+  case NeonTypeFlags::MFloat8:
+    return Context.MFloat8Ty;
   }
   llvm_unreachable("Invalid NeonTypeFlag!");
 }
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index ae40895980d90a..2087e8b251d8c6 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -7502,7 +7502,7 @@ static bool breakDownVectorType(QualType type, uint64_t 
&len,
   if (const VectorType *vecType = type->getAs<VectorType>()) {
     len = vecType->getNumElements();
     eltType = vecType->getElementType();
-    assert(eltType->isScalarType());
+    assert(eltType->isScalarType() || eltType->isMFloat8Type());
     return true;
   }
 
@@ -10168,6 +10168,11 @@ QualType Sema::CheckVectorOperands(ExprResult &LHS, 
ExprResult &RHS,
     return HLSL().handleVectorBinOpConversion(LHS, RHS, LHSType, RHSType,
                                               IsCompAssign);
 
+  // Any operation with MFloat8 type is only possible with C intrinsics
+  if ((LHSVecType && LHSVecType->getElementType()->isMFloat8Type()) ||
+      (RHSVecType && RHSVecType->getElementType()->isMFloat8Type()))
+    return InvalidOperands(Loc, LHS, RHS);
+
   // AltiVec-style "vector bool op vector bool" combinations are allowed
   // for some operators but not others.
   if (!AllowBothBool && LHSVecType &&
diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp
index 2ccf5a8e1d6f31..33d5378944ddbf 100644
--- a/clang/lib/Sema/SemaType.cpp
+++ b/clang/lib/Sema/SemaType.cpp
@@ -8306,7 +8306,8 @@ static bool isPermittedNeonBaseType(QualType &Ty, 
VectorKind VecKind, Sema &S) {
          BTy->getKind() == BuiltinType::ULongLong ||
          BTy->getKind() == BuiltinType::Float ||
          BTy->getKind() == BuiltinType::Half ||
-         BTy->getKind() == BuiltinType::BFloat16;
+         BTy->getKind() == BuiltinType::BFloat16 ||
+         BTy->getKind() == BuiltinType::MFloat8;
 }
 
 static bool verifyValidIntegerConstantExpr(Sema &S, const ParsedAttr &Attr,
diff --git a/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c 
b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c
new file mode 100644
index 00000000000000..147ca1d1becc15
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c
@@ -0,0 +1,123 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1 -triple aarch64-linux -target-feature +neon 
-disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg | FileCheck %s
+
+// REQUIRES: aarch64-registered-target
+
+typedef __attribute__((neon_vector_type(8))) signed char int8x8_t;
+typedef __attribute__((neon_vector_type(16))) signed char int8x16_t;
+
+typedef __attribute__((neon_vector_type(8))) __mfp8 mfloat8x8_t;
+typedef __attribute__((neon_vector_type(16))) __mfp8 mfloat8x16_t;
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_8x8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <8 x i8> [[X]], <8 x i8> 
[[X]], <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <8 x i8> [[SHUFFLE]]
+//
+mfloat8x8_t test_8x8(mfloat8x8_t x) {
+  return __builtin_shufflevector(x, x, 3, 2, 1, 0, 3, 2, 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <8 x i8> @test_8x8_v(
+// CHECK-SAME: <8 x i8> [[X:%.*]], <8 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <8 x i8> [[P]], splat (i8 7)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <8 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <8 x i8> poison, i8 
[[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <8 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <8 x i8> [[SHUF_INS]], i8 
[[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <8 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <8 x i8> [[SHUF_INS3]], i8 
[[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <8 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <8 x i8> [[SHUF_INS6]], i8 
[[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <8 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <8 x i8> [[SHUF_INS9]], 
i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <8 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <8 x i8> [[SHUF_INS12]], 
i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <8 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <8 x i8> [[SHUF_INS15]], 
i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <8 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <8 x i8> [[X]], i8 
[[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <8 x i8> [[SHUF_INS18]], 
i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    ret <8 x i8> [[SHUF_INS21]]
+//
+mfloat8x8_t test_8x8_v(mfloat8x8_t x, int8x8_t p) {
+  return __builtin_shufflevector(x, p);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_8x16(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <16 x i8> [[X]], <16 x i8> 
[[X]], <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 
7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+// CHECK-NEXT:    ret <16 x i8> [[SHUFFLE]]
+//
+mfloat8x16_t test_8x16(mfloat8x16_t x) {
+  return __builtin_shufflevector(x, x, 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 
2,
+                                 1, 0);
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @test_8x16_v(
+// CHECK-SAME: <16 x i8> [[X:%.*]], <16 x i8> noundef [[P:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    [[MASK:%.*]] = and <16 x i8> [[P]], splat (i8 15)
+// CHECK-NEXT:    [[SHUF_IDX:%.*]] = extractelement <16 x i8> [[MASK]], i64 0
+// CHECK-NEXT:    [[SHUF_ELT:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX]]
+// CHECK-NEXT:    [[SHUF_INS:%.*]] = insertelement <16 x i8> poison, i8 
[[SHUF_ELT]], i64 0
+// CHECK-NEXT:    [[SHUF_IDX1:%.*]] = extractelement <16 x i8> [[MASK]], i64 1
+// CHECK-NEXT:    [[SHUF_ELT2:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX1]]
+// CHECK-NEXT:    [[SHUF_INS3:%.*]] = insertelement <16 x i8> [[SHUF_INS]], i8 
[[SHUF_ELT2]], i64 1
+// CHECK-NEXT:    [[SHUF_IDX4:%.*]] = extractelement <16 x i8> [[MASK]], i64 2
+// CHECK-NEXT:    [[SHUF_ELT5:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX4]]
+// CHECK-NEXT:    [[SHUF_INS6:%.*]] = insertelement <16 x i8> [[SHUF_INS3]], 
i8 [[SHUF_ELT5]], i64 2
+// CHECK-NEXT:    [[SHUF_IDX7:%.*]] = extractelement <16 x i8> [[MASK]], i64 3
+// CHECK-NEXT:    [[SHUF_ELT8:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX7]]
+// CHECK-NEXT:    [[SHUF_INS9:%.*]] = insertelement <16 x i8> [[SHUF_INS6]], 
i8 [[SHUF_ELT8]], i64 3
+// CHECK-NEXT:    [[SHUF_IDX10:%.*]] = extractelement <16 x i8> [[MASK]], i64 4
+// CHECK-NEXT:    [[SHUF_ELT11:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX10]]
+// CHECK-NEXT:    [[SHUF_INS12:%.*]] = insertelement <16 x i8> [[SHUF_INS9]], 
i8 [[SHUF_ELT11]], i64 4
+// CHECK-NEXT:    [[SHUF_IDX13:%.*]] = extractelement <16 x i8> [[MASK]], i64 5
+// CHECK-NEXT:    [[SHUF_ELT14:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX13]]
+// CHECK-NEXT:    [[SHUF_INS15:%.*]] = insertelement <16 x i8> [[SHUF_INS12]], 
i8 [[SHUF_ELT14]], i64 5
+// CHECK-NEXT:    [[SHUF_IDX16:%.*]] = extractelement <16 x i8> [[MASK]], i64 6
+// CHECK-NEXT:    [[SHUF_ELT17:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX16]]
+// CHECK-NEXT:    [[SHUF_INS18:%.*]] = insertelement <16 x i8> [[SHUF_INS15]], 
i8 [[SHUF_ELT17]], i64 6
+// CHECK-NEXT:    [[SHUF_IDX19:%.*]] = extractelement <16 x i8> [[MASK]], i64 7
+// CHECK-NEXT:    [[SHUF_ELT20:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX19]]
+// CHECK-NEXT:    [[SHUF_INS21:%.*]] = insertelement <16 x i8> [[SHUF_INS18]], 
i8 [[SHUF_ELT20]], i64 7
+// CHECK-NEXT:    [[SHUF_IDX22:%.*]] = extractelement <16 x i8> [[MASK]], i64 8
+// CHECK-NEXT:    [[SHUF_ELT23:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX22]]
+// CHECK-NEXT:    [[SHUF_INS24:%.*]] = insertelement <16 x i8> [[SHUF_INS21]], 
i8 [[SHUF_ELT23]], i64 8
+// CHECK-NEXT:    [[SHUF_IDX25:%.*]] = extractelement <16 x i8> [[MASK]], i64 9
+// CHECK-NEXT:    [[SHUF_ELT26:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX25]]
+// CHECK-NEXT:    [[SHUF_INS27:%.*]] = insertelement <16 x i8> [[SHUF_INS24]], 
i8 [[SHUF_ELT26]], i64 9
+// CHECK-NEXT:    [[SHUF_IDX28:%.*]] = extractelement <16 x i8> [[MASK]], i64 
10
+// CHECK-NEXT:    [[SHUF_ELT29:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX28]]
+// CHECK-NEXT:    [[SHUF_INS30:%.*]] = insertelement <16 x i8> [[SHUF_INS27]], 
i8 [[SHUF_ELT29]], i64 10
+// CHECK-NEXT:    [[SHUF_IDX31:%.*]] = extractelement <16 x i8> [[MASK]], i64 
11
+// CHECK-NEXT:    [[SHUF_ELT32:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX31]]
+// CHECK-NEXT:    [[SHUF_INS33:%.*]] = insertelement <16 x i8> [[SHUF_INS30]], 
i8 [[SHUF_ELT32]], i64 11
+// CHECK-NEXT:    [[SHUF_IDX34:%.*]] = extractelement <16 x i8> [[MASK]], i64 
12
+// CHECK-NEXT:    [[SHUF_ELT35:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX34]]
+// CHECK-NEXT:    [[SHUF_INS36:%.*]] = insertelement <16 x i8> [[SHUF_INS33]], 
i8 [[SHUF_ELT35]], i64 12
+// CHECK-NEXT:    [[SHUF_IDX37:%.*]] = extractelement <16 x i8> [[MASK]], i64 
13
+// CHECK-NEXT:    [[SHUF_ELT38:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX37]]
+// CHECK-NEXT:    [[SHUF_INS39:%.*]] = insertelement <16 x i8> [[SHUF_INS36]], 
i8 [[SHUF_ELT38]], i64 13
+// CHECK-NEXT:    [[SHUF_IDX40:%.*]] = extractelement <16 x i8> [[MASK]], i64 
14
+// CHECK-NEXT:    [[SHUF_ELT41:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX40]]
+// CHECK-NEXT:    [[SHUF_INS42:%.*]] = insertelement <16 x i8> [[SHUF_INS39]], 
i8 [[SHUF_ELT41]], i64 14
+// CHECK-NEXT:    [[SHUF_IDX43:%.*]] = extractelement <16 x i8> [[MASK]], i64 
15
+// CHECK-NEXT:    [[SHUF_ELT44:%.*]] = extractelement <16 x i8> [[X]], i8 
[[SHUF_IDX43]]
+// CHECK-NEXT:    [[SHUF_INS45:%.*]] = insertelement <16 x i8> [[SHUF_INS42]], 
i8 [[SHUF_ELT44]], i64 15
+// CHECK-NEXT:    ret <16 x i8> [[SHUF_INS45]]
+//
+mfloat8x16_t test_8x16_v(mfloat8x16_t x, int8x16_t p) {
+  return __builtin_shufflevector(x, p);
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-cast.c 
b/clang/test/CodeGen/AArch64/fp8-cast.c
new file mode 100644
index 00000000000000..a9ce31b9e6beab
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-cast.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | 
FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +neon 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg | 
FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1        -triple aarch64-none-linux-gnu -target-feature +neon 
-disable-O0-optnone -Werror -Wall -S -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include <arm_neon.h>
+
+// Bitcast between FP8 Neon vectors
+// CHECK-LABEL: define dso_local <8 x i8> @test_f8_f8(
+// CHECK-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <8 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <8 x i8> @_Z10test_f8_f813__Mfloat8x8_t(
+// CHECK-CXX-SAME: <8 x i8> [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <8 x i8> [[X]]
+//
+mfloat8x8_t test_f8_f8(mfloat8x8_t x) {
+    return (mfloat8x8_t) x;
+}
+
+// CHECK-LABEL: define dso_local <16 x i8> @testq_f8_f8(
+// CHECK-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:    ret <16 x i8> [[X]]
+//
+// CHECK-CXX-LABEL: define dso_local <16 x i8> 
@_Z11testq_f8_f814__Mfloat8x16_t(
+// CHECK-CXX-SAME: <16 x i8> [[X:%.*]]) #[[ATTR0]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:    ret <16 x i8> [[X]]
+//
+mfloat8x16_t testq_f8_f8(mfloat8x16_t x) {
+    return (mfloat8x16_t) x;
+}
+
+// Bitcast between FP8 and int8 Neon vectors
+// CHECK-LABEL: define dso_local <...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/123603
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [AArch64] Implement NEON FP8 vectors as VectorType (PR #123603)

Reply via email to