[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
https://github.com/momchil-velikov updated
https://github.com/llvm/llvm-project/pull/123614
>From ae09723ecc1cc9bc2cbcef300b05aa2ce5ced448 Mon Sep 17 00:00:00 2001
From: Momchil Velikov
Date: Tue, 17 Dec 2024 11:42:42 +0000
Subject: [PATCH 1/2] [AArch64] Add FP8 Neon intrinsics for dot-product
This patch adds the following intrinsics:
float16x4_t vdot_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t vm,
fpm_t fpm)
float16x8_t vdotq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x16_t vm,
fpm_t fpm)
float16x4_t vdot_lane_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x8_t
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x4_t vdot_laneq_f16_mf8_fpm(float16x4_t vd, mfloat8x8_t vn, mfloat8x16_t
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_lane_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn, mfloat8x8_t
vm, __builtin_constant_p(lane), fpm_t fpm)
float16x8_t vdotq_laneq_f16_mf8_fpm(float16x8_t vd, mfloat8x16_t vn,
mfloat8x16_t vm, __builtin_constant_p(lane), fpm_t fpm)
[fixup] Remove not needed argument (NFC)
[fixup] Update intrinsics declarations
[fixup] Add C++ runs to tests, remove some opt passes
---
clang/include/clang/Basic/arm_neon.td | 20 ++
clang/include/clang/Basic/arm_neon_incl.td| 2 +-
clang/lib/CodeGen/CGBuiltin.cpp | 44 +++
clang/lib/CodeGen/CodeGenFunction.h | 4 +
.../fp8-intrinsics/acle_neon_fp8_fdot.c | 254 ++
.../acle_neon_fp8_fdot.c | 54
llvm/include/llvm/IR/IntrinsicsAArch64.td | 21 ++
.../lib/Target/AArch64/AArch64InstrFormats.td | 82 +++---
llvm/lib/Target/AArch64/AArch64InstrInfo.td | 14 +-
llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll| 74 +
10 files changed, 529 insertions(+), 40 deletions(-)
create mode 100644
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c
create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c
create mode 100644 llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll
diff --git a/clang/include/clang/Basic/arm_neon.td
b/clang/include/clang/Basic/arm_neon.td
index 9a6a77640ef5d3..c6609f312969ee 100644
--- a/clang/include/clang/Basic/arm_neon.td
+++ b/clang/include/clang/Basic/arm_neon.td
@@ -2141,6 +2141,26 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard =
"fp8,neon" in {
def VCVTN_F8_F16 : VInst<"vcvt_mf8_f16_fpm", ".(>F)(>F)V",
"mQm">;
}
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot2,neon" in {
+ def VDOT_F16_MF8 : VInst<"vdot_f16_mf8_fpm", "(>F)(>F)..V", "mQm">;
+
+ def VDOT_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F)..IV",
"m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOT_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F).QIV",
"m", [ImmCheck<3, ImmCheck0_7, 0>]>;
+
+ def VDOTQ_LANE_F16_MF8 : VInst<"vdot_lane_f16_mf8_fpm", "(>F)(>F).qIV",
"Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+ def VDOTQ_LANEQ_F16_MF8 : VInst<"vdot_laneq_f16_mf8_fpm", "(>F)(>F)..IV",
"Qm", [ImmCheck<3, ImmCheck0_7, 0>]>;
+}
+
+let ArchGuard = "defined(__aarch64__)", TargetGuard = "fp8dot4,neon" in {
+ def VDOT_F32_MF8 : VInst<"vdot_f32_mf8_fpm", "(>>F)(>>F)..V", "mQm">;
+
+ def VDOT_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F)..IV",
"m", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOT_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F).QIV",
"m", [ImmCheck<3, ImmCheck0_3, 0>]>;
+
+ def VDOTQ_LANE_F32_MF8 : VInst<"vdot_lane_f32_mf8_fpm", "(>>F)(>>F).qIV",
"Qm", [ImmCheck<3, ImmCheck0_1, 0>]>;
+ def VDOTQ_LANEQ_F32_MF8 : VInst<"vdot_laneq_f32_mf8_fpm", "(>>F)(>>F)..IV",
"Qm", [ImmCheck<3, ImmCheck0_3, 0>]>;
+}
+
let ArchGuard = "defined(__aarch64__)", TargetGuard = "neon,faminmax" in {
def FAMIN : WInst<"vamin", "...", "fhQdQfQh">;
def FAMAX : WInst<"vamax", "...", "fhQdQfQh">;
diff --git a/clang/include/clang/Basic/arm_neon_incl.td
b/clang/include/clang/Basic/arm_neon_incl.td
index 91a2bf3020b9a3..b9b9d509c22512 100644
--- a/clang/include/clang/Basic/arm_neon_incl.td
+++ b/clang/include/clang/Basic/arm_neon_incl.td
@@ -302,7 +302,7 @@ class Inst ch = []>{
class SInst ch = []> : Inst {}
class IInst ch = []> : Inst {}
class WInst ch = []> : Inst {}
-class VInst : Inst {}
+class VInst ch = []> : Inst {}
// The following instruction classes are implemented via operators
// instead of builtins. As such these declarations are only used for
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0a06ce028a9160..b4b26eb84d5f92 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -6766,6 +6766,24 @@ Value *CodeGenFunction::EmitFP8NeonCall(Function *F,
return EmitNeonCall(F, Ops, name);
}
+llvm::Value *CodeGenFunction::EmitFP8NeonFDOTCall(
+unsigned IID, bool ExtendLane, llvm::Type *RetTy,
+SmallVectorImpl &Ops, const CallExpr *E, const char *name) {
+
+ const unsigned ElemCount = Ops[0]->getType()->getPrimitiveSizeInBits() /
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
https://github.com/jthackray approved this pull request. Déjà vu, LGTM. https://github.com/llvm/llvm-project/pull/123614 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote:
@llvm/pr-subscribers-clang-codegen
Author: Momchil Velikov (momchil-velikov)
Changes
This patch adds the following intrinsics:
float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
Supersedes https://github.com/llvm/llvm-project/pull/120273
---
Patch is 160.53 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/123614.diff
37 Files Affected:
- (modified) clang/include/clang/AST/Type.h (+5)
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20)
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1)
- (modified) clang/include/clang/Basic/arm_neon.td (+52)
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+18-12)
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1)
- (modified) clang/lib/AST/Type.cpp (+1-3)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11)
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5)
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8)
- (modified) clang/lib/Sema/SemaARM.cpp (+2)
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1)
- (modified) clang/lib/Sema/SemaType.cpp (+2-1)
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121)
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35)
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7)
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11)
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22)
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12)
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56)
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112)
```diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public
ExtQualsTypeCommonBase {
bool isFloat32Type() const;
bool isDoubleType() const;
bool isBFloat16Type() const;
+ bool isMFloat8Type() const;
bool isFloat128Type() const;
bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
return isSpecificBuiltinType(BuiltinType::BFloat16);
}
+inline bool Type::isMFloat8Type() const {
+ return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
// - IsBF true for vector of brain float elements.
//===--===//
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ SVE_TYPE(Name, Id, SingletonId)
+#endif
+
#ifndef SVE_VECTOR_TYPE
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits,
NF, false, false, true)
#endif
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls,
ElBits, NF) \
+ S
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote:
@llvm/pr-subscribers-backend-arm
Author: Momchil Velikov (momchil-velikov)
Changes
This patch adds the following intrinsics:
float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
Supersedes https://github.com/llvm/llvm-project/pull/120273
---
Patch is 160.53 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/123614.diff
37 Files Affected:
- (modified) clang/include/clang/AST/Type.h (+5)
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20)
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1)
- (modified) clang/include/clang/Basic/arm_neon.td (+52)
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+18-12)
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1)
- (modified) clang/lib/AST/Type.cpp (+1-3)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11)
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5)
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8)
- (modified) clang/lib/Sema/SemaARM.cpp (+2)
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1)
- (modified) clang/lib/Sema/SemaType.cpp (+2-1)
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121)
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35)
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7)
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11)
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22)
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12)
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56)
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112)
```diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public
ExtQualsTypeCommonBase {
bool isFloat32Type() const;
bool isDoubleType() const;
bool isBFloat16Type() const;
+ bool isMFloat8Type() const;
bool isFloat128Type() const;
bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
return isSpecificBuiltinType(BuiltinType::BFloat16);
}
+inline bool Type::isMFloat8Type() const {
+ return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
// - IsBF true for vector of brain float elements.
//===--===//
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ SVE_TYPE(Name, Id, SingletonId)
+#endif
+
#ifndef SVE_VECTOR_TYPE
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits,
NF, false, false, true)
#endif
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls,
ElBits, NF) \
+ SVE
[clang] [llvm] [AArch64] Implement NEON FP8 fused multiply-add intrinsics (non-indexed) (PR #123614)
llvmbot wrote:
@llvm/pr-subscribers-clang
Author: Momchil Velikov (momchil-velikov)
Changes
This patch adds the following intrinsics:
float16x8_t vmlalbq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float16x8_t vmlaltq_f16_mf8_fpm(float16x8_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallbtq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlalltbq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
float32x4_t vmlallttq_f32_mf8_fpm(float32x4_t, mfloat8x16_t, mfloat8x16_t,
fpm_t)
Supersedes https://github.com/llvm/llvm-project/pull/120273
---
Patch is 160.53 KiB, truncated to 20.00 KiB below, full version:
https://github.com/llvm/llvm-project/pull/123614.diff
37 Files Affected:
- (modified) clang/include/clang/AST/Type.h (+5)
- (modified) clang/include/clang/Basic/AArch64SVEACLETypes.def (+17-20)
- (modified) clang/include/clang/Basic/TargetBuiltins.h (+3-1)
- (modified) clang/include/clang/Basic/arm_neon.td (+52)
- (modified) clang/include/clang/Basic/arm_neon_incl.td (+2)
- (modified) clang/lib/AST/ASTContext.cpp (+18-12)
- (modified) clang/lib/AST/ItaniumMangle.cpp (+6-1)
- (modified) clang/lib/AST/Type.cpp (+1-3)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+147)
- (modified) clang/lib/CodeGen/CGExpr.cpp (+9-2)
- (modified) clang/lib/CodeGen/CodeGenFunction.h (+11)
- (modified) clang/lib/CodeGen/CodeGenTypes.cpp (+13-5)
- (modified) clang/lib/CodeGen/Targets/AArch64.cpp (+6-8)
- (modified) clang/lib/Sema/SemaARM.cpp (+2)
- (modified) clang/lib/Sema/SemaExpr.cpp (+6-1)
- (modified) clang/lib/Sema/SemaType.cpp (+2-1)
- (added) clang/test/CodeGen/AArch64/builtin-shufflevector-fp8.c (+123)
- (added) clang/test/CodeGen/AArch64/fp8-cast.c (+193)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_cvt.c (+316)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fdot.c (+254)
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_fmla.c (+121)
- (modified) clang/test/CodeGen/arm-mfp8.c (+53-35)
- (modified) clang/test/CodeGenCXX/aarch64-mangle-neon-vectors.cpp (+7)
- (modified) clang/test/CodeGenCXX/mangle-neon-vectors.cpp (+11)
- (added) clang/test/Sema/aarch64-fp8-cast.c (+104)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_cvt.c (+43)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fdot.c (+54)
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_neon_fp8_fmla.c (+22)
- (modified) clang/test/Sema/arm-mfp8.cpp (+22-12)
- (modified) clang/utils/TableGen/NeonEmitter.cpp (+23-9)
- (modified) clang/utils/TableGen/SveEmitter.cpp (+2-2)
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+60)
- (modified) llvm/lib/Target/AArch64/AArch64InstrFormats.td (+88-49)
- (modified) llvm/lib/Target/AArch64/AArch64InstrInfo.td (+21-21)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fdot.ll (+74)
- (added) llvm/test/CodeGen/AArch64/fp8-neon-fmla.ll (+56)
- (added) llvm/test/CodeGen/AArch64/neon-fp8-cvt.ll (+112)
```diff
diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index 3457d524c63aaa..1d9743520654eb 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2518,6 +2518,7 @@ class alignas(TypeAlignment) Type : public
ExtQualsTypeCommonBase {
bool isFloat32Type() const;
bool isDoubleType() const;
bool isBFloat16Type() const;
+ bool isMFloat8Type() const;
bool isFloat128Type() const;
bool isIbm128Type() const;
bool isRealType() const; // C99 6.2.5p17 (real floating + integer)
@@ -8537,6 +8538,10 @@ inline bool Type::isBFloat16Type() const {
return isSpecificBuiltinType(BuiltinType::BFloat16);
}
+inline bool Type::isMFloat8Type() const {
+ return isSpecificBuiltinType(BuiltinType::MFloat8);
+}
+
inline bool Type::isFloat128Type() const {
return isSpecificBuiltinType(BuiltinType::Float128);
}
diff --git a/clang/include/clang/Basic/AArch64SVEACLETypes.def
b/clang/include/clang/Basic/AArch64SVEACLETypes.def
index 063cac1f4a58ee..a408bb0c54057c 100644
--- a/clang/include/clang/Basic/AArch64SVEACLETypes.def
+++ b/clang/include/clang/Basic/AArch64SVEACLETypes.def
@@ -57,6 +57,11 @@
// - IsBF true for vector of brain float elements.
//===--===//
+#ifndef SVE_SCALAR_TYPE
+#define SVE_SCALAR_TYPE(Name, MangledName, Id, SingletonId, Bits) \
+ SVE_TYPE(Name, Id, SingletonId)
+#endif
+
#ifndef SVE_VECTOR_TYPE
#define SVE_VECTOR_TYPE(Name, MangledName, Id, SingletonId) \
SVE_TYPE(Name, Id, SingletonId)
@@ -72,6 +77,11 @@
SVE_VECTOR_TYPE_DETAILS(Name, MangledName, Id, SingletonId, NumEls, ElBits,
NF, false, false, true)
#endif
+#ifndef SVE_VECTOR_TYPE_MFLOAT
+#define SVE_VECTOR_TYPE_MFLOAT(Name, MangledName, Id, SingletonId, NumEls,
ElBits, NF) \
+ SVE_VECTO
