[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/118123 >From 359e611dc5a74d4e2dfdb19119eb8f83badb1f0b Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 21 Nov 2024 11:21:29 + Subject: [PATCH] [AArch64] Implement FP8 SVE intrinsics for widening conversions This patch adds the following intrinsics: * 8-bit floating-point convert to half-precision and BFloat16. // Variants are also available for: _bf16 svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); * 8-bit floating-point convert to half-precision and BFloat16 (top). // Variants are also available for: _bf16 svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); --- clang/include/clang/Basic/arm_sve.td | 20 +- .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++ .../aarch64-sve2-intrinsics/acle_sve2_fp8.c | 24 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 +- llvm/lib/Target/AArch64/SVEInstrFormats.td| 7 +- .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll | 78 7 files changed, 317 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9b8a8546b072c0..7b8ecf29a9de6e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; // Convert from FP8 to half-precision/BFloat16 multi-vector - def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector - def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { @@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // SVE FP8 widening conversions + + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
@@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s momchil-velikov wrote: Removed. https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/118123 >From cdd86588c639f818909964ab49b9972da6869cb3 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 21 Nov 2024 11:21:29 + Subject: [PATCH] [AArch64] Implement FP8 SVE intrinsics for widening conversions This patch adds the following intrinsics: * 8-bit floating-point convert to half-precision and BFloat16. // Variants are also available for: _bf16 svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); * 8-bit floating-point convert to half-precision and BFloat16 (top). // Variants are also available for: _bf16 svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm); --- clang/include/clang/Basic/arm_sve.td | 20 +- .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++ .../aarch64-sve2-intrinsics/acle_sve2_fp8.c | 24 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 13 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 +- llvm/lib/Target/AArch64/SVEInstrFormats.td| 7 +- .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll | 78 7 files changed, 317 insertions(+), 14 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 9b8a8546b072c0..7b8ecf29a9de6e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,fp8" in { def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, "aarch64_sme_fp8_scale_x4", [IsStreaming],[]>; // Convert from FP8 to half-precision/BFloat16 multi-vector - def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>; // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector - def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; - def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; + def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl1_x2", [IsStreaming, SetsFPMR], []>; + def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, "aarch64_sve_fp8_cvtl2_x2", [IsStreaming, SetsFPMR], []>; } let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { @@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // SVE FP8 widening conversions + + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/CarolineConcatto approved this pull request. Momchil, Thank you for the patch. It LGTM! I just would like before you push the patch to update the commit message with the prototypes you are implementing in this patch. So we can check with CVT it has implemented without having to look into the patch. https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
@@ -3864,3 +3864,20 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic; // Neon absolute maximum and minimum def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic; def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic; + +// +// FP8 intrinsics SpencerAbson wrote: Would you mind moving the `SME2_FP8_CVT_X2_Single_Intrinsic` intrinics I've merged (and perhaps the fp8 `fscale` ones) down to this section? I'll follow this convention with my in-flight patches. https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
@@ -0,0 +1,78 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s SpencerAbson wrote: nit: I'm not sure we need `+bf16` in these. https://github.com/llvm/llvm-project/pull/118123 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Momchil Velikov (momchil-velikov) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/118123.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sve.td (+10) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) - (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+24) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+17) - (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+8-8) - (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+6-1) - (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) ``diff diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b36e592042da0b..b9d8360843aa8e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvt1_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-NEXT:ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT:ret [[TMP0]] +// +svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_local @test_svcvt2_bf16_mf8( +// CH
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
llvmbot wrote: @llvm/pr-subscribers-clang @llvm/pr-subscribers-backend-aarch64 Author: Momchil Velikov (momchil-velikov) Changes --- Full diff: https://github.com/llvm/llvm-project/pull/118123.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sve.td (+10) - (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) - (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+24) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+17) - (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+8-8) - (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+6-1) - (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) ``diff diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b36e592042da0b..b9d8360843aa8e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvt1_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-NEXT:ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-CXX-NEXT:ret [[TMP0]] +// +svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING { + return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm); +} + +// CHECK-LABEL: define dso_l
[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/118123 None >From 3a9643e6c2d61eae2e23df42c19b1410d4a5fcc5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 21 Nov 2024 11:21:29 + Subject: [PATCH] FP8 CVT/CVTL --- clang/include/clang/Basic/arm_sve.td | 10 + .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++ .../aarch64-sve2-intrinsics/acle_sve2_fp8.c | 24 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 17 ++ .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 16 +- llvm/lib/Target/AArch64/SVEInstrFormats.td| 7 +- .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll | 78 7 files changed, 316 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index b36e592042da0b..b9d8360843aa8e 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = "sme2,faminmax" in { defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", "aarch64_sve_famin_u">; defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", "aarch64_sve_famax_u">; } + +let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in { + // 8-bit floating-point convert to BFloat16/Float16 + def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>; + + // 8-bit floating-point convert to BFloat16/Float16 (top) + def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>; + def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, "aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>; +} diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c new file mode 100644 index 00..c026b8aa216f32 --- /dev/null +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c @@ -0,0 +1,173 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#ifdef __ARM_FEATURE_SME +#include +#else +#include +#endif + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +#ifdef __ARM_FEATURE_SME +#define STREAMING __arm_streaming +#else +#define STREAMING +#endif + +// CHECK-LABEL: define dso_local @test_svcvt1_bf16_mf8( +// CHECK-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]]) +// CHECK-NEXT:ret [[TMP0]] +// +// CHECK-CXX-LABEL: define dso_local @_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm( +// CHECK-CXX-SAME: [[ZN:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-CXX-NEXT: [[ENTRY:.*:]] +// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]]) +// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call @llv