[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov closed 
https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/118123

>From 359e611dc5a74d4e2dfdb19119eb8f83badb1f0b Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH] [AArch64] Implement FP8 SVE intrinsics for widening
 conversions

This patch adds the following intrinsics:

* 8-bit floating-point convert to half-precision and BFloat16.

  // Variants are also available for: _bf16
  svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);

* 8-bit floating-point convert to half-precision and BFloat16 (top).

  // Variants are also available for: _bf16
  svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
---
 clang/include/clang/Basic/arm_sve.td  |  20 +-
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  13 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 317 insertions(+), 14 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 9b8a8546b072c0..7b8ecf29a9de6e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2,fp8" in {
   def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, 
"aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
 
   // Convert from FP8 to half-precision/BFloat16 multi-vector
-  def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
-  def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
 
   // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
-  def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
-  def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
 }
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // SVE FP8 widening conversions
+
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +

[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov edited 
https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-09 Thread Momchil Velikov via cfe-commits


@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s

momchil-velikov wrote:

Removed.

https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/118123

>From cdd86588c639f818909964ab49b9972da6869cb3 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH] [AArch64] Implement FP8 SVE intrinsics for widening
 conversions

This patch adds the following intrinsics:

* 8-bit floating-point convert to half-precision and BFloat16.

  // Variants are also available for: _bf16
  svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);

* 8-bit floating-point convert to half-precision and BFloat16 (top).

  // Variants are also available for: _bf16
  svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
---
 clang/include/clang/Basic/arm_sve.td  |  20 +-
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  13 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 317 insertions(+), 14 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 9b8a8546b072c0..7b8ecf29a9de6e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2,fp8" in {
   def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, 
"aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
 
   // Convert from FP8 to half-precision/BFloat16 multi-vector
-  def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
-  def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
 
   // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
-  def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
-  def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
 }
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // SVE FP8 widening conversions
+
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +

[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-03 Thread via cfe-commits

https://github.com/CarolineConcatto approved this pull request.

Momchil,
Thank you for the patch. It LGTM!
I just would like before you push the patch to update the commit message with 
the prototypes you are implementing in this patch. So we can check with CVT it 
has implemented without having to look into the patch.


https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-02 Thread via cfe-commits


@@ -3864,3 +3864,20 @@ def int_aarch64_sve_famin_u : 
AdvSIMD_Pred2VectorArg_Intrinsic;
 // Neon absolute maximum and minimum
 def int_aarch64_neon_famax :  AdvSIMD_2VectorArg_Intrinsic;
 def int_aarch64_neon_famin :  AdvSIMD_2VectorArg_Intrinsic;
+
+//
+// FP8 intrinsics

SpencerAbson wrote:

Would you mind moving the `SME2_FP8_CVT_X2_Single_Intrinsic` intrinics I've 
merged (and perhaps the fp8 `fscale` ones) down to this section? I'll follow 
this convention with my in-flight patches. 

https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-12-02 Thread via cfe-commits


@@ -0,0 +1,78 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 5
+; RUN: llc -mattr=+bf16,+sve2,+fp8 < %s | FileCheck %s

SpencerAbson wrote:

nit: I'm not sure we need `+bf16` in these.

https://github.com/llvm/llvm-project/pull/118123
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-11-29 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Momchil Velikov (momchil-velikov)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/118123.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+10) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) 
- (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+24) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+17) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+8-8) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+6-1) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-CXX-NEXT:ret  [[TMP0]]
+//
+svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+  return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_local  @test_svcvt2_bf16_mf8(
+// CH

[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-11-29 Thread via cfe-commits

llvmbot wrote:



@llvm/pr-subscribers-clang

@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)


Changes



---
Full diff: https://github.com/llvm/llvm-project/pull/118123.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+10) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) 
- (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+24) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+17) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+8-8) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+6-1) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-CXX-NEXT:ret  [[TMP0]]
+//
+svbfloat16_t test_svcvt1_bf16_mf8(svmfloat8_t zn, fpm_t fpm) STREAMING {
+  return SVE_ACLE_FUNC(svcvt1_bf16,_mf8,_fpm)(zn, fpm);
+}
+
+// CHECK-LABEL: define dso_l

[clang] [llvm] [AArch64] Implement FP8 SVE intrinsics for widening conversions (PR #118123)

2024-11-29 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov created 
https://github.com/llvm/llvm-project/pull/118123

None

>From 3a9643e6c2d61eae2e23df42c19b1410d4a5fcc5 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH] FP8 CVT/CVTL

---
 clang/include/clang/Basic/arm_sve.td  |  10 +
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  17 ++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 316 insertions(+), 9 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  
@llv