[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-11 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov closed 
https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/118124

>From 6f268d4a80a8994855c99679ce1c66c11be8e357 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Mon, 25 Nov 2024 09:47:41 +
Subject: [PATCH] [AArch64] Implement FP8 SVE Intrinsics for narrowing
 conversions

* Half-precision and BFloat16 convert, narrow, and interleave to 8-bit 
floating-point.

  // Variant is also available for: _bf16_x2
  svmfloat8_t svcvtn_mf8[_f16_x2]_fpm(svfloat16x2_t zn, fpm_t fpm);

* Single-precision convert, narrow, and interleave to 8-bit floating-point (top 
and bottom).

  svmfloat8_t svcvtnt_mf8[_f32_x2]_fpm(svmfloat8_t zd, svfloat32x2_t zn, fpm_t 
fpm);
  svmfloat8_t svcvtnb_mf8[_f32_x2]_fpm(svfloat32x2_t zn, fpm_t fpm);
---
 clang/include/clang/Basic/arm_sve.td  |   7 ++
 .../fp8-intrinsics/acle_sve2_fp8_cvtn.c   | 101 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  11 +-
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  14 +++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |   9 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|  37 ++-
 llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll |  49 +
 7 files changed, 222 insertions(+), 6 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 7b8ecf29a9de6e..d467720fc5c61f 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2462,4 +2462,11 @@ let SVETargetGuard = "sve2,fp8", SMETargetGuard = 
"sme2,fp8" in {
   // 8-bit floating-point convert to BFloat16/Float16 (top)
   def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
   def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point
+  def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // Single-precision convert, narrow and interleave to 8-bit floating-point 
(top and bottom)
+  def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>",  "f", MergeNone, 
"aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, 
"aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>;
 }
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
new file mode 100644
index 00..ed5b0ce02af4bd
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c
@@ -0,0 +1,101 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvtn_f8_bf16(
+// CHECK-SAME:  [[ZN_ZM_COERCE0:%.*]],  [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i6

[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/118124

>From 359e611dc5a74d4e2dfdb19119eb8f83badb1f0b Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH 1/2] [AArch64] Implement FP8 SVE intrinsics for widening
 conversions

This patch adds the following intrinsics:

* 8-bit floating-point convert to half-precision and BFloat16.

  // Variants are also available for: _bf16
  svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);

* 8-bit floating-point convert to half-precision and BFloat16 (top).

  // Variants are also available for: _bf16
  svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
---
 clang/include/clang/Basic/arm_sve.td  |  20 +-
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  13 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 317 insertions(+), 14 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 9b8a8546b072c0..7b8ecf29a9de6e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2,fp8" in {
   def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, 
"aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
 
   // Convert from FP8 to half-precision/BFloat16 multi-vector
-  def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
-  def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
 
   // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
-  def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
-  def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
 }
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // SVE FP8 widening conversions
+
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-featu

[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov edited 
https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/118124

>From cdd86588c639f818909964ab49b9972da6869cb3 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH 1/2] [AArch64] Implement FP8 SVE intrinsics for widening
 conversions

This patch adds the following intrinsics:

* 8-bit floating-point convert to half-precision and BFloat16.

  // Variants are also available for: _bf16
  svfloat16_t svcvt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);

* 8-bit floating-point convert to half-precision and BFloat16 (top).

  // Variants are also available for: _bf16
  svfloat16_t svcvtlt1_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
  svfloat16_t svcvtlt2_f16[_mf8]_fpm(svmfloat8_t zn, fpm_t fpm);
---
 clang/include/clang/Basic/arm_sve.td  |  20 +-
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  13 +-
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 317 insertions(+), 14 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index 9b8a8546b072c0..7b8ecf29a9de6e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2430,12 +2430,12 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = 
"sme2,fp8" in {
   def FSCALE_X4 : Inst<"svscale[_{d}_x4]", "444.x", "fhd", MergeNone, 
"aarch64_sme_fp8_scale_x4", [IsStreaming],[]>;
 
   // Convert from FP8 to half-precision/BFloat16 multi-vector
-  def SVF1CVT : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
-  def SVF2CVT : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF1CVT_X2 : Inst<"svcvt1_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1_x2", [IsStreaming, SetsFPMR], []>;
+  def SVF2CVT_X2 : Inst<"svcvt2_{d}[_mf8]_x2_fpm", "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2_x2", [IsStreaming, SetsFPMR], []>;
 
   // Convert from FP8 to deinterleaved half-precision/BFloat16 multi-vector
-  def SVF1CVTL : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
-  def SVF2CVTL : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF1CVTL_X2 : Inst<"svcvtl1_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl1_x2",  [IsStreaming, SetsFPMR], []>;
+  def SVF2CVTL_X2 : Inst<"svcvtl2_{d}[_mf8]_x2_fpm",  "2~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtl2_x2",  [IsStreaming, SetsFPMR], []>;
 }
 
 let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in {
@@ -2451,3 +2451,15 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // SVE FP8 widening conversions
+
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-featu

[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-09 Thread Momchil Velikov via cfe-commits


@@ -0,0 +1,33 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +bf16 -verify -emit-llvm %s

momchil-velikov wrote:

This feature is needed by `svcreate2` and `svundef_bf16`. What problems does it 
create as to need fixing?

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-09 Thread Momchil Velikov via cfe-commits


@@ -0,0 +1,101 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvtn_f8_bf16(
+// CHECK-SAME:  [[ZN_ZM_COERCE0:%.*]],  [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], 
 [[ZN_ZM_COERCE1]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm(
+// CHECK-CXX-SAME:  [[ZN_ZM_COERCE0:%.*]],  [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], 
 [[ZN_ZM_COERCE1]])
+// CHECK-CXX-NEXT:ret  [[TMP0]]
+//
+svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
+  return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm);

momchil-velikov wrote:

It's completely irrelevant what is the variable name here. I've probably named 
it like this because the value is passed in two registers. Neither of `zn_zm`, 
`zn1_zn2`, `u_v`, `a`, 'x', `wt`, etc is better or worse than another.

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-09 Thread Momchil Velikov via cfe-commits


@@ -10753,10 +10758,45 @@ class sve2_fp8_down_cvt_single opc, string 
mnemonic,
   let Inst{5} = 0b0;
   let Inst{4-0} = Zd;
   let Uses = [FPMR, FPCR];
+
+  let mayLoad  = 1;
+  let mayStore = 0;
 }
 
-multiclass sve2_fp8_down_cvt_single opc, string mnemonic, 
RegisterOperand src> {
+multiclass sve2_fp8_down_cvt_single opc, string mnemonic, 
RegisterOperand src,
+ValueType ty, SDPatternOperator op> {
   def NAME : sve2_fp8_down_cvt_single;
+
+  def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)),
+(!cast(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, 
$Zn2, zsub1))>;
+}
+
+class sve2_fp8_down_cvt_single_top opc, string mnemonic, 
RegisterOperand src_ty>

momchil-velikov wrote:

It's not like the encodings are going to change so we are worried about extra 
maintenance burden due to duplication.

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread via cfe-commits


@@ -0,0 +1,101 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvtn_f8_bf16(
+// CHECK-SAME:  [[ZN_ZM_COERCE0:%.*]],  [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], 
 [[ZN_ZM_COERCE1]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z19test_svcvtn_f8_bf1614svbfloat16x2_tm(
+// CHECK-CXX-SAME:  [[ZN_ZM_COERCE0:%.*]],  [[ZN_ZM_COERCE1:%.*]], i64 noundef [[FPM:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvtn.nxv8bf16( [[ZN_ZM_COERCE0]], 
 [[ZN_ZM_COERCE1]])
+// CHECK-CXX-NEXT:ret  [[TMP0]]
+//
+svmfloat8_t test_svcvtn_f8_bf16(svbfloat16x2_t zn_zm, fpm_t fpm) STREAMING {
+  return SVE_ACLE_FUNC(svcvtn_mf8,_bf16_x2,_fpm)(zn_zm, fpm);

CarolineConcatto wrote:

Out of curiosity: what zn_zm.
In the assembly I see:
BFCVTN .B, { .H-.H }
I would imagine zn1_zn2 would be more fit, but I dont think you need change.

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread via cfe-commits


@@ -10753,10 +10758,45 @@ class sve2_fp8_down_cvt_single opc, string 
mnemonic,
   let Inst{5} = 0b0;
   let Inst{4-0} = Zd;
   let Uses = [FPMR, FPCR];
+
+  let mayLoad  = 1;
+  let mayStore = 0;
 }
 
-multiclass sve2_fp8_down_cvt_single opc, string mnemonic, 
RegisterOperand src> {
+multiclass sve2_fp8_down_cvt_single opc, string mnemonic, 
RegisterOperand src,
+ValueType ty, SDPatternOperator op> {
   def NAME : sve2_fp8_down_cvt_single;
+
+  def : Pat<(nxv16i8 (op ty:$Zn1, ty:$Zn2)),
+(!cast(NAME) (REG_SEQUENCE ZPR2Mul2, $Zn1, zsub0, 
$Zn2, zsub1))>;
+}
+
+class sve2_fp8_down_cvt_single_top opc, string mnemonic, 
RegisterOperand src_ty>

CarolineConcatto wrote:

Nothing to do here, but it is really annoying that we cannot use the same 
multiclass for the FCVTNT,
 because of the Zen being an input and output.
What do you think of using if like this:
!if(!eq(opc, 0b11),(ins ZPR8:$_Zd, src_ty:$Zn), (ins src_ty:$Zn))
?

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread via cfe-commits


@@ -0,0 +1,33 @@
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +bf16 -verify -emit-llvm %s

CarolineConcatto wrote:

I think @SpencerAbson  asked on the previous PR,  but just in case why are we 
adding  -target-feature +bf16 only for the Sema tests? So this will be probably 
fixed in the previous patch

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread via cfe-commits

https://github.com/CarolineConcatto edited 
https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread via cfe-commits

https://github.com/CarolineConcatto commented:

Hi Momchil, 
Thank you for the patch.
Can you also update the commit message here before you merge?

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-12-03 Thread Jonathan Thackray via cfe-commits

https://github.com/jthackray approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/118124
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-11-29 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov created 
https://github.com/llvm/llvm-project/pull/118124

None

>From 3a9643e6c2d61eae2e23df42c19b1410d4a5fcc5 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Thu, 21 Nov 2024 11:21:29 +
Subject: [PATCH 1/2] FP8 CVT/CVTL

---
 clang/include/clang/Basic/arm_sve.td  |  10 +
 .../fp8-intrinsics/acle_sve2_fp8_cvt.c| 173 ++
 .../aarch64-sve2-intrinsics/acle_sve2_fp8.c   |  24 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  17 ++
 .../lib/Target/AArch64/AArch64SVEInstrInfo.td |  16 +-
 llvm/lib/Target/AArch64/SVEInstrFormats.td|   7 +-
 .../test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll |  78 
 7 files changed, 316 insertions(+), 9 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
 create mode 100644 clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c
 create mode 100644 llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll

diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..b9d8360843aa8e 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,13 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.aarch64.sve.fp8.cvt1.nxv8bf16( [[ZN]])
+// CHECK-NEXT:ret  [[TMP0]]
+//
+// CHECK-CXX-LABEL: define dso_local  
@_Z20test_svcvt1_bf16_mf8u13__SVMfloat8_tm(
+// CHECK-CXX-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-CXX-NEXT:  [[ENTRY:.*:]]
+// CHECK-CXX-NEXT:tail call void @llvm.aarch64.set.fpmr(i64 [[FPM]])
+// CHECK-CXX-NEXT:[[TMP0:%.*]] = tail call  

[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-11-29 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)


Changes



---

Patch is 33.78 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/118124.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+17) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c (+101) 
- (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+33) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+30) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+13-12) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+42-2) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll (+49) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..efba2d4671d819 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,20 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point
+  def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // Single-precision convert, narrow and interleave to 8-bit floating-point 
(top and bottom)
+  def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>",  "f", MergeNone, 
"aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, 
"aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail

[clang] [llvm] [AArch64] Implement FP8 SVE Intrinsics for narrowing conversions (PR #118124)

2024-11-29 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Momchil Velikov (momchil-velikov)


Changes



---

Patch is 33.78 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/118124.diff


9 Files Affected:

- (modified) clang/include/clang/Basic/arm_sve.td (+17) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c (+173) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvtn.c (+101) 
- (added) clang/test/Sema/aarch64-sve2-intrinsics/acle_sve2_fp8.c (+33) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+30) 
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+13-12) 
- (modified) llvm/lib/Target/AArch64/SVEInstrFormats.td (+42-2) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvt-cvtlt.ll (+78) 
- (added) llvm/test/CodeGen/AArch64/fp8-sve-cvtn.ll (+49) 


``diff
diff --git a/clang/include/clang/Basic/arm_sve.td 
b/clang/include/clang/Basic/arm_sve.td
index b36e592042da0b..efba2d4671d819 100644
--- a/clang/include/clang/Basic/arm_sve.td
+++ b/clang/include/clang/Basic/arm_sve.td
@@ -2447,3 +2447,20 @@ let SVETargetGuard = "sve2,faminmax", SMETargetGuard = 
"sme2,faminmax" in {
   defm SVAMIN : SInstZPZZ<"svamin", "hfd", "aarch64_sve_famin", 
"aarch64_sve_famin_u">;
   defm SVAMAX : SInstZPZZ<"svamax", "hfd", "aarch64_sve_famax", 
"aarch64_sve_famax_u">;
 }
+
+let SVETargetGuard = "sve2,fp8", SMETargetGuard = "sme2,fp8" in {
+  // 8-bit floating-point convert to BFloat16/Float16
+  def SVF1CVT : SInst<"svcvt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVT : SInst<"svcvt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // 8-bit floating-point convert to BFloat16/Float16 (top)
+  def SVF1CVTLT : SInst<"svcvtlt1_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt1", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVF2CVTLT : SInst<"svcvtlt2_{d}[_mf8]_fpm", "d~>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtlt2", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // BFloat16/Float16 convert, narrow and interleave to 8-bit floating-point
+  def SVFCVTN : SInst<"svcvtn_mf8[_{d}_x2]_fpm", "~2>", "bh", MergeNone, 
"aarch64_sve_fp8_cvtn", [VerifyRuntimeMode, SetsFPMR]>;
+
+  // Single-precision convert, narrow and interleave to 8-bit floating-point 
(top and bottom)
+  def SVFCVTNB : SInst<"svcvtnb_mf8[_f32_x2]_fpm", "~2>",  "f", MergeNone, 
"aarch64_sve_fp8_cvtnb", [VerifyRuntimeMode, SetsFPMR]>;
+  def SVFCVTNT : SInst<"svcvtnt_mf8[_f32_x2]_fpm", "~~2>", "f", MergeNone, 
"aarch64_sve_fp8_cvtnt", [VerifyRuntimeMode, SetsFPMR]>;
+}
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
new file mode 100644
index 00..c026b8aa216f32
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sve2_fp8_cvt.c
@@ -0,0 +1,173 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// RUN: %clang_cc1-triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -x c++ -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -disable-O0-optnone -Werror -Wall 
-emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s 
-check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-triple 
aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -triple 
aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2 
-target-feature +fp8 -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt 
-S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CHECK-CXX
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve 
-target-feature +sve2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme2 -target-feature +fp8 -S -disable-O0-optnone -Werror -Wall 
-o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#ifdef __ARM_FEATURE_SME
+#include 
+#else
+#include 
+#endif
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+#ifdef __ARM_FEATURE_SME
+#define STREAMING __arm_streaming
+#else
+#define STREAMING
+#endif
+
+// CHECK-LABEL: define dso_local  @test_svcvt1_bf16_mf8(
+// CHECK-SAME:  [[ZN:%.*]], i64 noundef [[FPM:%.*]]) 
#[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  [[ENTRY:.*:]]
+// CHECK-NEXT:tail call void