[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-05-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov closed 
https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-05-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 8a63b17711d36cfeb4aab591853163119f5f167d Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH 1/4] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  3 +
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea2..a18a5094a15e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index ..40fcad6a5764
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-30 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 74ee4857a76bc7eb5353dc22311e766ec5356514 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  3 +
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..a18a5094a15ed3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..40fcad6a576483
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-29 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  3 +
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..a18a5094a15ed3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..40fcad6a576483
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-29 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  3 +
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 219 insertions(+), 7 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..a18a5094a15ed3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..40fcad6a576483
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-18 Thread Momchil Velikov via cfe-commits


@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
-multiclass sme2p1_fmop_tile_fp16 op, 
ZPRRegOp zpr_ty>{
-  def NAME : sme_fp_outer_product_inst {
+multiclass sme2p1_fmop_tile_f8f16 op> {
+  def NAME : sme_fp_outer_product_inst {
 bits<1> ZAda;
 let Inst{2-1} = 0b00;
 let Inst{0}   = ZAda;
   }
 }
 
+multiclass sme2p1_fmop_tile_fp16 op, 
ValueType vt, SDPatternOperator intrinsic = null_frag> {
+  def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr {

momchil-velikov wrote:

Done.

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-18 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  5 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 220 insertions(+), 8 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..a18a5094a15ed3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..40fcad6a576483
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-15 Thread via cfe-commits


@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
-multiclass sme2p1_fmop_tile_fp16 op, 
ZPRRegOp zpr_ty>{
-  def NAME : sme_fp_outer_product_inst {
+multiclass sme2p1_fmop_tile_f8f16 op> {
+  def NAME : sme_fp_outer_product_inst {
 bits<1> ZAda;
 let Inst{2-1} = 0b00;
 let Inst{0}   = ZAda;
   }
 }
 
+multiclass sme2p1_fmop_tile_fp16 op, 
ValueType vt, SDPatternOperator intrinsic = null_frag> {
+  def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr {

CarolineConcatto wrote:

I think you can remove bits<2> op, because all of them are 0b11, now that you 
created a new class for the fp8.

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-15 Thread via cfe-commits

https://github.com/CarolineConcatto approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-15 Thread via cfe-commits

https://github.com/CarolineConcatto edited 
https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-11 Thread Momchil Velikov via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {

momchil-velikov wrote:

Done. The Clang instrinsics use the same target features as the underlying 
assembly instructions.
If the features on the assembly instruction are not entirely correct we should 
fix it, but in a separate patch.

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-11 Thread Momchil Velikov via cfe-commits


@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : 
sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011,
 defm FCVT_2ZZ_H  : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>;
 defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>;
 
-defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>;
-defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
+defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, 
int_aarch64_sme_mopa_nonwide>;

momchil-velikov wrote:

Discussed offline.

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-11 Thread Momchil Velikov via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mops_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mopa_nonwide",

momchil-velikov wrote:

In fact we can reuse the existing `aarch64_sme_mopa` which is used for other 
non-widening operations and since they are polymorphic and non-widening the 
instantiation type is enough to disambiguate the operation.

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-11 Thread Momchil Velikov via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",

momchil-velikov wrote:

Done

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-11 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88105

>From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  5 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 220 insertions(+), 8 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..a18a5094a15ed3 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme2,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mopa",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b",
+ MergeNone, "aarch64_sme_mops",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2p1,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mopa",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h",
+MergeNone, "aarch64_sme_mops",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..40fcad6a576483
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-10 Thread via cfe-commits


@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : 
sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011,
 defm FCVT_2ZZ_H  : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>;
 defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>;
 
-defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>;
-defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
+defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, 
int_aarch64_sme_mopa_nonwide>;

CarolineConcatto wrote:

Not in this patch, but I think all the instructions under 
`let Predicates = [HasSME2p1, HasSMEF16F16]`, 
should be under
`  let Predicates = [HasSME2, HasSMEF16F16].` to be consistent with clang
Otherwise we will have some problem with clang( clang has sme2 and the backend 
has sme2.1.
TBH when I look at
[FEAT_SME_F16F16](https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en)
It maybe only  need FEAT_SME_F16F16, like

 defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", 
int_aarch64_sme_usmopa_wide>;


https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-10 Thread via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mops_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mopa_nonwide",

CarolineConcatto wrote:

Can you replace: aarch64_sme_mopa_ by aarch64_sme_mopa_za16?

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-10 Thread via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {

CarolineConcatto wrote:

I think it should be  "sme2,b16b16" 
if we see the BFMOPA
https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en

if !IsFeatureImplemented(FEAT_SME2) || !IsFeatureImplemented(FEAT_SVE_B16B16) 
then UNDEFINED;

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-10 Thread via cfe-commits


@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",

CarolineConcatto wrote:

Just in case:
I remember a discussion that we said we could use MergeNone and in the name add 
 _m:
svmopa_za16[_bf16]_m
Because MergeOp1 is only adding _m

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-10 Thread via cfe-commits


@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : 
sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011,
 defm FCVT_2ZZ_H  : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>;
 defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>;
 
-defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>;
-defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>;
+defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, 
int_aarch64_sme_mopa_nonwide>;

CarolineConcatto wrote:

Just in case, maybe it is better to change this to;
 sme2_fmop_tile_fp16 and not change the fp8

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread via cfe-commits

Lukacma wrote:

> > I noticed that file names and file location are using sme2 as prefix. 
> > Shouldn't we use sme2p1 prefix for this intrinsic ?
> 
> None of instructions seem to require `FEAT_SME2p1`: 
> https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en
>  
> https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en
>  
> https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en
>  
> https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en

My bad then. Thought all these new intrinsics for  sme2p1

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread Momchil Velikov via cfe-commits

momchil-velikov wrote:

> I noticed that file names and file location are using sme2 as prefix. 
> Shouldn't we use sme2p1 prefix for this intrinsic ?

None of instructions seem to require `FEAT_SME2p1`:
https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en
https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en
https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en
https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en


https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread via cfe-commits

Lukacma wrote:

I noticed that file names and file location are using sme2 as prefix. Shouldn't 
we use sme2p1 prefix for this intrinsic ?

https://github.com/llvm/llvm-project/pull/88105
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: Momchil Velikov (momchil-velikov)


Changes

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.

---
Full diff: https://github.com/llvm/llvm-project/pull/88105.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+24) 
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
(+97) 
- (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
(+34) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+4-1) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+5-5) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14-2) 
- (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll (+42) 


``diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..e60a400b094850 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mops_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mopa_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mops_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..36a75609534653
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Momchil Velikov (momchil-velikov)


Changes

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.

---
Full diff: https://github.com/llvm/llvm-project/pull/88105.diff


7 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+24) 
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
(+97) 
- (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
(+34) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+4-1) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+5-5) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14-2) 
- (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll (+42) 


``diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..e60a400b094850 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mops_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mopa_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mops_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..36a75609534653
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SME_OVERLOADED_FORMS
+#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local 

[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)

2024-04-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov created 
https://github.com/llvm/llvm-project/pull/88105

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.

>From ee78ad565158c2d1301265415992511ea559e7a6 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Tue, 9 Apr 2024 10:52:41 +0100
Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm,
 svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");
void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn,
 svbool_t pm, svfloat16_t zn, svfloat16_t zm)
__arm_streaming __arm_inout("za");

as well as the corresponding `bf16` variants.
---
 clang/include/clang/Basic/arm_sme.td  | 24 +
 .../acle_sme2_mopa_nonwide.c  | 97 +++
 .../acle_sme2_mopa_nonwide.c  | 34 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |  5 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +-
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++-
 .../CodeGen/AArch64/sme2-intrinsics-mopa.ll   | 42 
 7 files changed, 220 insertions(+), 8 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..e60a400b094850 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
 }
+
+
+// SME2p1 - FMOPA, FMOPS (non-widening)
+let TargetGuard = "sme,b16b16" in {
+  def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mopa_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b",
+ MergeOp1, "aarch64_sme_mops_nonwide",
+ [IsStreaming, IsInOutZA],
+ [ImmCheck<0, ImmCheck0_1>]>;
+}
+
+let TargetGuard = "sme2,sme-f16f16" in {
+  def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mopa_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h",
+MergeOp1, "aarch64_sme_mops_nonwide",
+[IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
new file mode 100644
index 00..36a75609534653
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c
@@ -0,0 +1,97 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature  +b16b16 
-target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s 
-check-prefixes=CHECK-CXX
+// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1