[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-05-23 Thread via cfe-commits

https://github.com/Lukacma closed 
https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-05-23 Thread via cfe-commits

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +
Subject: [PATCH 1/5] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea28..b00eabe331169 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 0..3b661ec425cda
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-05-23 Thread via cfe-commits

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +
Subject: [PATCH 1/4] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea28..b00eabe331169 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 0..3b661ec425cda
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-18 Thread via cfe-commits

https://github.com/CarolineConcatto approved this pull request.


https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-17 Thread via cfe-commits


@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define  void @test_svzero_za64_vg1x2(i32  %slice)  #0 {

Lukacma wrote:

Done


https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-17 Thread via cfe-commits

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +
Subject: [PATCH 1/4] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..b00eabe331169f 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00..3b661ec425cda5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-17 Thread via cfe-commits

https://github.com/Lukacma deleted 
https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-17 Thread via cfe-commits


@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define  void @test_svzero_za64_vg1x2(i32  %slice)  #0 {

Lukacma wrote:

I have added the test, but offset creation is not working as expected. I can 
try to debug why as part of this patch, but maybe it would be better to do it 
as a separate pull request, as I do not think it is unique to this intrinsic. 
What do you think?

https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-17 Thread via cfe-commits

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +
Subject: [PATCH 1/3] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..b00eabe331169f 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00..3b661ec425cda5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-10 Thread via cfe-commits


@@ -4774,39 +4784,57 @@ class sme2p1_zero_matrix opc, Operand index_ty, 
string mnemonic,
 }
 
 multiclass sme2p1_zero_matrix {
-  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, 
"vgx2"> {
+  def _VG2_Z : sme2p1_zero_matrix<{0b000,?,?,?}, sme_elm_idx0_7, mnemonic, 
"vgx2">, SMEPseudo2Instr {
 bits<3> imm;
 let Inst{2-0} = imm;
   }
-  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic> {
+  def _2Z : sme2p1_zero_matrix<{0b001,?,?,?}, uimm3s2range, mnemonic>, 
SMEPseudo2Instr {
 bits<3> imm;
 let Inst{2-0} = imm;
   }
-  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, 
"vgx2"> {
+  def _VG2_2Z : sme2p1_zero_matrix<{0b0100,?,?}, uimm2s2range, mnemonic, 
"vgx2">, SMEPseudo2Instr {
 bits<2> imm;
 let Inst{1-0} = imm;
   }
-  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, 
"vgx4"> {
+  def _VG4_2Z : sme2p1_zero_matrix<{0b0110,?,?}, uimm2s2range, mnemonic, 
"vgx4">, SMEPseudo2Instr {
 bits<2> imm;
 let Inst{1-0} = imm;
   }
-  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, 
"vgx4"> {
+  def _VG4_Z : sme2p1_zero_matrix<{0b100,?,?,?}, sme_elm_idx0_7, mnemonic, 
"vgx4">, SMEPseudo2Instr {
 bits<3> imm;
 let Inst{2-0} = imm;
   }
-  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic> {
+  def _4Z : sme2p1_zero_matrix<{0b1010,?,?}, uimm2s4range, mnemonic>, 
SMEPseudo2Instr {
 bits<2> imm;
 let Inst{1-0} = imm;
   }
-  def _VG2_4Z :sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, "vgx2"> 
{
+  def _VG2_4Z : sme2p1_zero_matrix<{0b11000,?}, uimm1s4range, mnemonic, 
"vgx2">, SMEPseudo2Instr {
 bits<1> imm;
 let Inst{0}   = imm;
   }
-  def _VG4_4Z :sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, "vgx4"> 
{
+  def _VG4_4Z : sme2p1_zero_matrix<{0b11100,?}, uimm1s4range, mnemonic, 
"vgx4">, SMEPseudo2Instr {
 bits<1> imm;
 let Inst{0}   = imm;
   }
-}
+
+  def NAME # _VG2_Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _VG4_Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _2Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _VG2_2Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _VG4_2Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _4Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _VG2_4Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+  def NAME # _VG4_4Z_PSEUDO : sem2p1_zero_matrix_pseudo;
+
+  def : SME2_Zero_Matrix_Pat;

CarolineConcatto wrote:

Nothing to do here; this is just to document that, after debating with myself 
and reading the ACLE spec about vg1x2 and vg2x1, I think that the instructions 
and the vgs are correct.

```
*   Intrinsic functions have a `_vg1x2`, `_vg1x4` suffix if the function
operates on 2 or 4 single-vector groups within the ZA array.
*   Intrinsic functions have a `_vg2x1`, `_vg2x2`, `_vg2x4` suffix if
the function operates on 1, 2 or 4 double-vector groups within the ZA array.
*   Intrinsic functions have a `_vg4x1`, `_vg4x2`, `_vg4x4` suffix if the
function operates on 1, 2 or 4 quad-vector groups within the ZA array.
For example:
```


https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-10 Thread via cfe-commits


@@ -0,0 +1,94 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"
+
+define  void @test_svzero_za64_vg1x2(i32  %slice)  #0 {

CarolineConcatto wrote:

Can you add tests for when slice is not zero? Something like:
define  void @test_svzero_za64_vg1x2(i32  %slice)  #0 {
 tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice)
%slice.max = add i32 %slice, 7
tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 %slice.max)
ret void
}


https://github.com/llvm/llvm-project/pull/88114
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-10 Thread via cfe-commits

https://github.com/Lukacma updated 
https://github.com/llvm/llvm-project/pull/88114

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +
Subject: [PATCH 1/2] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..b00eabe331169f 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00..3b661ec425cda5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.zero.za64.vg1x2(i32 0)
+// CHECK-NEXT:ret void
+//
+void test_svzero_za64_vg1x2(void) __arm_streaming __arm_inout("za")
+{
+   SVE_ACLE_FUNC(svzero_za64,_vg1x2)(0);
+}
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x4(
+// 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-09 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: None (Lukacma)


Changes

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics:

```
  void svzero_za64_vg1x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg1x4(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x1(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x4(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x1(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x4(uint32_t slice)
__arm_streaming __arm_inout("za");
```

---
Full diff: https://github.com/llvm/llvm-project/pull/88114.diff


5 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+20-1) 
- (added) clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
(+139) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+6) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+37-9) 
- (added) llvm/test/CodeGen/AArch64/sme2p1-intrinsics-zero.ll (+94) 


```diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..9bcfbf8c4f5c5e 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,25 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vm", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +692,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00000000000000..bdd75798554148
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,139 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p 
mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -o /dev/null %s
+
+#include <arm_sme.h>
+
+#define SVE_ACLE_FUNC(A1,A2) A1##A2
+
+// CHECK-LABEL: define dso_local void @test_svzero_za64_vg1x2(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT: 

[clang] [llvm] [AArch64][SME] Add intrinsics for vector groups ZERO (PR #88114)

2024-04-09 Thread via cfe-commits

https://github.com/Lukacma created 
https://github.com/llvm/llvm-project/pull/88114

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics:

```
  void svzero_za64_vg1x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg1x4(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x1(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg2x4(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x1(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x2(uint32_t slice)
__arm_streaming __arm_inout("za");

  void svzero_za64_vg4x4(uint32_t slice)
__arm_streaming __arm_inout("za");
```

>From c44bd42f8011dd09771fda50a76a7321342c2b2f Mon Sep 17 00:00:00 2001
From: Marian Lukac 
Date: Thu, 4 Apr 2024 14:36:54 +0000
Subject: [PATCH 1/2] WIP

---
 clang/include/clang/Basic/arm_sme.td  |  29 ++-
 .../acle_sme2p1_zero.c|  91 +
 .../acle_sme2p1_imm.cpp   | 190 ++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   6 +
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
 create mode 100644 
clang/test/Sema/aarch64-sme2p1-intrinsics/acle_sme2p1_imm.cpp

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..b00eabe331169f 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -146,6 +146,33 @@ let TargetGuard = "sme" in {
  [IsOverloadNone, IsStreamingCompatible, IsOutZA]>;
 }
 
+let TargetGuard = "sme2p1" in {
+  def SVZERO_ZA64_VG1x2 : SInst<"svzero_za64_vg1x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG1x4 : SInst<"svzero_za64_vg1x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg1x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x1 : SInst<"svzero_za64_vg2x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_7>]>;
+  def SVZERO_ZA64_VG2x2 : SInst<"svzero_za64_vg2x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG2x4 : SInst<"svzero_za64_vg2x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg2x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x1 : SInst<"svzero_za64_vg4x1", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x1",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_3>]>;
+  def SVZERO_ZA64_VG4x2 : SInst<"svzero_za64_vg4x2", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x2",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+  def SVZERO_ZA64_VG4x4 : SInst<"svzero_za64_vg4x4", "vi", "", MergeNone, 
"aarch64_sme_zero_za64_vg4x4",
+[IsOverloadNone, IsStreaming, IsInOutZA],
+[ImmCheck<0, ImmCheck0_1>]>;
+}
+
 

 // SME - Counting elements in a streaming vector
 
@@ -673,4 +700,4 @@ let TargetGuard = "sme2" in {
 let TargetGuard = "sme2" in {
   def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>;
   def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", 
"cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, 
IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>;
-}
+}
\ No newline at end of file
diff --git a/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c 
b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
new file mode 100644
index 00000000000000..3b661ec425cda5
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2p1-intrinsics/acle_sme2p1_zero.c
@@ -0,0 +1,91 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2p1 -S 
-disable-O0-optnone -Werror -Wall -emit-llvm -o