[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-05-09 Thread Momchil Velikov via cfe-commits

momchil-velikov wrote:

> Thanks for the quick revert!
> 
> Is the failure due to a conflict with another commit that landed?

Perhaps, e.g. https://github.com/llvm/llvm-project/pull/91140

https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-05-09 Thread Nico Weber via cfe-commits

nico wrote:

Thanks for the quick revert!

Is the failure due to a conflict with another commit that landed?

https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-05-09 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov closed 
https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-05-07 Thread via cfe-commits

https://github.com/CarolineConcatto approved this pull request.


https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-30 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88266

>From cafe0a8b70ad0189b638ec377e7d8cba9e786ecb Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Wed, 10 Apr 2024 11:25:50 +0100
Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector
 accumulators

---
 clang/include/clang/Basic/arm_sme.td  |  10 +
 .../acle_sme2_add_sub_za16.c  | 193 ++
 .../acle_sme2_add_sub_za16.c  |  29 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   2 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  16 +-
 .../AArch64/sme2-intrinsics-add-sub-za16.ll   | 148 ++
 6 files changed, 389 insertions(+), 9 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..000bd97a4b25d5 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,6 +298,16 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme-f16f16|sme-f8f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2,b16b16" in {
+def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
new file mode 100644
index 00..d98427fac610b8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  
+sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  
+sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 
[[SLICE]],  [[TMP0]],  [[TMP1]])
+// CHECK-NEXT:ret void
+//
+// CHECK-CXX-LABEL: define dso_local void 
@_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed

[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-29 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov updated 
https://github.com/llvm/llvm-project/pull/88266

>From 09167c5df2b50476a5073ff2e527503d090e7995 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Wed, 10 Apr 2024 11:25:50 +0100
Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector
 accumulators

---
 clang/include/clang/Basic/arm_sme.td  |  10 +
 .../acle_sme2_add_sub_za16.c  | 193 ++
 .../acle_sme2_add_sub_za16.c  |  29 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   2 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  16 +-
 .../AArch64/sme2-intrinsics-add-sub-za16.ll   | 148 ++
 6 files changed, 389 insertions(+), 9 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..000bd97a4b25d5 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,6 +298,16 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme-f16f16|sme-f8f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2,b16b16" in {
+def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
new file mode 100644
index 00..d98427fac610b8
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -0,0 +1,193 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  
+sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature  
+sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 
[[SLICE]],  [[TMP0]],  [[TMP1]])
+// CHECK-NEXT:ret void
+//
+// CHECK-CXX-LABEL: define dso_local void 
@_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
+// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed

[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-15 Thread via cfe-commits


@@ -0,0 +1,191 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// REQUIRES: aarch64-registered-target

CarolineConcatto wrote:

I believe we should add the tests  in the file that already exist:
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add.c

https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-15 Thread via cfe-commits


@@ -0,0 +1,146 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 
UTC_ARGS: --version 4
+; RUN: llc -verify-machineinstrs < %s | FileCheck %s
+
+target triple = "aarch64-linux"

CarolineConcatto wrote:

I think we should add the tests in test/CodeGen/AArch64/sme2-intrinsics-add.ll

https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-15 Thread via cfe-commits


@@ -298,6 +298,17 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme2p1,sme-f16f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2p1,b16b16" in {

CarolineConcatto wrote:

According to the ACLE these builtins should be under sme2. Maybe we need some 
work in the backend.  The smef16f16 prototypes in clang are under  target 
feature =sme2p1, smef16f16 in assembly/disassembly.

https://github.com/llvm/llvm-project/pull/88266
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-10 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-llvm-ir

Author: Momchil Velikov (momchil-velikov)


Changes

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");

as well as the corresponding `bf16` variants.

---

Patch is 31.84 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/88266.diff


6 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+11) 
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
(+191) 
- (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
(+26) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+2-2) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+8-8) 
- (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (+146) 


``diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..dcfaefa7a3e266 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,6 +298,17 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme2p1,sme-f16f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2p1,b16b16" in {
+def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
new file mode 100644
index 00..bdf07f86b9c93d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -0,0 +1,191 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 
[[SLICE]],  [[TMP0]],  [[TMP1]])
+// CHECK-NEXT:ret void
+//
+// CHECK-CXX-LABEL: define dso_local void 
@_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
+// CHECK-CXX-SAME: i3

[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-10 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-backend-aarch64

Author: Momchil Velikov (momchil-velikov)


Changes

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");

as well as the corresponding `bf16` variants.

---

Patch is 31.84 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/88266.diff


6 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+11) 
- (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
(+191) 
- (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
(+26) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+2-2) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+8-8) 
- (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll (+146) 


``diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..dcfaefa7a3e266 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,6 +298,17 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme2p1,sme-f16f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2p1,b16b16" in {
+def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
new file mode 100644
index 00..bdf07f86b9c93d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -0,0 +1,191 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8)
+// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 
[[SLICE]],  [[TMP0]],  [[TMP1]])
+// CHECK-NEXT:ret void
+//
+// CHECK-CXX-LABEL: define dso_local void 
@_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t(
+// CHECK-CXX-

[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)

2024-04-10 Thread Momchil Velikov via cfe-commits

https://github.com/momchil-velikov created 
https://github.com/llvm/llvm-project/pull/88266

According to the specification in
https://github.com/ARM-software/acle/pull/309 this adds the intrinsics

void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming 
__arm_inout("za");
void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming 
__arm_inout("za");

as well as the corresponding `bf16` variants.

>From 2b0557d4a62476b827352b6775588cef15cecd33 Mon Sep 17 00:00:00 2001
From: Momchil Velikov 
Date: Wed, 10 Apr 2024 11:25:50 +0100
Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector
 accumulators

---
 clang/include/clang/Basic/arm_sme.td  |  11 +
 .../acle_sme2_add_sub_za16.c  | 191 ++
 .../acle_sme2_add_sub_za16.c  |  26 +++
 llvm/include/llvm/IR/IntrinsicsAArch64.td |   4 +-
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td |  16 +-
 .../AArch64/sme2-intrinsics-add-sub-za16.ll   | 146 +
 6 files changed, 384 insertions(+), 10 deletions(-)
 create mode 100644 
clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 
clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 1ac6d5170ea283..dcfaefa7a3e266 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -298,6 +298,17 @@ multiclass ZAAddSub {
 def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", 
"vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, 
IsInOutZA], []>;
 def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", 
"vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, 
IsInOutZA], []>;
   }
+
+  let TargetGuard = "sme2p1,sme-f16f16" in {
+def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
+  let TargetGuard = "sme2p1,b16b16" in {
+def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", 
"vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, 
IsInOutZA], []>;
+def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", 
"vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, 
IsInOutZA], []>;
+  }
+
 }
 
 defm SVADD : ZAAddSub<"add">;
diff --git 
a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c 
b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
new file mode 100644
index 00..bdf07f86b9c93d
--- /dev/null
+++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c
@@ -0,0 +1,191 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 4
+// RUN: %clang_cc1   -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest 
-triple aarch64-none-linux-gnu -target-feature +sme2p1  -target-feature 
+sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | 
FileCheck %s -check-prefix CHECK-CXX
+
+// REQUIRES: aarch64-registered-target
+
+#include 
+
+#ifdef SVE_OVERLOADED_FORMS
+#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3
+#else
+#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
+#endif
+
+// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16(
+// CHECK-SAME: i32 noundef [[SLICE:%.*]],  [[ZN:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:[[TMP0:%.*]] = tail call  
@llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0)
+// CHECK-NEXT:[[TMP1:%.*]] = tail call  
@llvm.vector.extr