[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-09 Thread via cfe-commits

https://github.com/SpencerAbson closed 
https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-08 Thread via cfe-commits

https://github.com/SpencerAbson updated 
https://github.com/llvm/llvm-project/pull/118115

>From e1181dd6dab09b20be8077d1f4e70ef4da7ab437 Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Mon, 25 Nov 2024 21:47:20 +
Subject: [PATCH 1/2] [AArch64] Implement intrinsics for SME FP8 FMOPA

---
 clang/include/clang/Basic/arm_sme.td  | 10 
 clang/lib/CodeGen/CGBuiltin.cpp   |  6 ++
 .../fp8-intrinsics/acle_sme2_fp8_fmopa.c  | 55 +++
 .../acle_sme2_fp8_imm.c   | 18 ++
 .../acle_sme2_fp8_mopa.c  | 13 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 17 +++---
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 26 -
 .../AArch64/sme2-fp8-intrinsics-fmopa.ll  | 22 
 9 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_imm.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_mopa.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-fmopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 0f689e82bdb742..71b2c7cdd04f93 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -824,4 +824,14 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme-f8f32" in {
+  def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za32",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme-f8f16" in {
+  def SVMOPA_FP8_ZA16 : Inst<"svmopa_za16[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za16",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_1>]>;
+}
+
 } // let SVETargetGuard = InvalidMode
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 41c632ead6aa3c..c2e983eebebc10 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10201,6 +10201,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags 
&TypeFlags) {
   case SVETypeFlags::EltTyInt64:
 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
 
+  case SVETypeFlags::EltTyMFloat8:
+return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
   case SVETypeFlags::EltTyFloat16:
 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
   case SVETypeFlags::EltTyBFloat16:
@@ -11255,6 +11257,10 @@ Value 
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svstr_za)
 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
+  // Emit set FPMR for intrinsics that require it
+  if (TypeFlags.setsFPMR())
+Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
+   Ops.pop_back_val());
   // Handle builtins which require their multi-vector operands to be swapped
   swapCommutativeSMEOperands(BuiltinID, Ops);
 
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
new file mode 100644
index 00..95d6383ab30efe
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
@@ -0,0 +1,55 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme

[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-06 Thread via cfe-commits

https://github.com/CarolineConcatto approved this pull request.

LGTM

https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-06 Thread via cfe-commits


@@ -305,6 +305,21 @@ multiclass sme_outer_product_fp32 sz, 
ZPRRegOp zpr_ty, string mne
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
+multiclass sme2_fp8_fmopa_za32 {
+def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, 
mnemonic>, SMEPseudo2Instr {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+

CarolineConcatto wrote:

Ok, fair enough. 

https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-05 Thread via cfe-commits


@@ -305,6 +305,21 @@ multiclass sme_outer_product_fp32 sz, 
ZPRRegOp zpr_ty, string mne
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
+multiclass sme2_fp8_fmopa_za32 {
+def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, 
mnemonic>, SMEPseudo2Instr {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+

SpencerAbson wrote:

Since the intrinsic matches the pseudo-instruction, the machine-instruction 
does not actually need to adopt the flags of the intrinsic 
(`[IntrInaccessibleMemOnly, IntrHasSideEffects]  => mayStore=1,mayLoad=1`). 
This is probably a good thing, as the instructions do not actually access 
memory and have their non-operand dependency on the FPMR described by `Uses`.

IMO, we should either be explicit about the effects of 
`[IntrInaccessibleMemOnly, IntrHasSideEffects]` on the **pseudo-instruction**, 
or disreguard this entirely. What do you think?


https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-05 Thread Momchil Velikov via cfe-commits


@@ -305,6 +305,21 @@ multiclass sme_outer_product_fp32 sz, 
ZPRRegOp zpr_ty, string mne
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
+multiclass sme2_fp8_fmopa_za32 {
+def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, 
mnemonic>, SMEPseudo2Instr {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+

momchil-velikov wrote:

Yes, only where the ISel pattern and the instruction interact. If the transform 
sequence is DAG -> pseudo-insn -> real-insn, onlt the pseudo-insn would need 
flags (and "need" here means "to silence tblgen" not a legit need).

https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-04 Thread via cfe-commits

https://github.com/CarolineConcatto commented:

Thank you for the patch Spencer.


https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-04 Thread via cfe-commits

https://github.com/CarolineConcatto edited 
https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-04 Thread via cfe-commits


@@ -305,6 +305,21 @@ multiclass sme_outer_product_fp32 sz, 
ZPRRegOp zpr_ty, string mne
   def : SME_ZA_Tile_TwoPred_TwoVec_Pat;
 }
 
+multiclass sme2_fp8_fmopa_za32 {
+def NAME : sme_fp_outer_product_inst<0, 0b01, 0b00, TileOp32, ZPR8, 
mnemonic>, SMEPseudo2Instr {
+  bits<2> ZAda;
+  let Inst{1-0} = ZAda;
+  let Inst{2}   = 0b0;
+

CarolineConcatto wrote:

nit:
Can you do this:

```
  let Uses = [FPMR, FPCR]; 
  let mayStore = 1
  let mayLoad = 1
}
 def NAME # _PSEUDO : sme_outer_product_pseudo, 
SMEPseudo2Instr;
```

So it is similar to what is being done for previous instructions.

https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-02 Thread via cfe-commits

https://github.com/SpencerAbson deleted 
https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-12-02 Thread via cfe-commits

https://github.com/SpencerAbson updated 
https://github.com/llvm/llvm-project/pull/118115

>From a92b7a9faba65931918e4c3d6763896f1e1eda6b Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Mon, 25 Nov 2024 21:47:20 +
Subject: [PATCH] [AArch64] Implement intrinsics for SME FP8 FMOPA

---
 clang/include/clang/Basic/arm_sme.td  | 10 
 clang/lib/CodeGen/CGBuiltin.cpp   |  6 ++
 .../fp8-intrinsics/acle_sme2_fp8_fmopa.c  | 55 +++
 .../acle_sme2_fp8_imm.c   | 18 ++
 .../acle_sme2_fp8_mopa.c  | 13 +
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 17 +++---
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 26 -
 .../AArch64/sme2-fp8-intrinsics-fmopa.ll  | 22 
 9 files changed, 166 insertions(+), 12 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_imm.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_mopa.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-fmopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 0f689e82bdb742..71b2c7cdd04f93 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -824,4 +824,14 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme-f8f32" in {
+  def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za32",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme-f8f16" in {
+  def SVMOPA_FP8_ZA16 : Inst<"svmopa_za16[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za16",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_1>]>;
+}
+
 } // let SVETargetGuard = InvalidMode
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..56595bb4704e74 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10183,6 +10183,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags 
&TypeFlags) {
   case SVETypeFlags::EltTyInt64:
 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
 
+  case SVETypeFlags::EltTyMFloat8:
+return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
   case SVETypeFlags::EltTyFloat16:
 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
   case SVETypeFlags::EltTyBFloat16:
@@ -11234,6 +11236,10 @@ Value 
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svstr_za)
 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
+  // Emit set FPMR for intrinsics that require it
+  if (TypeFlags.setsFPMR())
+Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
+   Ops.pop_back_val());
   // Handle builtins which require their multi-vector operands to be swapped
   swapCommutativeSMEOperands(BuiltinID, Ops);
 
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
new file mode 100644
index 00..95d6383ab30efe
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
@@ -0,0 +1,55 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme-f8f16 -target-feature +sme-f8f32 
-disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S 
-passes=mem2reg,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f

[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-11-29 Thread via cfe-commits


@@ -587,7 +587,6 @@ void SVEType::applyTypespec(StringRef TS) {
   ElementBitwidth = 16;

SpencerAbson wrote:

This is exactly what my refactoring work will help us avoid - so don't fear!

https://github.com/llvm/llvm-project/pull/118115
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-11-29 Thread via cfe-commits

llvmbot wrote:




@llvm/pr-subscribers-clang

Author: None (SpencerAbson)


Changes

This patch implements the following intrinsics:

8-bit floating-point sum of outer products and accumulate.
``` c
  // Only if __ARM_FEATURE_SME_F8F16 != 0
void svmopa_za16[_mf8]_m_fpm(uint64_t tile, svbool_t pn, svbool_t pm,
 svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm)
 __arm_streaming __arm_inout("za");

  // Only if __ARM_FEATURE_SME_F8F32 != 0
void svmopa_za32[_mf8]_m_fpm(uint64_t tile, svbool_t pn, svbool_t pm,
 svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm)
 __arm_streaming __arm_inout("za");
```

In accordance with: https://github.com/ARM-software/acle/pull/323/

Co-authored-by: Momchil Velikov momchil.velikov@arm.com
Co-authored-by: Marian Lukac marian.lukac@arm.com

---

Patch is 20.77 KiB, truncated to 20.00 KiB below, full version: 
https://github.com/llvm/llvm-project/pull/118115.diff


10 Files Affected:

- (modified) clang/include/clang/Basic/arm_sme.td (+10) 
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+6) 
- (added) clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c (+55) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_imm.c (+18) 
- (added) clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_mopa.c (+13) 
- (modified) clang/utils/TableGen/SveEmitter.cpp (+14-1) 
- (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+11) 
- (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+7-10) 
- (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+24-2) 
- (added) llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-fmopa.ll (+22) 


``diff
diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 0f689e82bdb742..71b2c7cdd04f93 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -824,4 +824,14 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme-f8f32" in {
+  def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za32",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme-f8f16" in {
+  def SVMOPA_FP8_ZA16 : Inst<"svmopa_za16[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za16",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_1>]>;
+}
+
 } // let SVETargetGuard = InvalidMode
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..56595bb4704e74 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10183,6 +10183,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags 
&TypeFlags) {
   case SVETypeFlags::EltTyInt64:
 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
 
+  case SVETypeFlags::EltTyMFloat8:
+return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
   case SVETypeFlags::EltTyFloat16:
 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
   case SVETypeFlags::EltTyBFloat16:
@@ -11234,6 +11236,10 @@ Value 
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svstr_za)
 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
+  // Emit set FPMR for intrinsics that require it
+  if (TypeFlags.setsFPMR())
+Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
+   Ops.pop_back_val());
   // Handle builtins which require their multi-vector operands to be swapped
   swapCommutativeSMEOperands(BuiltinID, Ops);
 
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
new file mode 100644
index 00..95d6383ab30efe
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
@@ -0,0 +1,55 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s -check-prefix=CPP-CHECK
+// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu 
-target-feature +sme -target-feature +sme-f

[clang] [llvm] [AArch64] Implement intrinsics for SME FP8 FMOPA (PR #118115)

2024-11-29 Thread via cfe-commits

https://github.com/SpencerAbson created 
https://github.com/llvm/llvm-project/pull/118115

This patch implements the following intrinsics:

8-bit floating-point sum of outer products and accumulate.
``` c
  // Only if __ARM_FEATURE_SME_F8F16 != 0
void svmopa_za16[_mf8]_m_fpm(uint64_t tile, svbool_t pn, svbool_t pm,
 svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm)
 __arm_streaming __arm_inout("za");

  // Only if __ARM_FEATURE_SME_F8F32 != 0
void svmopa_za32[_mf8]_m_fpm(uint64_t tile, svbool_t pn, svbool_t pm,
 svmfloat8_t zn, svmfloat8_t zm, fpm_t fpm)
 __arm_streaming __arm_inout("za");
```

In accordance with: https://github.com/ARM-software/acle/pull/323/

Co-authored-by: Momchil Velikov momchil.veli...@arm.com
Co-authored-by: Marian Lukac marian.lu...@arm.com

>From 010eae4a5e23377f86b264e2ea6af9b35f10c9d6 Mon Sep 17 00:00:00 2001
From: Spencer Abson 
Date: Mon, 25 Nov 2024 21:47:20 +
Subject: [PATCH] [AArch64] Implement intrinsics for SME FP8 FMOPA

---
 clang/include/clang/Basic/arm_sme.td  | 10 
 clang/lib/CodeGen/CGBuiltin.cpp   |  6 ++
 .../fp8-intrinsics/acle_sme2_fp8_fmopa.c  | 55 +++
 .../acle_sme2_fp8_imm.c   | 18 ++
 .../acle_sme2_fp8_mopa.c  | 13 +
 clang/utils/TableGen/SveEmitter.cpp   | 15 -
 llvm/include/llvm/IR/IntrinsicsAArch64.td | 11 
 .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 17 +++---
 llvm/lib/Target/AArch64/SMEInstrFormats.td| 26 -
 .../AArch64/sme2-fp8-intrinsics-fmopa.ll  | 22 
 10 files changed, 180 insertions(+), 13 deletions(-)
 create mode 100644 
clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_imm.c
 create mode 100644 clang/test/Sema/aarch64-fp8-intrinsics/acle_sme2_fp8_mopa.c
 create mode 100644 llvm/test/CodeGen/AArch64/sme2-fp8-intrinsics-fmopa.ll

diff --git a/clang/include/clang/Basic/arm_sme.td 
b/clang/include/clang/Basic/arm_sme.td
index 0f689e82bdb742..71b2c7cdd04f93 100644
--- a/clang/include/clang/Basic/arm_sme.td
+++ b/clang/include/clang/Basic/arm_sme.td
@@ -824,4 +824,14 @@ let SMETargetGuard = "sme-lutv2" in {
   def SVLUTI4_ZT_X4 : SInst<"svluti4_zt_{d}_x4", "4i2.u", "cUc", MergeNone, 
"aarch64_sme_luti4_zt_x4", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>]>;
 }
 
+let SMETargetGuard = "sme-f8f32" in {
+  def SVMOPA_FP8_ZA32 : Inst<"svmopa_za32[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za32",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_3>]>;
+}
+
+let SMETargetGuard = "sme-f8f16" in {
+  def SVMOPA_FP8_ZA16 : Inst<"svmopa_za16[_mf8]_m_fpm", "viPPdd>", "m", 
MergeNone, "aarch64_sme_fp8_fmopa_za16",
+ [IsStreaming, IsInOutZA, SetsFPMR, 
IsOverloadNone], [ImmCheck<0, ImmCheck0_1>]>;
+}
+
 } // let SVETargetGuard = InvalidMode
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index cb9c23b8e0a0d0..56595bb4704e74 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -10183,6 +10183,8 @@ CodeGenFunction::getSVEType(const SVETypeFlags 
&TypeFlags) {
   case SVETypeFlags::EltTyInt64:
 return llvm::ScalableVectorType::get(Builder.getInt64Ty(), 2);
 
+  case SVETypeFlags::EltTyMFloat8:
+return llvm::ScalableVectorType::get(Builder.getInt8Ty(), 16);
   case SVETypeFlags::EltTyFloat16:
 return llvm::ScalableVectorType::get(Builder.getHalfTy(), 8);
   case SVETypeFlags::EltTyBFloat16:
@@ -11234,6 +11236,10 @@ Value 
*CodeGenFunction::EmitAArch64SMEBuiltinExpr(unsigned BuiltinID,
BuiltinID == SME::BI__builtin_sme_svstr_za)
 return EmitSMELdrStr(TypeFlags, Ops, Builtin->LLVMIntrinsic);
 
+  // Emit set FPMR for intrinsics that require it
+  if (TypeFlags.setsFPMR())
+Builder.CreateCall(CGM.getIntrinsic(Intrinsic::aarch64_set_fpmr),
+   Ops.pop_back_val());
   // Handle builtins which require their multi-vector operands to be swapped
   swapCommutativeSMEOperands(BuiltinID, Ops);
 
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c 
b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
new file mode 100644
index 00..95d6383ab30efe
--- /dev/null
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_sme2_fp8_fmopa.c
@@ -0,0 +1,55 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
+// REQUIRES: aarch64-registered-target
+
+// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme 
-target-feature +sme-f8f16 -target-feature +sme-f8f32 -disable-O0-optnone 
-Werror -Wall -emit-llvm -o - %s | opt -S -passes=mem2reg,tailcallelim | 
FileCheck %s
+// RUN: %clang_cc1 -triple aarch64-