[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 8a63b17711d36cfeb4aab591853163119f5f167d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/4] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea2..a18a5094a15e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index ..40fcad6a5764 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 74ee4857a76bc7eb5353dc22311e766ec5356514 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_f8f16 op> { + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } +multiclass sme2p1_fmop_tile_fp16 op, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_f8f16 op> { + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } +multiclass sme2p1_fmop_tile_fp16 op, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { CarolineConcatto wrote: I think you can remove bits<2> op, because all of them are 0b11, now that you created a new class for the fp8. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/CarolineConcatto approved this pull request. LGTM https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/CarolineConcatto edited https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { momchil-velikov wrote: Done. The Clang instrinsics use the same target features as the underlying assembly instructions. If the features on the assembly instruction are not entirely correct we should fix it, but in a separate patch. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa_nonwide>; momchil-velikov wrote: Discussed offline. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: In fact we can reuse the existing `aarch64_sme_mopa` which is used for other non-widening operations and since they are polymorphic and non-widening the instantiation type is enough to disambiguate the operation. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: Done https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa_nonwide>; CarolineConcatto wrote: Not in this patch, but I think all the instructions under `let Predicates = [HasSME2p1, HasSMEF16F16]`, should be under ` let Predicates = [HasSME2, HasSMEF16F16].` to be consistent with clang Otherwise we will have some problem with clang( clang has sme2 and the backend has sme2.1. TBH when I look at [FEAT_SME_F16F16](https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en) It maybe only need FEAT_SME_F16F16, like defm USMOPA_MPPZZ_D : sme_int_outer_product_i64<0b100, "usmopa", int_aarch64_sme_usmopa_wide>; https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", CarolineConcatto wrote: Can you replace: aarch64_sme_mopa_ by aarch64_sme_mopa_za16? https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { CarolineConcatto wrote: I think it should be "sme2,b16b16" if we see the BFMOPA https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en if !IsFeatureImplemented(FEAT_SME2) || !IsFeatureImplemented(FEAT_SVE_B16B16) then UNDEFINED; https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", CarolineConcatto wrote: Just in case: I remember a discussion that we said we could use MergeNone and in the name add _m: svmopa_za16[_bf16]_m Because MergeOp1 is only adding _m https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa_nonwide>; CarolineConcatto wrote: Just in case, maybe it is better to change this to; sme2_fmop_tile_fp16 and not change the fp8 https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
Lukacma wrote: > > I noticed that file names and file location are using sme2 as prefix. > > Shouldn't we use sme2p1 prefix for this intrinsic ? > > None of instructions seem to require `FEAT_SME2p1`: > https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en > > https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en > > https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en > > https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en My bad then. Thought all these new intrinsics for sme2p1 https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
momchil-velikov wrote: > I noticed that file names and file location are using sme2 as prefix. > Shouldn't we use sme2p1 prefix for this intrinsic ? None of instructions seem to require `FEAT_SME2p1`: https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
Lukacma wrote: I noticed that file names and file location are using sme2 as prefix. Shouldn't we use sme2p1 prefix for this intrinsic ? https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
llvmbot wrote: @llvm/pr-subscribers-clang Author: Momchil Velikov (momchil-velikov) Changes According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- Full diff: https://github.com/llvm/llvm-project/pull/88105.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+24) - (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c (+97) - (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c (+34) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+4-1) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+5-5) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14-2) - (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll (+42) ``diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..e60a400b094850 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mops_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..36a75609534653 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
llvmbot wrote: @llvm/pr-subscribers-llvm-ir Author: Momchil Velikov (momchil-velikov) Changes According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- Full diff: https://github.com/llvm/llvm-project/pull/88105.diff 7 Files Affected: - (modified) clang/include/clang/Basic/arm_sme.td (+24) - (added) clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c (+97) - (added) clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c (+34) - (modified) llvm/include/llvm/IR/IntrinsicsAArch64.td (+4-1) - (modified) llvm/lib/Target/AArch64/AArch64SMEInstrInfo.td (+5-5) - (modified) llvm/lib/Target/AArch64/SMEInstrFormats.td (+14-2) - (added) llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll (+42) ``diff diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..e60a400b094850 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mops_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..36a75609534653 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -o /dev/null %s + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SME_OVERLOADED_FORMS +#define SME_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SME_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/88105 According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From ee78ad565158c2d1301265415992511ea559e7a6 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..e60a400b094850 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mops_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..36a75609534653 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1