[clang] [llvm] [AArch64] Implement NEON vamin/vamax intrinsics (PR #99041)
@@ -5985,6 +5985,26 @@ multiclass SIMDThreeSameVectorFP opc, [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } +let mayRaiseFPException = 1, Uses = [FPCR] in +multiclass SIMDThreeVectorFP opc, momchil-velikov wrote: Removed. https://github.com/llvm/llvm-project/pull/99041 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Implement NEON vamin/vamax intrinsics (PR #99041)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/99041 >From 8e0aba5bcfd0a5f861c9ebb30a28c05eb0d6dcf5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 15 Jul 2024 17:50:43 +0100 Subject: [PATCH 1/2] [AArch64] Implement NEON vamin/vamax intrinsics This patch implements the intrinsics of the form floatNxM_t vamin[q]_fN(floatNxM_t vn, floatNxM_t vm); floatNxM_t vamax[q]_fN(floatNxM_t vn, floatNxM_t vm); as defined in https://github.com/ARM-software/acle/pull/324 Co-authored-by: Hassnaa Hamdi --- clang/include/clang/Basic/arm_neon.td | 5 + clang/lib/CodeGen/CGBuiltin.cpp | 17 +++ .../aarch64-neon-faminmax-intrinsics.c| 112 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64InstrFormats.td | 20 llvm/lib/Target/AArch64/AArch64InstrInfo.td | 4 +- llvm/test/CodeGen/AArch64/neon-famin-famax.ll | 96 +++ 7 files changed, 255 insertions(+), 2 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c create mode 100644 llvm/test/CodeGen/AArch64/neon-famin-famax.ll diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index 6390ba3f9fe5e..3746b3667ad99 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -2096,3 +2096,8 @@ let ArchGuard = "defined(__aarch64__) || defined(__arm64ec__)", TargetGuard = "r def VLDAP1_LANE : WInst<"vldap1_lane", ".(c*!).I", "QUlQlUlldQdPlQPl">; def VSTL1_LANE : WInst<"vstl1_lane", "v*(.!)I", "QUlQlUlldQdPlQPl">; } + +let ArchGuard = "defined(__aarch64__)", TargetGuard = "faminmax" in { + def FAMIN : WInst<"vamin", "...", "fhQdQfQh">; + def FAMAX : WInst<"vamax", "...", "fhQdQfQh">; +} diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index a54fa7bf87aad..bb6094aa31805 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13398,6 +13398,23 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, Int = Intrinsic::aarch64_neon_suqadd; return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd"); } + + case NEON::BI__builtin_neon_vamin_f16: + case NEON::BI__builtin_neon_vaminq_f16: + case NEON::BI__builtin_neon_vamin_f32: + case NEON::BI__builtin_neon_vaminq_f32: + case NEON::BI__builtin_neon_vaminq_f64: { +Int = Intrinsic::aarch64_neon_famin; +return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famin"); + } + case NEON::BI__builtin_neon_vamax_f16: + case NEON::BI__builtin_neon_vamaxq_f16: + case NEON::BI__builtin_neon_vamax_f32: + case NEON::BI__builtin_neon_vamaxq_f32: + case NEON::BI__builtin_neon_vamaxq_f64: { +Int = Intrinsic::aarch64_neon_famax; +return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "famax"); + } } } diff --git a/clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c b/clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c new file mode 100644 index 0..631e9738b85c5 --- /dev/null +++ b/clang/test/CodeGen/aarch64-neon-faminmax-intrinsics.c @@ -0,0 +1,112 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 +#include + +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -O3 -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +neon -target-feature +faminmax -S -O3 -Werror -Wall -o /dev/null %s + +// CHECK-LABEL: define dso_local <4 x half> @test_vamin_f16( +// CHECK-SAME: <4 x half> noundef [[VN:%.*]], <4 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[FAMIN2_I:%.*]] = tail call <4 x half> @llvm.aarch64.neon.famin.v4f16(<4 x half> [[VN]], <4 x half> [[VM]]) +// CHECK-NEXT:ret <4 x half> [[FAMIN2_I]] +// +float16x4_t test_vamin_f16(float16x4_t vn, float16x4_t vm) { + return vamin_f16(vn, vm); +} + +// CHECK-LABEL: define dso_local <8 x half> @test_vaminq_f16( +// CHECK-SAME: <8 x half> noundef [[VN:%.*]], <8 x half> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[FAMIN2_I:%.*]] = tail call <8 x half> @llvm.aarch64.neon.famin.v8f16(<8 x half> [[VN]], <8 x half> [[VM]]) +// CHECK-NEXT:ret <8 x half> [[FAMIN2_I]] +// +float16x8_t test_vaminq_f16(float16x8_t vn, float16x8_t vm) { + return vaminq_f16(vn, vm); + +} + +// CHECK-LABEL: define dso_local <2 x float> @test_vamin_f32( +// CHECK-SAME: <2 x float> noundef [[VN:%.*]], <2 x float> noundef [[VM:%.*]]) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:[[FAMIN2_I:%.*]] = tail call <2 x float> @llvm.aarch64.neon.famin.v2f32(<2 x float> [[VN]], <2 x float> [[VM]]) +// CHECK-NEXT:ret <2 x float> [[FAMIN2_I]] +// +float32x2_t test_vamin_f32(float32x2_t vn, float32x2_t vm) { +
[clang] [llvm] [AArch64] Implement intrinsics for SVE FAMIN/FAMAX (PR #99042)
@@ -2385,3 +2385,8 @@ let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { def SVBFMLSLB_LANE : SInst<"svbfmlslb_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslb_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; def SVBFMLSLT_LANE : SInst<"svbfmlslt_lane[_{d}]", "dd$$i", "f", MergeNone, "aarch64_sve_bfmlslt_lane", [IsOverloadNone, VerifyRuntimeMode], [ImmCheck<3, ImmCheck0_7>]>; } + momchil-velikov wrote: It comes from `SInstZPZZ`. https://github.com/llvm/llvm-project/pull/99042 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [AArch64] Implement FP8 floating-point mode helper intrinsics (PR #100608)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/100608 None >From bd2814249f922206e8648d58d2850f89afad4fd8 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 25 Jul 2024 18:25:40 +0100 Subject: [PATCH] [AArch64] Implement FP8 floating-point mode helper intrinsics --- clang/test/CodeGen/aarch64-fpm-helpers.c | 162 +++ clang/utils/TableGen/NeonEmitter.cpp | 54 2 files changed, 216 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-fpm-helpers.c diff --git a/clang/test/CodeGen/aarch64-fpm-helpers.c b/clang/test/CodeGen/aarch64-fpm-helpers.c new file mode 100644 index 0..dba79cebae547 --- /dev/null +++ b/clang/test/CodeGen/aarch64-fpm-helpers.c @@ -0,0 +1,162 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 + +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c -DUSE_SME_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_NEON_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SVE_H %s -o - | FileCheck %s +// RUN: %clang_cc1 -O2 -triple aarch64 -emit-llvm -x c++ -DUSE_SME_H %s -o - | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#ifdef USE_NEON_H +#include "arm_neon.h" +#endif + +#ifdef USE_SVE_H +#include "arm_sve.h" +#endif + +#ifdef USE_SME_H +#include "arm_sme.h" +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// CHECK-LABEL: define dso_local noundef i64 @test_init( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_init() { return __arm_fpm_init(); } + +// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_src1_1() { + return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 2) i64 @test_src1_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 1 +// +fpm_t test_src1_2() { + return __arm_set_fpm_src1_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_src2_1() { + return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 16) i64 @test_src2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 8 +// +fpm_t test_src2_2() { + return __arm_set_fpm_src2_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst1_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_dst1_1() { + return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E5M2); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 128) i64 @test_dst2_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 64 +// +fpm_t test_dst2_2() { + return __arm_set_fpm_dst_format(__arm_fpm_init(), __ARM_FPM_E4M3); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_of_mul_1() { + return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 32768) i64 @test_of_mul_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 16384 +// +fpm_t test_of_mul_2() { + return __arm_set_fpm_overflow_mul(__arm_fpm_init(), __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_1( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 0 +// +fpm_t test_of_cvt_1() { + return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_INFNAN); +} + +// CHECK-LABEL: define dso_local noundef range(i64 0, 65536) i64 @test_of_cvt_2( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +// CHECK-NEXT: [[ENTRY:.*:]] +// CHECK-NEXT:ret i64 32768 +// +fpm_t test_of_cvt_2() { + return __arm_set_fpm_overflow_cvt(__arm_fpm_init(), __ARM_FPM_SATURATE); +} + +// CHECK-LABEL: define dso_local noundef i64 @test_lscale( +// CHECK-SAME: ) local_unnamed_addr #[[ATTR0]] { +//
[clang] [llvm] [AArch64] Implement intrinsics for SME2 FAMIN/FAMAX (PR #99063)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/99063 This patch implements these intrinsics: ``` c // Variants are also available for: // [_f32_x2], [_f64_x2], // [_f16_x4], [_f32_x4], [_f64_x4] svfloat16x2_t svamax[_f16_x2](svfloat16x2 zd, svfloat16x2_t zm) __arm_streaming; svfloat16x2_t svamin[_f16_x2](svfloat16x2 zd, svfloat16x2_t zm) __arm_streaming; ``` (cf. https://github.com/ARM-software/acle/pull/324) >From db16c5c8fb1e8b272d49149016ee3b7ce43c5672 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 16 Jul 2024 16:49:04 +0100 Subject: [PATCH] [AArch64] Implement intrinsics for SME2 FAMIN/FAMAX This patch implements these intrinsics: ``` c // Variants are also available for: // [_f32_x2], [_f64_x2], // [_f16_x4], [_f32_x4], [_f64_x4] svfloat16x2_t svamax[_f16_x2](svfloat16x2 zd, svfloat16x2_t zm) __arm_streaming; svfloat16x2_t svamin[_f16_x2](svfloat16x2 zd, svfloat16x2_t zm) __arm_streaming; ``` (cf. https://github.com/ARM-software/acle/pull/324) Co-authored-by: Caroline Concatto --- clang/include/clang/Basic/arm_sve.td | 7 + .../acle_sme2_faminmax.c | 476 ++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 9 + .../Target/AArch64/AArch64ISelDAGToDAG.cpp| 28 ++ .../AArch64/sme2-intrinsics-faminmax.ll | 241 + 5 files changed, 761 insertions(+) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-faminmax.ll diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 94c093d891156..e415589ee25e5 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2219,6 +2219,13 @@ let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2" in { def SVSQDMULH_X4: SInst<"svqdmulh[_{d}_x4]","444", "csil", MergeNone, "aarch64_sve_sqdmulh_vgx4",[IsStreaming], []>; } +let SVETargetGuard = InvalidMode, SMETargetGuard = "sme2,faminmax" in { + def FAMIN_X2 : Inst<"svamin[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sme_famin_x2", [IsStreaming], []>; + def FAMAX_X2 : Inst<"svamax[_{d}_x2]", "222", "hfd", MergeNone, "aarch64_sme_famax_x2", [IsStreaming], []>; + def FAMIN_X4 : Inst<"svamin[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sme_famin_x4", [IsStreaming], []>; + def FAMAX_X4 : Inst<"svamax[_{d}_x4]", "444", "hfd", MergeNone, "aarch64_sme_famax_x4", [IsStreaming], []>; +} + let SVETargetGuard = "sve2p1", SMETargetGuard = "sme2" in { def REINTERPRET_SVBOOL_TO_SVCOUNT : Inst<"svreinterpret[_c]", "}P", "Pc", MergeNone, "", [VerifyRuntimeMode], []>; def REINTERPRET_SVCOUNT_TO_SVBOOL : Inst<"svreinterpret[_b]", "P}", "Pc", MergeNone, "", [VerifyRuntimeMode], []>; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c new file mode 100644 index 0..5d026f8cde5e0 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_faminmax.c @@ -0,0 +1,476 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: aarch64-registered-target +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2 -target-feature +faminmax -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +#include + +#ifdef SVE_OVERLOADED_FORMS +// A simple used,unused... macro, long enough to represent any SVE builtin. +#define SVE_ACLE_FUNC(A1,A2_UNUSED) A1 +#else +#define SVE_ACLE_FUNC(A1,A2) A1##A2 +#endif + + +// Multi, x2 + +// CHECK-LABEL: @test_svamax_f16_x2( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZDN:%.*]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call
[clang] [llvm] [AArch64] Implement NEON vamin/vamax intrinsics (PR #99041)
momchil-velikov wrote: > Did you consider emitting `llvm.fmin(llvm.fabs(x), llvm.fabs(y))`? Nope. I'll have a look. https://github.com/llvm/llvm-project/pull/99041 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [Clang][LLVM][AArch64] Add intrinsic for MOVT SME2 instruction (PR #97602)
@@ -3278,10 +3278,50 @@ class sme2_movt_zt_to_zt opc> let Inst{4-0} = Zt; } -multiclass sme2_movt_zt_to_zt opc> { +multiclass sme2_movt_zt_to_zt opc, SDPatternOperator intrinsic_lane, SDPatternOperator intrinsic> { def NAME : sme2_movt_zt_to_zt; + def NAME # _PSEUDO + : Pseudo<(outs), (ins ZTR:$ZT, sme_elm_idx0_3:$off2, ZPRAny:$Zt), []>, Sched<[]> { +let usesCustomInserter = 1; + } def : InstAlias(NAME) ZTR:$ZTt, 0, ZPRAny:$Zt), 1>; + + def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), nxv16i8:$zn, sme_elm_idx0_3:$imm), momchil-velikov wrote: You could make this quite a bit easier to read and modify with a loop: ``` foreach vt = [nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv8f16, nxv4f32, nxv2f64, nxv8bf16] in { def : Pat<(intrinsic_lane (imm_to_zt untyped:$zt), vt:$zn, sme_elm_idx0_3:$imm), (!cast(NAME # _PSEUDO) $zt, $imm, $zn)>; def : Pat<(intrinsic (imm_to_zt untyped:$zt), vt:$zn), (!cast(NAME # _PSEUDO) $zt, 0, $zn)>; } ``` https://github.com/llvm/llvm-project/pull/97602 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Use different memory layout type for _BitInt(N) in LLVM IR (PR #91364)
momchil-velikov wrote: This solves 5-6 issues we had downstream, many thanks! https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1886,6 +1896,29 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule , return Res; } + if (destType->isBitIntType()) { +if (CGM.getTypes().typeRequiresSplitIntoByteArray(destType, C->getType())) { + // Long _BitInt has array of bytes as in-memory type. + // So, split constant into individual bytes. + ConstantAggregateBuilder Builder(CGM); + llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType); + llvm::Type *LoadStoreTy = momchil-velikov wrote: It would seem instead of invoking `convertTypeForLoadStore` we could use the bit width of the in-memory type and sign-/zero- extend the `APInt Value`, something like. ``` llvm::APInt Value; if destType is unsigned Value = CI->getValue().zext(Width); else Value = CI->getValue().sext(Width); ``` https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -118,6 +124,37 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { return R; } +bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy, +llvm::Type *LLVMTy) { + CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy); + CharUnits LLVMSize = + CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy)); + return ASTSize == LLVMSize; +} + +llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T, + llvm::Type *LLVMTy) { + if (!LLVMTy) +LLVMTy = ConvertType(T); + + if (!T->isBitIntType() && LLVMTy->isIntegerTy(1)) +return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); + + if (T->isBitIntType()) { +llvm::Type *R = ConvertType(T); +if (!LLVMTypeLayoutMatchesAST(T, R)) + return llvm::Type::getIntNTy( + getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8); momchil-velikov wrote: Getting back to this... > I wonder making it unconditional that requires to have different in-memory > type for all _BitInt types? The in-memory representation is a whole number of bytes, so in general the number of in-memory bits will be different from the declared number on a `_BitInt` type. If you have `_BitInt(K)` and a load/store type `iN`, then loading/storing `iK` won't necessarily write to the same bytes as loading/storing `iN` would. That means we won't be writing to all the bytes of the in-memory representation which might be incorrect (e.g. for AArch32 ABI which requires padding bits to be sign- or zero- extended). It can also be inefficient, because LLVM requires memory accesses to be the minimum number of bytes sufficient to hold an `iK` - e.g. `i17` will end up as a load store of `i16` followed by a load store of `i8`, whereas the in-memory representation is 4 bytes and can be accesses as a single `i32`. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1774,6 +1774,18 @@ llvm::Constant *ConstantEmitter::emitForMemory(CodeGenModule , return Res; } + if (const auto *BIT = destType->getAs()) { +if (BIT->getNumBits() > 128) { + // Long _BitInt has array of bytes as in-memory type. + ConstantAggregateBuilder Builder(CGM); + llvm::Type *DesiredTy = CGM.getTypes().ConvertTypeForMem(destType); + auto *CI = cast(C); momchil-velikov wrote: How about a "small" `_BitInt` ? The comment starts > // LLVM type doesn't match AST type only for big enough _BitInts, and for AArch32 and AArch64 we are going to have a non-matching LLVM types even for "small" `_BitInt`s - for AArch32 because the ABI wants the padding bing in-memory representation to contain zero or the sign-bit, and for both we'd like to emit loads/stores in bigger chunks, e.g. `i17` is a single `i32` load store, as opposed to two separate accesses to `i16` and `i8`. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][NEON] Add intrinsics for LUTI (PR #96883)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/96883 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64][NEON] Add intrinsics for LUTI (PR #96883)
@@ -545,6 +545,25 @@ let TargetPrefix = "aarch64", IntrProperties = [IntrNoMem] in { def int_aarch64_neon_vcmla_rot270 : AdvSIMD_3VectorArg_Intrinsic; } +let TargetPrefix = "aarch64" in { +def int_aarch64_neon_vluti2_lane : DefaultAttrsIntrinsic<[llvm_anyvector_ty], momchil-velikov wrote: Only the return type and the table type are polymorphic, isn't it? The indices and the immediate are always integers. https://github.com/llvm/llvm-project/pull/96883 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang] Remove preprocessor guards and global feature checks for NEON (PR #95102)
@@ -8084,29 +8084,6 @@ static void HandleNeonVectorTypeAttr(QualType , const ParsedAttr , AuxTI && (AuxTI->getTriple().isAArch64() || AuxTI->getTriple().isARM()); } - // Target must have NEON (or MVE, whose vectors are similar enough - // not to need a separate attribute) - if (!(S.Context.getTargetInfo().hasFeature("neon") || -S.Context.getTargetInfo().hasFeature("mve") || -S.Context.getTargetInfo().hasFeature("sve") || -S.Context.getTargetInfo().hasFeature("sme") || -IsTargetCUDAAndHostARM) && - VecKind == VectorKind::Neon) { -S.Diag(Attr.getLoc(), diag::err_attribute_unsupported) -<< Attr << "'neon', 'mve', 'sve' or 'sme'"; -Attr.setInvalid(); -return; - } - if (!(S.Context.getTargetInfo().hasFeature("neon") || momchil-velikov wrote: You can preserve the behaviour for MVE if you alter the diagnostics condition to be `"NEON type seen" && "no MVE" && "compiling for M-class".` https://github.com/llvm/llvm-project/pull/95102 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -118,6 +124,37 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { return R; } +bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy, +llvm::Type *LLVMTy) { + CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy); + CharUnits LLVMSize = + CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy)); + return ASTSize == LLVMSize; +} + +llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T, + llvm::Type *LLVMTy) { + if (!LLVMTy) +LLVMTy = ConvertType(T); + + if (!T->isBitIntType() && LLVMTy->isIntegerTy(1)) +return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); + + if (T->isBitIntType()) { +llvm::Type *R = ConvertType(T); +if (!LLVMTypeLayoutMatchesAST(T, R)) + return llvm::Type::getIntNTy( + getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8); momchil-velikov wrote: cf. https://github.com/llvm/llvm-project/pull/91364#issuecomment-2099384663 I'm fairly certain using load/store type of `iBITS` is the correct thing to do, unconditionally. Not quite sure about the choice between `iBITS` and `[BYTES x i8]`, if we're not talking about a load/stores how the array type could possibly be less efficient, so we don't default unconditionally to it? https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -118,6 +124,37 @@ llvm::Type *CodeGenTypes::ConvertTypeForMem(QualType T, bool ForBitField) { return R; } +bool CodeGenTypes::LLVMTypeLayoutMatchesAST(QualType ASTTy, +llvm::Type *LLVMTy) { + CharUnits ASTSize = Context.getTypeSizeInChars(ASTTy); + CharUnits LLVMSize = + CharUnits::fromQuantity(getDataLayout().getTypeAllocSize(LLVMTy)); + return ASTSize == LLVMSize; +} + +llvm::Type *CodeGenTypes::convertTypeForLoadStore(QualType T, + llvm::Type *LLVMTy) { + if (!LLVMTy) +LLVMTy = ConvertType(T); + + if (!T->isBitIntType() && LLVMTy->isIntegerTy(1)) +return llvm::IntegerType::get(getLLVMContext(), + (unsigned)Context.getTypeSize(T)); + + if (T->isBitIntType()) { +llvm::Type *R = ConvertType(T); +if (!LLVMTypeLayoutMatchesAST(T, R)) + return llvm::Type::getIntNTy( + getLLVMContext(), Context.getTypeSizeInChars(T).getQuantity() * 8); momchil-velikov wrote: I don't understand why the choice of type is conditional. No matter of the "layout" matches or not, the stores have to produce valid in-memory representation (according to the target ABI), which in the general case means writing **all the** bits of the in-memory representation. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -674,3 +674,26 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + +multiclass ZAReadz ch> { + let TargetGuard = "sme2p1" in { +def NAME # _H : SInst<"svreadz_hor_" # n_suffix # "_{d}_vg" # vg_num, vg_num # "im", t, + MergeNone, i_prefix # "_horiz_x" # vg_num, + [IsStreaming, IsInOutZA], ch>; momchil-velikov wrote: > Should these be `InZA` instead of `InOutZA`? And also `ReadZA` (not sure what > that's for). The instructions zero the source tiles after copying (https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/MOVAZ--tile-to-vector--four-registers---Move-and-zero-four-ZA-tile-slices-to-vector-registers-?lang=en ). https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } + if (const auto *BIT = Ty->getAs()) { +if (BIT->getNumBits() > 128) { + // Long _BitInt has array of bytes as in-memory type. + llvm::Type *NewTy = ConvertType(Ty); momchil-velikov wrote: Oh, I see. It looks close to what we are trying to do with https://github.com/llvm/llvm-project/pull/93495, which is: * create in-memory representations according to the target ABI * improve efficiency of loads/stores, e.g. load/store of `i18` in LLVM must touch just 3 bytes, so a compiler would emit one 16-bit load and one 8-bit load, but if `i18` comes from `_BitInt(18)` then a single 32-bit load would work better. https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [clang] Lower _BitInt(129+) to a different type in LLVM IR (PR #91364)
@@ -1989,6 +1989,14 @@ llvm::Value *CodeGenFunction::EmitLoadOfScalar(Address Addr, bool Volatile, return EmitAtomicLoad(AtomicLValue, Loc).getScalarVal(); } + if (const auto *BIT = Ty->getAs()) { +if (BIT->getNumBits() > 128) { + // Long _BitInt has array of bytes as in-memory type. + llvm::Type *NewTy = ConvertType(Ty); momchil-velikov wrote: Shouldn't we call calling `ConvertTypeForMem` here? https://github.com/llvm/llvm-project/pull/91364 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][ARM]: Fix Inefficient loads/stores of _BitInt(N) (PR #93495)
@@ -2021,6 +2028,12 @@ llvm::Value *CodeGenFunction::EmitToMemory(llvm::Value *Value, QualType Ty) { assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) && "wrong value rep of bool"); } + if (auto *BitIntTy = Ty->getAs()) { +if (CGM.getTarget().isBitIntSignExtended(BitIntTy->isSigned())) momchil-velikov wrote: We might be introducing changes that are not desirable (or correct) for non-Arm targets. Instead, I would suggest to add a description of the in-memory padding for the `_BitInt` types (e.g. via a member function in `TargetInfo`). One reasonable approach is lilke this: ``` enum class TargetBitInitPaddingKind { None, ZeroOrSignExtend, AnyExtend }; ``` where * `None` will be the default and will result in identical code as the one that Clang generates now, i.e. no `sext` or `zext`, load/stores use LLVM type `iN` for `_BitInt(N)`. * `ZeroOrSignExtend` would mean in-memory representation is padded with 0 for `unsigned _BitInt(N)` and with the sign bit for `signed _BitInt(N)`. This will be the value for AArch32 * `AnyExtend` would mean in-memory representation is padded with unspecified bits. This will be the value for AArch64. Since AFAIK we don't have such an operation in LLVM IR, one way to implement this would be identically to `ZeroOrSignExtend` or, alternatively, do zero-extend regardless of the signedness of the `_BitInt(N)` type. https://github.com/llvm/llvm-project/pull/93495 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][ARM]: Fix Inefficient loads/stores of _BitInt(N) (PR #93495)
@@ -221,6 +221,16 @@ bool AArch64TargetInfo::validateTarget(DiagnosticsEngine ) const { return true; } +unsigned AArch64TargetInfo::getBitIntLegalWidth(unsigned Width) const { momchil-velikov wrote: This function is likely unnecessary (also it's incorrect in a couple of ways[1]). I think you should be using instead `ASTConext::getTypeInfo(T).Width`. [1] Representation might be in less than 32-bits (could be also 8 or 16) and `_BitInt(N), N > 128` is not `N` bits wide, it's in multiples of `i128`. https://github.com/llvm/llvm-project/pull/93495 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Fix feature flags dependecies (PR #90612)
momchil-velikov wrote: > > This patch removes FEAT_FPMR from list of available of architecture > > features, instead enabling FMPR register by default. > > Can you expand a little bit on the reasoning? It doesn't seem all that > problematic but is still eyebrow-raising. The overall idea is that system registers ought be available everywhere without the need to explicitly enable them with a command line option. Since `FEAT_FPMR` has no function other than enabling the register and it is going to be enabled by default, having a command line option, predicate, feature definition, etc becomes pointless. The FP8 instructions themselves are still guarded by a target feature. https://github.com/llvm/llvm-project/pull/90612 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
momchil-velikov wrote: Rebased. https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From b1b69ffcaf4525a66dde1ae7f1a022c85204a579 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:25:43 +0100 Subject: [PATCH 1/2] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. Change-Id: I46d0333d8ed8508cd9cd23e02dd1c2d48fb74cd2 --- clang/lib/CodeGen/CGExpr.cpp | 2 +- clang/lib/Sema/SemaExpr.cpp | 2 +- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index cd1c48b420382..6f9237e2067f5 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,7 +4180,7 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && + if (E->getBase()->getType()->isSubscriptableVectorType() && !isa(E->getBase())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 326879b0883fa..49541edf106e1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5185,7 +5185,7 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->isSubscriptableVectorType()) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} >From 08a155b49a6b9c859ba8569170e0f71e63b76735 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:26:06 +0100 Subject: [PATCH 2/2] [fixiup] Add a test using compound assignment operator Change-Id: I81e1fd4f23eb65a96e71015de7a4562fcbc53c0f --- .../test/CodeGen/aarch64-sve-vector-subscript-ops.c | 12 1 file changed, 12 insertions(+) diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index 634423765c4c3..52a05d010de9b 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -98,3 +98,15 @@ svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { a[b] = 1.0f; return a; } + +// CHECK-LABEL: @subscript_read_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECEXT:%.*]] = extractelement [[A:%.*]], i64 [[B:%.*]] +// CHECK-NEXT:[[ADD:%.*]] = fadd float [[VECEXT]], 1.00e+00 +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A]], float [[ADD]], i64 [[B]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_read_write_float32(svfloat32_t a, size_t b) { + a[b] += 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/92778 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/92778 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { momchil-velikov wrote: Split out to https://github.com/llvm/llvm-project/pull/92778 https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) (PR #92778)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/92778 None >From 435f3104e68ef278196417c293093131258c549d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 15:43:31 +0100 Subject: [PATCH] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) Change-Id: I514431a482ffa0a2d906c019b6e374bf4607571e --- clang/include/clang/AST/Type.h | 5 clang/lib/Sema/SemaExpr.cpp| 44 +++--- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index da3834f19ca04..9a5c6e8d562c3 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2523,6 +2523,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isVectorType() const;// GCC vector type. bool isExtVectorType() const; // Extended vector type. bool isExtVectorBoolType() const; // Extended vector type with bool element. + bool isSubscriptableVectorType() const; bool isMatrixType() const;// Matrix type. bool isConstantMatrixType() const;// Constant matrix type. bool isDependentAddressSpaceType() const; // value-dependent address space qualifier @@ -7729,6 +7730,10 @@ inline bool Type::isExtVectorBoolType() const { return cast(CanonicalType)->getElementType()->isBooleanType(); } +inline bool Type::isSubscriptableVectorType() const { + return isVectorType() || isSveVLSBuiltinType(); +} + inline bool Type::isMatrixType() const { return isa(CanonicalType); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5ecfdee21f09d..c86f1d9c8076e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5283,36 +5283,22 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, << ResultType << BaseExpr->getSourceRange(); return ExprError(); } - } else if (const VectorType *VTy = LHSTy->getAs()) { -BaseExpr = LHSExp;// vectors: V[123] -IndexExpr = RHSExp; -// We apply C++ DR1213 to vector subscripting too. -if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { - ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); - if (Materialized.isInvalid()) -return ExprError(); - LHSExp = Materialized.get(); + } else if (LHSTy->isSubscriptableVectorType()) { +if (LHSTy->isBuiltinType() && +LHSTy->getAs()->isSveVLSBuiltinType()) { + const BuiltinType *BTy = LHSTy->getAs(); + if (BTy->isSVEBool()) +return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) + << LHSExp->getSourceRange() + << RHSExp->getSourceRange()); + ResultType = BTy->getSveEltType(Context); +} else { + const VectorType *VTy = LHSTy->getAs(); + ResultType = VTy->getElementType(); } -VK = LHSExp->getValueKind(); -if (VK != VK_PRValue) - OK = OK_VectorComponent; - -ResultType = VTy->getElementType(); -QualType BaseType = BaseExpr->getType(); -Qualifiers BaseQuals = BaseType.getQualifiers(); -Qualifiers MemberQuals = ResultType.getQualifiers(); -Qualifiers Combined = BaseQuals + MemberQuals; -if (Combined != MemberQuals) - ResultType = Context.getQualifiedType(ResultType, Combined); - } else if (LHSTy->isBuiltinType() && - LHSTy->getAs()->isSveVLSBuiltinType()) { -const BuiltinType *BTy = LHSTy->getAs(); -if (BTy->isSVEBool()) - return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) - << LHSExp->getSourceRange() << RHSExp->getSourceRange()); - -BaseExpr = LHSExp; +BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; +// We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) @@ -5323,8 +5309,6 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, if (VK != VK_PRValue) OK = OK_VectorComponent; -ResultType = BTy->getSveEltType(Context); - QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From 435f3104e68ef278196417c293093131258c549d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 15:43:31 +0100 Subject: [PATCH 1/3] [Clang][Sema] Refactor handling of vector subscript expressions (NFC) Change-Id: I514431a482ffa0a2d906c019b6e374bf4607571e --- clang/include/clang/AST/Type.h | 5 clang/lib/Sema/SemaExpr.cpp| 44 +++--- 2 files changed, 19 insertions(+), 30 deletions(-) diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h index da3834f19ca04..9a5c6e8d562c3 100644 --- a/clang/include/clang/AST/Type.h +++ b/clang/include/clang/AST/Type.h @@ -2523,6 +2523,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase { bool isVectorType() const;// GCC vector type. bool isExtVectorType() const; // Extended vector type. bool isExtVectorBoolType() const; // Extended vector type with bool element. + bool isSubscriptableVectorType() const; bool isMatrixType() const;// Matrix type. bool isConstantMatrixType() const;// Constant matrix type. bool isDependentAddressSpaceType() const; // value-dependent address space qualifier @@ -7729,6 +7730,10 @@ inline bool Type::isExtVectorBoolType() const { return cast(CanonicalType)->getElementType()->isBooleanType(); } +inline bool Type::isSubscriptableVectorType() const { + return isVectorType() || isSveVLSBuiltinType(); +} + inline bool Type::isMatrixType() const { return isa(CanonicalType); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 5ecfdee21f09d..c86f1d9c8076e 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5283,36 +5283,22 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, << ResultType << BaseExpr->getSourceRange(); return ExprError(); } - } else if (const VectorType *VTy = LHSTy->getAs()) { -BaseExpr = LHSExp;// vectors: V[123] -IndexExpr = RHSExp; -// We apply C++ DR1213 to vector subscripting too. -if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { - ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); - if (Materialized.isInvalid()) -return ExprError(); - LHSExp = Materialized.get(); + } else if (LHSTy->isSubscriptableVectorType()) { +if (LHSTy->isBuiltinType() && +LHSTy->getAs()->isSveVLSBuiltinType()) { + const BuiltinType *BTy = LHSTy->getAs(); + if (BTy->isSVEBool()) +return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) + << LHSExp->getSourceRange() + << RHSExp->getSourceRange()); + ResultType = BTy->getSveEltType(Context); +} else { + const VectorType *VTy = LHSTy->getAs(); + ResultType = VTy->getElementType(); } -VK = LHSExp->getValueKind(); -if (VK != VK_PRValue) - OK = OK_VectorComponent; - -ResultType = VTy->getElementType(); -QualType BaseType = BaseExpr->getType(); -Qualifiers BaseQuals = BaseType.getQualifiers(); -Qualifiers MemberQuals = ResultType.getQualifiers(); -Qualifiers Combined = BaseQuals + MemberQuals; -if (Combined != MemberQuals) - ResultType = Context.getQualifiedType(ResultType, Combined); - } else if (LHSTy->isBuiltinType() && - LHSTy->getAs()->isSveVLSBuiltinType()) { -const BuiltinType *BTy = LHSTy->getAs(); -if (BTy->isSVEBool()) - return ExprError(Diag(LLoc, diag::err_subscript_svbool_t) - << LHSExp->getSourceRange() << RHSExp->getSourceRange()); - -BaseExpr = LHSExp; +BaseExpr = LHSExp; // vectors: V[123] IndexExpr = RHSExp; +// We apply C++ DR1213 to vector subscripting too. if (getLangOpts().CPlusPlus11 && LHSExp->isPRValue()) { ExprResult Materialized = TemporaryMaterializationConversion(LHSExp); if (Materialized.isInvalid()) @@ -5323,8 +5309,6 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, if (VK != VK_PRValue) OK = OK_VectorComponent; -ResultType = BTy->getSveEltType(Context); - QualType BaseType = BaseExpr->getType(); Qualifiers BaseQuals = BaseType.getQualifiers(); Qualifiers MemberQuals = ResultType.getQualifiers(); >From 7fc3ff1758fa424bdbea3c847aede260f7598814 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 20 May 2024 16:25:43 +0100 Subject: [PATCH 2/3] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`.
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2939,59 +2922,18 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; switch (SMEMatrixType) { case (AArch64::SMEMatrixArray): - return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false, - /*HasZPROut*/ false); + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB); case (AArch64::SMEMatrixTileB): - switch (MI.getOpcode()) { - case AArch64::MOVAZ_2ZMI_H_B_PSEUDO: - case AArch64::MOVAZ_2ZMI_V_B_PSEUDO: - case AArch64::MOVAZ_4ZMI_H_B_PSEUDO: - case AArch64::MOVAZ_4ZMI_V_B_PSEUDO: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ true); - default: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ false); - } + return EmitZAInstr(SMEOrigInstr, AArch64::ZAB0, MI, BB); case (AArch64::SMEMatrixTileH): - switch (MI.getOpcode()) { - case AArch64::MOVAZ_2ZMI_H_H_PSEUDO: - case AArch64::MOVAZ_2ZMI_V_H_PSEUDO: - case AArch64::MOVAZ_4ZMI_H_H_PSEUDO: - case AArch64::MOVAZ_4ZMI_V_H_PSEUDO: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ true); - default: -return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB, - /*HasTile*/ true, /*HasZPROut*/ false); - } + return EmitZAInstr(SMEOrigInstr, AArch64::ZAH0, MI, BB); + ///*HasTile*/ true, /*HasZPROut*/ false); momchil-velikov wrote: Stray comment. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
momchil-velikov wrote: ``` if (HasTile) { MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); MIB.addReg(BaseReg + MI.getOperand(0).getImm()); StartIdx = 1; } else MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); } ``` Needs extra braces around the `else` clause, https://llvm.org/docs/CodingStandards.html#don-t-use-braces-on-simple-single-statement-bodies-of-if-else-loop-statements cf. ``` // Use braces for the `if` block to keep it uniform with the `else` block. if (isa(D)) { handleFunctionDecl(D); } else { // In this `else` case, it is necessary that we explain the situation with // this surprisingly long comment, so it would be unclear without the braces // whether the following statement is in the scope of the `if`. handleOtherDecl(D); } ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); unsigned StartIdx = 0; - if (HasTile) { -MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); -MIB.addReg(BaseReg + MI.getOperand(0).getImm()); -StartIdx = 1; - } else -MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); - + if (HasZPROut) { momchil-velikov wrote: Looks good with the last change. Still can further simplify and make it more readable like in the snippet above. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
@@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -verify -emit-llvm %s momchil-velikov wrote: Thanks! https://github.com/llvm/llvm-project/pull/91606 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91606 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From fd4a31c1eb48db410f5445f45243dfbc1d9d22ab Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH 1/2] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index e0aae6333e1a1..f3983a3cbefb1 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5227,7 +5227,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} >From fec051ff91df9cc8fca4d0571fe77a18cfb58072 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 17 May 2024 13:20:18 +0100 Subject: [PATCH 2/2] [fixiup] Add a test using compound assignment operator Change-Id: I81e1fd4f23eb65a96e71015de7a4562fcbc53c0f --- .../test/CodeGen/aarch64-sve-vector-subscript-ops.c | 12 1 file changed, 12 insertions(+) diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index 634423765c4c3..52a05d010de9b 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -98,3 +98,15 @@ svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { a[b] = 1.0f; return a; } + +// CHECK-LABEL: @subscript_read_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECEXT:%.*]] = extractelement [[A:%.*]], i64 [[B:%.*]] +// CHECK-NEXT:[[ADD:%.*]] = fadd float [[VECEXT]], 1.00e+00 +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A]], float [[ADD]], i64 [[B]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_read_write_float32(svfloat32_t a, size_t b) { + a[b] += 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
@@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { momchil-velikov wrote: AFAICT here https://github.com/llvm/llvm-project/blob/371eccd5dfed88c8e76449233d8388c12be3464b/clang/lib/Sema/SemaExpr.cpp#L5307 we are enabling array subscripts for SVE only. Perhaps we can be generalised to any size-less vector type, in a followup patch, @jacquesguan , what do you think? https://github.com/llvm/llvm-project/pull/91965 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91965 >From 2e081d74e87ad14fdf6d950d3e3da6bed07ee723 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bb4b116fd73ca..fd16be30bd848 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5383,7 +5383,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator (PR #91965)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91965 The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertelement`. >From 2e081d74e87ad14fdf6d950d3e3da6bed07ee723 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Mon, 13 May 2024 14:27:51 +0100 Subject: [PATCH] [Clang][AArch64][SVE] Allow write to SVE vector elements using the subscript operator The patch at https://reviews.llvm.org/D122732 introduced using the array subscript operator for SVE vectors, however it also causes an ICE when the subscripting expression is used as an lvalue. This patches fixes the error. Lvalue subscripting expressions are emitted as LLVM IR `insertvector`. --- clang/lib/CodeGen/CGExpr.cpp | 6 -- clang/lib/Sema/SemaExpr.cpp | 4 +++- clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c | 10 ++ 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index d96c7bb1e5682..37b8b723937b7 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4180,8 +4180,10 @@ LValue CodeGenFunction::EmitArraySubscriptExpr(const ArraySubscriptExpr *E, // If the base is a vector type, then we are forming a vector element lvalue // with this subscript. - if (E->getBase()->getType()->isVectorType() && - !isa(E->getBase())) { + if (QualType BaseTy = E->getBase()->getType(); + (BaseTy->isVectorType() && !isa(E->getBase())) || + (BaseTy->isBuiltinType() && + BaseTy->getAs()->isSveVLSBuiltinType())) { // Emit the vector as an lvalue to get its address. LValue LHS = EmitLValue(E->getBase()); auto *Idx = EmitIdxAfterBase(/*Promote*/false); diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index bb4b116fd73ca..fd16be30bd848 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -5383,7 +5383,9 @@ Sema::CreateBuiltinArraySubscriptExpr(Expr *Base, SourceLocation LLoc, } // Perform default conversions. - if (!LHSExp->getType()->getAs()) { + if (!LHSExp->getType()->getAs() && + !(LHSExp->getType()->isBuiltinType() && +LHSExp->getType()->getAs()->isSveVLSBuiltinType())) { ExprResult Result = DefaultFunctionArrayLvalueConversion(LHSExp); if (Result.isInvalid()) return ExprError(); diff --git a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c index fb60c6d100ce6..634423765c4c3 100644 --- a/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c +++ b/clang/test/CodeGen/aarch64-sve-vector-subscript-ops.c @@ -88,3 +88,13 @@ float subscript_float32(svfloat32_t a, size_t b) { double subscript_float64(svfloat64_t a, size_t b) { return a[b]; } + +// CHECK-LABEL: @subscript_write_float32( +// CHECK-NEXT: entry: +// CHECK-NEXT:[[VECINS:%.*]] = insertelement [[A:%.*]], float 1.00e+00, i64 [[B:%.*]] +// CHECK-NEXT:ret [[VECINS]] +// +svfloat32_t subscript_write_float32(svfloat32_t a, size_t b) { + a[b] = 1.0f; + return a; +} ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91606 >From 43fb20b7492307740c437e85c3f73af068d093cf Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7808ee559932e..80e635e4a57ec 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +//
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/91606 >From d3e381ac645d08b6f3b01283d47344556a163605 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 7808ee559932e..80e635e4a57ec 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +//
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bfloat16 min/max/minnm/maxnm (PR #90105)
https://github.com/momchil-velikov commented: LGTM, cheers! https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 8a63b17711d36cfeb4aab591853163119f5f167d Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/4] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea2..a18a5094a15e 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index ..40fcad6a5764 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #91606)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91606 [Recommit of e88ba6d975d887ca001cae30bfa0c53d91165148] According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From fc45f19cddc7b2dee55f53a2c464d5819f06a0ad Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:56:31 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (#88266) According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea28..000bd97a4b25d 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 0..9a8aa448d3780 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
momchil-velikov wrote: > Thanks for the quick revert! > > Is the failure due to a conflict with another commit that landed? Perhaps, e.g. https://github.com/llvm/llvm-project/pull/91140 https://github.com/llvm/llvm-project/pull/88266 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (PR #91597)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/91597 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] Revert "[AArch64] Add intrinsics for multi-vector to ZA array vector accumulators" (PR #91597)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/91597 Reverts llvm/llvm-project#88266 due to test failures error: 'expected-error' diagnostics seen but not expected: (frontend): '-fsyntax-only' action ignored; '-emit-llvm' action specified previously >From 0f71196108d1c3c1bb44305a3a8392f406ae71e9 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Thu, 9 May 2024 15:01:53 +0100 Subject: [PATCH] =?UTF-8?q?Revert=20"[AArch64]=20Add=20intrinsics=20for=20?= =?UTF-8?q?multi-vector=20to=20ZA=20array=20vector=20accumula=E2=80=A6"?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit e88ba6d975d887ca001cae30bfa0c53d91165148. --- clang/include/clang/Basic/arm_sme.td | 10 - .../acle_sme2_add_sub_za16.c | 193 -- .../acle_sme2_add_sub_za16.c | 29 --- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 -- 6 files changed, 9 insertions(+), 389 deletions(-) delete mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c delete mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c delete mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 000bd97a4b25d..1ac6d5170ea28 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,16 +298,6 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } - - let TargetGuard = "sme-f16f16|sme-f8f16" in { -def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; -def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } - - let TargetGuard = "sme2,b16b16" in { -def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; -def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; - } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c deleted file mode 100644 index d98427fac610b..0 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c +++ /dev/null @@ -1,193 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX - -// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null - -// REQUIRES: aarch64-registered-target - -#include - -#ifdef SVE_OVERLOADED_FORMS -#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 -#else -#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 -#endif - -// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( -// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/88266 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { const TargetInstrInfo *TII = Subtarget->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(*BB, MI, MI.getDebugLoc(), TII->get(Opc)); unsigned StartIdx = 0; - if (HasTile) { -MIB.addReg(BaseReg + MI.getOperand(0).getImm(), RegState::Define); -MIB.addReg(BaseReg + MI.getOperand(0).getImm()); -StartIdx = 1; - } else -MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); - + if (HasZPROut) { momchil-velikov wrote: I think it can be made a bit more clear and less verbose if we separate the conditions and use `StartIdx` to track how many of the input operands we have consumes, something like: ``` unsigned StartIdx = 0; if (HasGPROut) { MIB.add(MI.getOperand(0)); // Output ZPR ++StartIdx; } if (HasTile) { MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm(), RegState::Define); // Output ZA Tile MIB.addReg(BaseReg + MI.getOperand(StartIdx).getImm()); // Input Za Tile ++StartIdx; } else { MIB.addReg(BaseReg, RegState::Define).addReg(BaseReg); } ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2883,19 +2883,28 @@ MachineBasicBlock *AArch64TargetLowering::EmitZTInstr(MachineInstr , MachineBasicBlock * AArch64TargetLowering::EmitZAInstr(unsigned Opc, unsigned BaseReg, - MachineInstr , - MachineBasicBlock *BB, bool HasTile) const { + MachineInstr , MachineBasicBlock *BB, + bool HasTile, bool HasZPROut) const { momchil-velikov wrote: I'm wondering would it be possible to remove *both* `bool` parameters and instead infer their value in the function itself. Maybe like this: ``` bool HasTile = BaseReg != AArch64::ZA; bool HasZPROut = HasTile && MI.getOperand(0).isReg(); ``` https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -2930,17 +2939,59 @@ MachineBasicBlock *AArch64TargetLowering::EmitInstrWithCustomInserter( TII->get(MI.getOpcode()).TSFlags & AArch64::SMEMatrixTypeMask; switch (SMEMatrixType) { case (AArch64::SMEMatrixArray): - return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false); + return EmitZAInstr(SMEOrigInstr, AArch64::ZA, MI, BB, /*HasTile*/ false, momchil-velikov wrote: We the changes from the comment above we can remove all the `bool` arguments and, most importantly, all those opcodes. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
momchil-velikov wrote: Typo in commit message: `bflaot16` > Variations other than bfloat16 had been already supported. -> Variations other than bfloat16 are already supported. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 74ee4857a76bc7eb5353dc22311e766ec5356514 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88266 >From cafe0a8b70ad0189b638ec377e7d8cba9e786ecb Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..000bd97a4b25d5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..d98427fac610b8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT:ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]])
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/3] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 3ea7ee0aaf7f8be8c2ee42af92ba3b13b8212645 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 3 + .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 219 insertions(+), 7 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88266 >From 09167c5df2b50476a5073ff2e527503d090e7995 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 10 + .../acle_sme2_add_sub_za16.c | 193 ++ .../acle_sme2_add_sub_za16.c | 29 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 148 ++ 6 files changed, 389 insertions(+), 9 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..000bd97a4b25d5 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,16 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme-f16f16|sme-f8f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..d98427fac610b8 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,193 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f8f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -o /dev/null + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 8) +// CHECK-NEXT:tail call void @llvm.aarch64.sme.add.za16.vg1x2.nxv8f16(i32 [[SLICE]], [[TMP0]], [[TMP1]]) +// CHECK-NEXT:ret void +// +// CHECK-CXX-LABEL: define dso_local void @_Z25test_svadd_za16_vg1x2_f16j13svfloat16x2_t( +// CHECK-CXX-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]])
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
@@ -3373,7 +3373,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector min/max // - foreach ty = ["f", "s", "u"] in { + foreach ty = ["bf", "f", "s", "u"] in { momchil-velikov wrote: You could just omit that part. Then the `bfloat` intrinsics would use `fmin`/`fmax`/etc in the names without ambiguity, since they are polymorphic. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for bflaot16 min/max/minnm/maxnm (PR #90105)
@@ -3387,7 +3387,7 @@ let TargetPrefix = "aarch64" in { // Multi-vector floating point min/max number // - foreach instr = ["fmaxnm", "fminnm"] in { + foreach instr = ["fmaxnm", "bfmaxnm", "fminnm", "bfminnm"] in { momchil-velikov wrote: Likewise here. https://github.com/llvm/llvm-project/pull/90105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
@@ -104,6 +104,13 @@ class sme2_move_to_tile_pseudo +: SMEPseudo2Instr, momchil-velikov wrote: This is not needed. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]Add SME2.1 intrinsics for MOVAZ tile to vector,… (PR #88499)
@@ -2832,6 +2832,23 @@ AArch64TargetLowering::EmitTileLoad(unsigned Opc, unsigned BaseReg, return BB; } +MachineBasicBlock * +AArch64TargetLowering::EmitTileMovaz(unsigned Opc, unsigned BaseReg, momchil-velikov wrote: This function looks almost identical to `EmitZAInstr`. It looks to me you can reuse `EmitZAInstr` (with a couple of small modifications) and then employ the `SMEPseudo2Instr` technique. Then you won't need the switch cases starting at line 3012. https://github.com/llvm/llvm-project/pull/88499 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
@@ -458,6 +458,40 @@ let TargetGuard = "sme2,sme-f64f64" in { def SVMLS_LANE_VG1x4_F64 : Inst<"svmls_lane_za64[_{d}]_vg1x4", "vm4di", "d", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_1>]>; } +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMLA_MULTI_VG1x2_F16 : Inst<"svmla_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmla_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLA_MULTI_VG1x4_F16 : Inst<"svmla_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmla_vg1x4", [IsStreaming, IsInOutZA], []>; + def SVMLS_MULTI_VG1x2_F16 : Inst<"svmls_za16[_f16]_vg1x2", "vm22", "h", MergeNone, "aarch64_sme_fmls_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLS_MULTI_VG1x4_F16 : Inst<"svmls_za16[_f16]_vg1x4", "vm44", "h", MergeNone, "aarch64_sme_fmls_vg1x4", [IsStreaming, IsInOutZA], []>; + + def SVMLA_SINGLE_VG1x2_F16 : Inst<"svmla[_single]_za16[_f16]_vg1x2", "vm2d", "h", MergeNone, "aarch64_sme_fmla_single_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLA_SINGLE_VG1x4_F16 : Inst<"svmla[_single]_za16[_f16]_vg1x4", "vm4d", "h", MergeNone, "aarch64_sme_fmla_single_vg1x4", [IsStreaming, IsInOutZA], []>; + def SVMLS_SINGLE_VG1x2_F16 : Inst<"svmls[_single]_za16[_f16]_vg1x2", "vm2d", "h", MergeNone, "aarch64_sme_fmls_single_vg1x2", [IsStreaming, IsInOutZA], []>; + def SVMLS_SINGLE_VG1x4_F16 : Inst<"svmls[_single]_za16[_f16]_vg1x4", "vm4d", "h", MergeNone, "aarch64_sme_fmls_single_vg1x4", [IsStreaming, IsInOutZA], []>; + + def SVMLA_LANE_VG1x2_F16 : Inst<"svmla_lane_za16[_f16]_vg1x2", "vm2di", "h", MergeNone, "aarch64_sme_fmla_lane_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLA_LANE_VG1x4_F16 : Inst<"svmla_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmla_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLS_LANE_VG1x2_F16 : Inst<"svmls_lane_za16[_f16]_vg1x2", "vm2di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x2", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; + def SVMLS_LANE_VG1x4_F16 : Inst<"svmls_lane_za16[_f16]_vg1x4", "vm4di", "h", MergeNone, "aarch64_sme_fmls_lane_vg1x4", [IsStreaming, IsInOutZA], [ImmCheck<3, ImmCheck0_7>]>; +} + +let TargetGuard = "sme2,b16b16" in { momchil-velikov wrote: Arm ARM, version K.a (March 2024) (https://developer.arm.com/documentation/ddi0487/ka ), page A2-173 > If FEAT_SVE_B16B16 is implemented, then FEAT_SME2 or FEAT_SVE2 is implemented. https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [CLANG][LLVM][AArch64]SME2.1 intrinsics for MOVAZ tile to 2/4 vectors (PR #88710)
@@ -1985,6 +1986,34 @@ void AArch64DAGToDAGISel::SelectMultiVectorMove(SDNode *N, unsigned NumVecs, CurDAG->RemoveDeadNode(N); } +template +void AArch64DAGToDAGISel::SelectMultiVectorMoveZ(SDNode *N, unsigned NumVecs, momchil-velikov wrote: The real question is why is this is a function template or, if you want, why `AArch64DAGToDAGISel::SelectMultiVectorMove` is a function template? Both the template parameters do not affect any type, so we aren't benefiting from any kind of parametric polymorphism, the parameters themselves are only passed as ordinary parameters to `SelectSMETileSlice` and as such can't participate in any constant folding that would warrant multiple instantiations (each of which is essentially a specialisation of the function). IMHO, it's OK to have the `Scale` and `NumVecs` separate since `SelectMultiVectorMove` is a hint we may need it some day. https://github.com/llvm/llvm-project/pull/88710 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -286,14 +286,26 @@ multiclass sme_outer_product_fp64 def : SME_ZA_Tile_TwoPred_TwoVec_Pat; } -multiclass sme2p1_fmop_tile_fp16 op, ZPRRegOp zpr_ty>{ - def NAME : sme_fp_outer_product_inst { +multiclass sme2p1_fmop_tile_f8f16 op> { + def NAME : sme_fp_outer_product_inst { bits<1> ZAda; let Inst{2-1} = 0b00; let Inst{0} = ZAda; } } +multiclass sme2p1_fmop_tile_fp16 op, ValueType vt, SDPatternOperator intrinsic = null_frag> { + def NAME : sme_fp_outer_product_inst, SMEPseudo2Instr { momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH 1/2] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for 16-bit non-widening FMLA/FMLS (PR #88553)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/88553 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { momchil-velikov wrote: Done. The Clang instrinsics use the same target features as the underlying assembly instructions. If the features on the assembly instruction are not entirely correct we should fix it, but in a separate patch. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -815,8 +815,8 @@ defm FMLS_VG4_M4Z2Z_H : sme2_dot_mla_add_sub_array_vg4_multi<"fmls", 0b0100011, defm FCVT_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvt", 0b0>; defm FCVTL_2ZZ_H : sme2p1_fp_cvt_vector_vg2_single<"fcvtl", 0b1>; -defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, ZPR16>; -defm FMOPS_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmops", 0b0, 0b1, 0b11, ZPR16>; +defm FMOPA_MPPZZ_H : sme2p1_fmop_tile_fp16<"fmopa", 0b0, 0b0, 0b11, nxv8f16, int_aarch64_sme_mopa_nonwide>; momchil-velikov wrote: Discussed offline. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: In fact we can reuse the existing `aarch64_sme_mopa` which is used for other non-widening operations and since they are polymorphic and non-widening the instantiation type is enough to disambiguate the operation. https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
@@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", momchil-velikov wrote: Done https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/88105 >From 2b0befb9078f8c9116ad52be937c8722045708ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..a18a5094a15ed3 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme2,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mopa", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]_m", "viPPdd", "b", + MergeNone, "aarch64_sme_mops", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2p1,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mopa", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]_m", "viPPdd", "h", +MergeNone, "aarch64_sme_mops", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..40fcad6a576483 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX + +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators (PR #88266)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/88266 According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void_svadd_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svadd_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x2_f16(uint32_t slice, svfloat16x2_t zn) __arm_streaming __arm_inout("za"); void_svsub_za16_vg1x4_f16(uint32_t slice, svfloat16x4_t zn) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From 2b0557d4a62476b827352b6775588cef15cecd33 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Wed, 10 Apr 2024 11:25:50 +0100 Subject: [PATCH] [AArch64] Add intrinsics for multi-vector to ZA array vector accumulators --- clang/include/clang/Basic/arm_sme.td | 11 + .../acle_sme2_add_sub_za16.c | 191 ++ .../acle_sme2_add_sub_za16.c | 26 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 4 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 16 +- .../AArch64/sme2-intrinsics-add-sub-za16.ll | 146 + 6 files changed, 384 insertions(+), 10 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-add-sub-za16.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..dcfaefa7a3e266 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -298,6 +298,17 @@ multiclass ZAAddSub { def NAME # _ZA64_VG1X2_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x2", "vm2", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x2", [IsStreaming, IsInOutZA], []>; def NAME # _ZA64_VG1X4_F64 : Inst<"sv" # n_suffix # "_za64[_{d}]_vg1x4", "vm4", "d", MergeNone, "aarch64_sme_" # n_suffix # "_za64_vg1x4", [IsStreaming, IsInOutZA], []>; } + + let TargetGuard = "sme2p1,sme-f16f16" in { +def NAME # _ZA16_VG1X2_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_F16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "h", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + + let TargetGuard = "sme2p1,b16b16" in { +def NAME # _ZA16_VG1X2_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x2", "vm2", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x2", [IsStreaming, IsInOutZA], []>; +def NAME # _ZA16_VG1X4_BF16 : Inst<"sv" # n_suffix # "_za16[_{d}]_vg1x4", "vm4", "b", MergeNone, "aarch64_sme_" # n_suffix # "_za16_vg1x4", [IsStreaming, IsInOutZA], []>; + } + } defm SVADD : ZAAddSub<"add">; diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c new file mode 100644 index 00..bdf07f86b9c93d --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_add_sub_za16.c @@ -0,0 +1,191 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +sme-f16f16 -target-feature +b16b16 -O2 -S -Werror -Wall -emit-llvm -o - %s | FileCheck %s -check-prefix CHECK-CXX + +// REQUIRES: aarch64-registered-target + +#include + +#ifdef SVE_OVERLOADED_FORMS +#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3) A1##A3 +#else +#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3 +#endif + +// CHECK-LABEL: define dso_local void @test_svadd_za16_vg1x2_f16( +// CHECK-SAME: i32 noundef [[SLICE:%.*]], [[ZN:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT:[[TMP0:%.*]] = tail call @llvm.vector.extract.nxv8f16.nxv16f16( [[ZN]], i64 0) +// CHECK-NEXT:[[TMP1:%.*]] = tail call
[clang] [llvm] [AArch64][SME] Add intrinsics for multi-vector BFCLAMP (PR #88251)
@@ -2148,6 +2148,11 @@ let TargetGuard = "sme2" in { def SVSCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "csil", MergeNone, "aarch64_sve_sclamp_single_x4", [IsStreaming], []>; def SVUCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "UcUsUiUl", MergeNone, "aarch64_sve_uclamp_single_x4", [IsStreaming], []>; def SVFCLAMP_X4 : SInst<"svclamp[_single_{d}_x4]", "44dd", "hfd", MergeNone, "aarch64_sve_fclamp_single_x4", [IsStreaming], []>; + + let TargetGuard = "b16b16"in { momchil-velikov wrote: I does not work. A test for target features (in `Sema`) would catch that. https://github.com/llvm/llvm-project/pull/88251 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
momchil-velikov wrote: > I noticed that file names and file location are using sme2 as prefix. > Shouldn't we use sme2p1 prefix for this intrinsic ? None of instructions seem to require `FEAT_SME2p1`: https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPA--non-widening---BFloat16-floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/BFMOPS--non-widening---BFloat16-floating-point-outer-product-and-subtract-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPA--non-widening---Floating-point-outer-product-and-accumulate-?lang=en https://developer.arm.com/documentation/ddi0602/2024-03/SME-Instructions/FMOPS--non-widening---Floating-point-outer-product-and-subtract-?lang=en https://github.com/llvm/llvm-project/pull/88105 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS (PR #88105)
https://github.com/momchil-velikov created https://github.com/llvm/llvm-project/pull/88105 According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. >From ee78ad565158c2d1301265415992511ea559e7a6 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 9 Apr 2024 10:52:41 +0100 Subject: [PATCH] [AArch64] Add intrinsics for non-widening FMOPA/FMOPS According to the specification in https://github.com/ARM-software/acle/pull/309 this adds the intrinsics void svmopa_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); void svmops_za16[_f16]_m(uint64_t tile, svbool_t pn, svbool_t pm, svfloat16_t zn, svfloat16_t zm) __arm_streaming __arm_inout("za"); as well as the corresponding `bf16` variants. --- clang/include/clang/Basic/arm_sme.td | 24 + .../acle_sme2_mopa_nonwide.c | 97 +++ .../acle_sme2_mopa_nonwide.c | 34 +++ llvm/include/llvm/IR/IntrinsicsAArch64.td | 5 +- .../lib/Target/AArch64/AArch64SMEInstrInfo.td | 10 +- llvm/lib/Target/AArch64/SMEInstrFormats.td| 16 ++- .../CodeGen/AArch64/sme2-intrinsics-mopa.ll | 42 7 files changed, 220 insertions(+), 8 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 clang/test/Sema/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c create mode 100644 llvm/test/CodeGen/AArch64/sme2-intrinsics-mopa.ll diff --git a/clang/include/clang/Basic/arm_sme.td b/clang/include/clang/Basic/arm_sme.td index 1ac6d5170ea283..e60a400b094850 100644 --- a/clang/include/clang/Basic/arm_sme.td +++ b/clang/include/clang/Basic/arm_sme.td @@ -674,3 +674,27 @@ let TargetGuard = "sme2" in { def SVLUTI2_LANE_ZT_X2 : Inst<"svluti2_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti2_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_7>]>; def SVLUTI4_LANE_ZT_X2 : Inst<"svluti4_lane_zt_{d}_x2", "2.di[i", "cUcsUsiUibhf", MergeNone, "aarch64_sme_luti4_lane_zt_x2", [IsStreaming, IsInZT0], [ImmCheck<0, ImmCheck0_0>, ImmCheck<2, ImmCheck0_3>]>; } + + +// SME2p1 - FMOPA, FMOPS (non-widening) +let TargetGuard = "sme,b16b16" in { + def SVMOPA_BF16_NW : SInst<"svmopa_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mopa_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_BF16_NW : SInst<"svmops_za16[_bf16]", "viPPdd", "b", + MergeOp1, "aarch64_sme_mops_nonwide", + [IsStreaming, IsInOutZA], + [ImmCheck<0, ImmCheck0_1>]>; +} + +let TargetGuard = "sme2,sme-f16f16" in { + def SVMOPA_F16_NW : SInst<"svmopa_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mopa_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; + def SVMOPS_F16_NW : SInst<"svmops_za16[_f16]", "viPPdd", "h", +MergeOp1, "aarch64_sme_mops_nonwide", +[IsStreaming, IsInOutZA], +[ImmCheck<0, ImmCheck0_1>]>; +} diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c new file mode 100644 index 00..36a75609534653 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_mopa_nonwide.c @@ -0,0 +1,97 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK +// RUN: %clang_cc1-x c++ -fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1 -target-feature +b16b16 -target-feature +sme-f16f16 -S -O2 -Werror -emit-llvm -o - %s | FileCheck %s -check-prefixes=CHECK-CXX +// RUN: %clang_cc1 -DSME_OVERLOADED_FORMS-fclang-abi-compat=latest -triple aarch64-none-linux-gnu -target-feature +sme2p1
[clang] [llvm] [AArch64] [SVE] Created intrinsics for DUPQ instr. (PR #83260)
@@ -10007,6 +10007,16 @@ multiclass sve2p1_dupq { bits<1> index; let Inst{20} = index; } + + def : SVE_2_Op_Imm_Pat(NAME # _B)>; momchil-velikov wrote: Change them to `_timm`. https://github.com/llvm/llvm-project/pull/83260 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [Clang][LLVM][AArch64]SVE2.1 update the intrinsics according to acle[1] (PR #76844)
https://github.com/momchil-velikov approved this pull request. https://github.com/llvm/llvm-project/pull/76844 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [TargetParser] Define AEK_FCMA and AEK_JSCVT for tsv110 (PR #75516)
@@ -81,6 +81,15 @@ static bool DecodeAArch64Features(const Driver , StringRef text, else return false; +// +jsconv and +complxnum implies +neon and +fp-armv8 momchil-velikov wrote: According to the latest Arm ARM (https://developer.arm.com/documentation/ddi0487/ja/?lang=en) the architecrtural extensions `FEAT_FCMA` and `FEAT_JSCVT` are mandatory in Armv8.3-a and are not optional in any architecture version. For such features, our convention is to not expose them as command-line options. https://github.com/llvm/llvm-project/pull/75516 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Done. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Yeah, I'll add the `IsStreamingCompatible` flag. What needs to be updated in tests? https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
momchil-velikov wrote: Rebased the clear the test run. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75596 >From 04a03eae3fcbdd57257ce3867615ec6be9d84e53 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 15 Dec 2023 12:18:53 + Subject: [PATCH 1/2] [AArch64] Update target feature requirements of SVE bfloat instructions According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 --- clang/include/clang/Basic/arm_sve.td | 2 +- .../acle_sve2p1_bfadd.c | 11 ++-- .../acle_sve2p1_bfmax.c | 11 ++-- .../acle_sve2p1_bfmaxnm.c | 11 ++-- .../acle_sve2p1_bfmin.c | 11 ++-- .../acle_sve2p1_bfminnm.c | 11 ++-- .../acle_sve2p1_bfmla.c | 11 ++-- .../acle_sve2p1_bfmls.c | 11 ++-- .../acle_sve2p1_bfmul.c | 11 ++-- .../acle_sve2p1_bfsub.c | 11 ++-- llvm/lib/Target/AArch64/AArch64.td| 4 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-- llvm/test/MC/AArch64/SVE2p1/bfadd.s | 43 ++-- llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 32 llvm/test/MC/AArch64/SVE2p1/bfmax.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmin.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmla.s | 44 +--- llvm/test/MC/AArch64/SVE2p1/bfmls.s | 45 +--- llvm/test/MC/AArch64/SVE2p1/bfmul.s | 51 +++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 43 ++-- 22 files changed, 311 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98d7028eb28309..e84d6e5e4cc602 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 327c4f078872b3..a3026fee3f6d29 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,10 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o -
[llvm] [clang] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
@@ -2066,7 +2066,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVPFALSE_COUNT_ALIAS : SInst<"svpfalse_c", "}v", "", MergeNone, "", [IsOverloadNone, IsStreamingCompatible]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { momchil-velikov wrote: Done. Removed comment as it it useless, the corresponding TargetGuard is just few lines above and the nesting structure is not at all complex. https://github.com/llvm/llvm-project/pull/75596 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [llvm] [AArch64] Update target feature requirements of SVE bfloat instructions (PR #75596)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75596 >From fc5c82e61efef3f1cd2f6606b12c358637a687f5 Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Fri, 15 Dec 2023 12:18:53 + Subject: [PATCH 1/2] [AArch64] Update target feature requirements of SVE bfloat instructions According to the latest update of the ISA https://developer.arm.com/documentation/ddi0602/2023-09/?lang=en all of the affected instruction encodings now require (FEAT_SVE2 or FEAT_SME2) and FEAT_SVE_B16B16 --- clang/include/clang/Basic/arm_sve.td | 2 +- .../acle_sve2p1_bfadd.c | 11 ++-- .../acle_sve2p1_bfmax.c | 11 ++-- .../acle_sve2p1_bfmaxnm.c | 11 ++-- .../acle_sve2p1_bfmin.c | 11 ++-- .../acle_sve2p1_bfminnm.c | 11 ++-- .../acle_sve2p1_bfmla.c | 11 ++-- .../acle_sve2p1_bfmls.c | 11 ++-- .../acle_sve2p1_bfmul.c | 11 ++-- .../acle_sve2p1_bfsub.c | 11 ++-- llvm/lib/Target/AArch64/AArch64.td| 4 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 10 ++-- llvm/test/MC/AArch64/SVE2p1/bfadd.s | 43 ++-- llvm/test/MC/AArch64/SVE2p1/bfclamp.s | 32 llvm/test/MC/AArch64/SVE2p1/bfmax.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmaxnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmin.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfminnm.s | 34 - llvm/test/MC/AArch64/SVE2p1/bfmla.s | 44 +--- llvm/test/MC/AArch64/SVE2p1/bfmls.s | 45 +--- llvm/test/MC/AArch64/SVE2p1/bfmul.s | 51 +++ llvm/test/MC/AArch64/SVE2p1/bfsub.s | 43 ++-- 22 files changed, 311 insertions(+), 198 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 98d7028eb28309..e84d6e5e4cc602 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2086,7 +2086,7 @@ let TargetGuard = "sve2p1|sme2" in { def SVCNTP_COUNT : SInst<"svcntp_{d}", "n}i", "QcQsQiQl", MergeNone, "aarch64_sve_cntp_{d}", [IsOverloadNone, IsStreamingCompatible], [ImmCheck<1, ImmCheck2_4_Mul2>]>; } -let TargetGuard = "sve2p1,b16b16" in { +let TargetGuard = "(sve2|sme2),b16b16" in { defm SVMUL_BF : SInstZPZZ<"svmul", "b", "aarch64_sve_fmul", "aarch64_sve_fmul_u">; defm SVADD_BF : SInstZPZZ<"svadd", "b", "aarch64_sve_fadd", "aarch64_sve_fadd_u">; defm SVSUB_BF : SInstZPZZ<"svsub", "b", "aarch64_sve_fsub", "aarch64_sve_fsub_u">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c index 327c4f078872b3..a3026fee3f6d29 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_bfadd.c @@ -1,10 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // REQUIRES: aarch64-registered-target -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK -// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -o /dev/null %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o - -x c++ %s | opt -S -p mem2reg,instcombine,tailcallelim | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve2 -target-feature +b16b16 -S -disable-O0-optnone -Werror -Wall -emit-llvm -o -
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Update names of the `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov edited https://github.com/llvm/llvm-project/pull/75200 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[llvm] [clang] [Clang][SVE2.1] Add floating-point variants of `svrevd_XX` (PR #75117)
https://github.com/momchil-velikov closed https://github.com/llvm/llvm-project/pull/75117 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[clang] [Clang][SVE2.1] Make a part of the name optional for `svwhileXX` builtins with predicate-as-counter (PR #75200)
https://github.com/momchil-velikov updated https://github.com/llvm/llvm-project/pull/75200 >From d97312680eff280210f588ef22416f845d31d2ef Mon Sep 17 00:00:00 2001 From: Momchil Velikov Date: Tue, 12 Dec 2023 15:08:33 + Subject: [PATCH 1/4] [Clang][SVE2.1] Make the part of the name optional for `svewhileXX` builtins with predicate-as-counter The `_s64`/`_u64` part can be omitted now. It's inferred from the argument types. --- clang/include/clang/Basic/arm_sve.td | 18 ++- .../acle_sve2p1_while_pn.c| 136 +- 2 files changed, 80 insertions(+), 74 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index a429a3c5fe378a..9f4cf98ea28a07 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1979,17 +1979,15 @@ let TargetGuard = "sve2p1|sme2" in { //FIXME: Replace IsStreamingCompatible with IsStreamingOrHasSVE2p1 when available def SVPEXT_SINGLE : SInst<"svpext_lane_{d}", "P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_3>]>; def SVPEXT_X2 : SInst<"svpext_lane_{d}_x2", "2.P}i", "QcQsQiQl", MergeNone, "aarch64_sve_pext_x2", [IsStreamingCompatible], [ImmCheck<1, ImmCheck0_1>]>; -} -let TargetGuard = "sve2p1" in { -def SVWHILEGE_COUNT : SInst<"svwhilege_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELE_COUNT : SInst<"svwhilele_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELT_COUNT : SInst<"svwhilelt_{d}", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELO_COUNT : SInst<"svwhilelo_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILELS_COUNT : SInst<"svwhilels_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; -def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGE_COUNT : SInst<"svwhilege_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilege_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEGT_COUNT : SInst<"svwhilegt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilegt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELE_COUNT : SInst<"svwhilele_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilele_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELT_COUNT : SInst<"svwhilelt_{d}[_{1}]", "}lli", "QcQsQiQl", MergeNone, "aarch64_sve_whilelt_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELO_COUNT : SInst<"svwhilelo_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilelo_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILELS_COUNT : SInst<"svwhilels_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilels_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHI_COUNT : SInst<"svwhilehi_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehi_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; +def SVWHILEHS_COUNT : SInst<"svwhilehs_{d}[_{1}]", "}nni", "QcQsQiQl", MergeNone, "aarch64_sve_whilehs_{d}", [IsOverloadNone], [ImmCheck<2, ImmCheck2_4_Mul2>]>; def SVLD1B_X2 : MInst<"svld1[_{2}]_x2", "2}c", "cUc", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; def SVLD1H_X2 : MInst<"svld1[_{2}]_x2", "2}c", "sUshb", [IsStructLoad], MemEltTyDefault, "aarch64_sve_ld1_pn_x2">; diff --git a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c index 3dbb38582b676c..08c1ee949c1116 100644 --- a/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c +++ b/clang/test/CodeGen/aarch64-sve2p1-intrinsics/acle_sve2p1_while_pn.c @@ -1,12 +1,20 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s // RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve2p1 -S -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK +// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature