https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/77947
>From 83374edb041c2440f6bfb9413f62882d6e7b6b19 Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Fri, 12 Jan 2024 14:01:10 +0000 Subject: [PATCH 1/2] [AArch64][SME2] Refine fcvtu/fcvts/scvtf/ucvtf Rename intrinsics for fcvtu to fcvtzu and fcvts to fcvtzs. Use llvm_anyvector_ty for both multi vector returns and operands, therefore the return and operands can be specified in the intrinsic call, e.g. @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32 --- clang/include/clang/Basic/TargetBuiltins.h | 1 + clang/include/clang/Basic/arm_sve.td | 18 +++++----- clang/include/clang/Basic/arm_sve_sme_incl.td | 1 + clang/lib/CodeGen/CGBuiltin.cpp | 2 +- .../aarch64-sme2-intrinsics/acle_sme2_cvt.c | 32 ++++++++--------- llvm/include/llvm/IR/IntrinsicsAArch64.td | 36 +++++++------------ .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 8 ++--- .../CodeGen/AArch64/sme2-intrinsics-cvt.ll | 32 ++++++++--------- 8 files changed, 60 insertions(+), 70 deletions(-) diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index c9f9cbec7493bfc..d9eba18dab43f38 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -310,6 +310,7 @@ namespace clang { bool isReadZA() const { return Flags & IsReadZA; } bool isWriteZA() const { return Flags & IsWriteZA; } bool isReductionQV() const { return Flags & IsReductionQV; } + bool isOverloadMultiVecCvt() const { return Flags & IsOverloadMultiVecCvt; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } }; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 51cf8e29129c69b..2c3be6e795f62ac 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -2244,15 +2244,15 @@ let TargetGuard = "sme2" in { def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>; def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>; - def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming], []>; - def SVCVT_U32_F32_X2 : SInst<"svcvt_u32[_{d}_x2]", "2.u2.d", "f", MergeNone, "aarch64_sve_fcvtu_x2", [IsStreaming], []>; - def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming], []>; - def SVCVT_S32_F32_X2 : SInst<"svcvt_s32[_{d}_x2]", "2.x2.d", "f", MergeNone, "aarch64_sve_fcvts_x2", [IsStreaming], []>; - - def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming], []>; - def SVCVT_U32_F32_X4 : SInst<"svcvt_u32[_{d}_x4]", "4.u4.d", "f", MergeNone, "aarch64_sve_fcvtu_x4", [IsStreaming], []>; - def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming], []>; - def SVCVT_S32_F32_X4 : SInst<"svcvt_s32[_{d}_x4]", "4.x4.d", "f", MergeNone, "aarch64_sve_fcvts_x4", [IsStreaming], []>; + def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_U32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_S32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "i", MergeNone, "aarch64_sve_fcvtzs_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; + + def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_U32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; } // diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index c05684c0b248ad3..94ef66c53701930 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -229,6 +229,7 @@ def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics t def IsInZA : FlagType<0x80000000000>; def IsOutZA : FlagType<0x100000000000>; def IsInOutZA : FlagType<0x200000000000>; +def IsOverloadMultiVecCvt : FlagType<0x400000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType<int val> { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index cb65c7373a34c6b..570b380297c9e69 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10056,7 +10056,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *DefaultType = getSVEType(TypeFlags); - if (TypeFlags.isOverloadWhile()) + if (TypeFlags.isOverloadWhile() || TypeFlags.isOverloadMultiVecCvt()) return {DefaultType, Ops[1]->getType()}; if (TypeFlags.isOverloadWhileRW()) diff --git a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c index f596b3167f2b82e..b292e54ea457f05 100644 --- a/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c +++ b/clang/test/CodeGen/aarch64-sme2-intrinsics/acle_sme2_cvt.c @@ -59,7 +59,7 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -70,7 +70,7 @@ svbfloat16_t test_cvt_bf16_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -85,7 +85,7 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -96,7 +96,7 @@ svfloat32x2_t test_svcvt_f32_u32_x2(svuint32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv8i32(<vscale x 8 x i32> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x float> @llvm.vector.insert.nxv8f32.nxv4f32(<vscale x 8 x float> poison, <vscale x 4 x float> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float> } [[TMP2]], 1 @@ -111,7 +111,7 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -122,7 +122,7 @@ svfloat32x2_t test_svcvt_f32_s32_x2(svint32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -137,7 +137,7 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -148,7 +148,7 @@ svuint32x2_t test_svcvt_u32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN:%.*]], i64 0) // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv8f32(<vscale x 8 x float> [[ZN]], i64 4) -// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) +// CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]]) // CPP-CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 0 // CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call <vscale x 8 x i32> @llvm.vector.insert.nxv8i32.nxv4i32(<vscale x 8 x i32> poison, <vscale x 4 x i32> [[TMP3]], i64 0) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP2]], 1 @@ -166,7 +166,7 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -183,7 +183,7 @@ svint32x2_t test_svcvt_s32_f32_x2(svfloat32x2_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -204,7 +204,7 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -221,7 +221,7 @@ svfloat32x4_t test_svcvt_f32_u32_x4(svuint32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x i32> @llvm.vector.extract.nxv4i32.nxv16i32(<vscale x 16 x i32> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> [[TMP0]], <vscale x 4 x i32> [[TMP1]], <vscale x 4 x i32> [[TMP2]], <vscale x 4 x i32> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x float> @llvm.vector.insert.nxv16f32.nxv4f32(<vscale x 16 x float> poison, <vscale x 4 x float> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float> } [[TMP4]], 1 @@ -242,7 +242,7 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -259,7 +259,7 @@ svfloat32x4_t test_svcvt_f32_s32_x4(svint32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -280,7 +280,7 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 @@ -297,7 +297,7 @@ svuint32x4_t test_svcvt_u32_f32_x4(svfloat32x4_t zn) __arm_streaming { // CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 4) // CPP-CHECK-NEXT: [[TMP2:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 8) // CPP-CHECK-NEXT: [[TMP3:%.*]] = tail call <vscale x 4 x float> @llvm.vector.extract.nxv4f32.nxv16f32(<vscale x 16 x float> [[ZN]], i64 12) -// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) +// CPP-CHECK-NEXT: [[TMP4:%.*]] = tail call { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> [[TMP0]], <vscale x 4 x float> [[TMP1]], <vscale x 4 x float> [[TMP2]], <vscale x 4 x float> [[TMP3]]) // CPP-CHECK-NEXT: [[TMP5:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 0 // CPP-CHECK-NEXT: [[TMP6:%.*]] = tail call <vscale x 16 x i32> @llvm.vector.insert.nxv16i32.nxv4i32(<vscale x 16 x i32> poison, <vscale x 4 x i32> [[TMP5]], i64 0) // CPP-CHECK-NEXT: [[TMP7:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32> } [[TMP4]], 1 diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index 9955e78ef3ce890..921e5b95ae03e8d 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -3095,26 +3095,14 @@ let TargetPrefix = "aarch64" in { [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], [IntrNoMem]>; - class SME2_CVT_ItoF_X2_Intrinsic + class SME2_CVT_X2_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>], - [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], + [llvm_anyvector_ty, LLVMMatchType<1>], [IntrNoMem]>; - class SME2_CVT_FtoI_X2_Intrinsic - : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], - [llvm_anyvector_ty, LLVMMatchType<0>], - [IntrNoMem]>; - - class SME2_CVT_ItoF_X4_Intrinsic + class SME2_CVT_X4_Intrinsic : DefaultAttrsIntrinsic<[llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], - [LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, - LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], - [IntrNoMem]>; - - class SME2_CVT_FtoI_X4_Intrinsic - : DefaultAttrsIntrinsic<[LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>, - LLVMVectorOfBitcastsToInt<0>, LLVMVectorOfBitcastsToInt<0>], - [llvm_anyvector_ty, LLVMMatchType<0>, LLVMMatchType<0>, LLVMMatchType<0>], + [llvm_anyvector_ty, LLVMMatchType<1>, LLVMMatchType<1>, LLVMMatchType<1>], [IntrNoMem]>; class SME2_BFMLS_Intrinsic @@ -3403,14 +3391,14 @@ let TargetPrefix = "aarch64" in { // def int_aarch64_sve_fcvt_x2 : SME2_CVT_VG2_SINGLE_Intrinsic; def int_aarch64_sve_bfcvt_x2 : SME2_CVT_VG2_SINGLE_BF16_Intrinsic; - def int_aarch64_sve_fcvts_x2 : SME2_CVT_FtoI_X2_Intrinsic; - def int_aarch64_sve_fcvtu_x2 : SME2_CVT_FtoI_X2_Intrinsic; - def int_aarch64_sve_scvtf_x2 : SME2_CVT_ItoF_X2_Intrinsic; - def int_aarch64_sve_ucvtf_x2 : SME2_CVT_ItoF_X2_Intrinsic; - def int_aarch64_sve_fcvts_x4 : SME2_CVT_FtoI_X4_Intrinsic; - def int_aarch64_sve_fcvtu_x4 : SME2_CVT_FtoI_X4_Intrinsic; - def int_aarch64_sve_scvtf_x4 : SME2_CVT_ItoF_X4_Intrinsic; - def int_aarch64_sve_ucvtf_x4 : SME2_CVT_ItoF_X4_Intrinsic; + def int_aarch64_sve_fcvtzs_x2 : SME2_CVT_X2_Intrinsic; + def int_aarch64_sve_fcvtzu_x2 : SME2_CVT_X2_Intrinsic; + def int_aarch64_sve_scvtf_x2 : SME2_CVT_X2_Intrinsic; + def int_aarch64_sve_ucvtf_x2 : SME2_CVT_X2_Intrinsic; + def int_aarch64_sve_fcvtzs_x4 : SME2_CVT_X4_Intrinsic; + def int_aarch64_sve_fcvtzu_x4 : SME2_CVT_X4_Intrinsic; + def int_aarch64_sve_scvtf_x4 : SME2_CVT_X4_Intrinsic; + def int_aarch64_sve_ucvtf_x4 : SME2_CVT_X4_Intrinsic; // // Multi-vector saturating extract narrow diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 1437a2b5acbcf6f..163ed520a8a677b 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -5627,25 +5627,25 @@ void AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::FMINNM_VG4_4Z4Z_D})) SelectDestructiveMultiIntrinsic(Node, 4, true, Op); return; - case Intrinsic::aarch64_sve_fcvts_x2: + case Intrinsic::aarch64_sve_fcvtzs_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZS_2Z2Z_StoS); return; case Intrinsic::aarch64_sve_scvtf_x2: SelectCVTIntrinsic(Node, 2, AArch64::SCVTF_2Z2Z_StoS); return; - case Intrinsic::aarch64_sve_fcvtu_x2: + case Intrinsic::aarch64_sve_fcvtzu_x2: SelectCVTIntrinsic(Node, 2, AArch64::FCVTZU_2Z2Z_StoS); return; case Intrinsic::aarch64_sve_ucvtf_x2: SelectCVTIntrinsic(Node, 2, AArch64::UCVTF_2Z2Z_StoS); return; - case Intrinsic::aarch64_sve_fcvts_x4: + case Intrinsic::aarch64_sve_fcvtzs_x4: SelectCVTIntrinsic(Node, 4, AArch64::FCVTZS_4Z4Z_StoS); return; case Intrinsic::aarch64_sve_scvtf_x4: SelectCVTIntrinsic(Node, 4, AArch64::SCVTF_4Z4Z_StoS); return; - case Intrinsic::aarch64_sve_fcvtu_x4: + case Intrinsic::aarch64_sve_fcvtzu_x4: SelectCVTIntrinsic(Node, 4, AArch64::FCVTZU_4Z4Z_StoS); return; case Intrinsic::aarch64_sve_ucvtf_x4: diff --git a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll index 2f7e2ba5bede54c..bc1db878cbd3132 100644 --- a/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll +++ b/llvm/test/CodeGen/AArch64/sme2-intrinsics-cvt.ll @@ -41,7 +41,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_s32_f32(<v ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: fcvtzs { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res } @@ -54,7 +54,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: fcvtzs { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res } @@ -68,7 +68,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>} @multi_vector_cvt_x2_u32_f32(<v ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: fcvtzu { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1) ret {<vscale x 4 x i32>, <vscale x 4 x i32>} %res } @@ -81,7 +81,7 @@ define {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: fcvtzu { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) + %res = call {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float> %zn0, <vscale x 4 x float> %zn1, <vscale x 4 x float> %zn2, <vscale x 4 x float> %zn3) ret {<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>} %res } @@ -95,7 +95,7 @@ define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_s3 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: scvtf { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) ret {<vscale x 4 x float>, <vscale x 4 x float>} %res } @@ -108,7 +108,7 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: scvtf { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res } @@ -122,7 +122,7 @@ define {<vscale x 4 x float>, <vscale x 4 x float>} @multi_vector_cvt_x2_f32_u3 ; CHECK-NEXT: mov z2.d, z1.d ; CHECK-NEXT: ucvtf { z0.s, z1.s }, { z2.s, z3.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) + %res = call {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1) ret {<vscale x 4 x float>, <vscale x 4 x float>} %res } @@ -135,17 +135,17 @@ define {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale ; CHECK-NEXT: mov z4.d, z1.d ; CHECK-NEXT: ucvtf { z0.s - z3.s }, { z4.s - z7.s } ; CHECK-NEXT: ret - %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) + %res = call {<vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32> %zn0, <vscale x 4 x i32> %zn1, <vscale x 4 x i32> %zn2, <vscale x 4 x i32> %zn3) ret {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} %res } declare <vscale x 8 x half> @llvm.aarch64.sve.fcvt.x2.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>) declare <vscale x 8 x bfloat> @llvm.aarch64.sve.bfcvt.x2(<vscale x 4 x float>, <vscale x 4 x float>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x2.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvts.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtu.x4.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) -declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) -declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x2.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x2.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzs.x4.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x i32>, <vscale x 4 x i32>,<vscale x 4 x i32>, <vscale x 4 x i32>} @llvm.aarch64.sve.fcvtzu.x4.nxv4i32.nxv4f32(<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>,<vscale x 4 x float>) +declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.scvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) +declare {<vscale x 4 x float>, <vscale x 4 x float>,<vscale x 4 x float>, <vscale x 4 x float>} @llvm.aarch64.sve.ucvtf.x4.nxv4f32.nxv4i32(<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>,<vscale x 4 x i32>) >From 240a83f468ccf5d1e3b48426284a30c3c7cb2116 Mon Sep 17 00:00:00 2001 From: Matt Devereau <matthew.dever...@arm.com> Date: Fri, 19 Jan 2024 15:56:38 +0000 Subject: [PATCH 2/2] Combine IsOverloadWhile and IsOverloadMultiVecCvt --- clang/include/clang/Basic/TargetBuiltins.h | 5 +- clang/include/clang/Basic/arm_sve.td | 48 ++++++------- clang/include/clang/Basic/arm_sve_sme_incl.td | 71 +++++++++---------- clang/lib/CodeGen/CGBuiltin.cpp | 2 +- 4 files changed, 63 insertions(+), 63 deletions(-) diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h index d9eba18dab43f38..c31834fb52a904d 100644 --- a/clang/include/clang/Basic/TargetBuiltins.h +++ b/clang/include/clang/Basic/TargetBuiltins.h @@ -291,7 +291,9 @@ namespace clang { bool isZExtReturn() const { return Flags & IsZExtReturn; } bool isByteIndexed() const { return Flags & IsByteIndexed; } bool isOverloadNone() const { return Flags & IsOverloadNone; } - bool isOverloadWhile() const { return Flags & IsOverloadWhile; } + bool isOverloadWhileOrMultiVecCvt() const { + return Flags & IsOverloadWhileOrMultiVecCvt; + } bool isOverloadDefault() const { return !(Flags & OverloadKindMask); } bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; } bool isOverloadCvt() const { return Flags & IsOverloadCvt; } @@ -310,7 +312,6 @@ namespace clang { bool isReadZA() const { return Flags & IsReadZA; } bool isWriteZA() const { return Flags & IsWriteZA; } bool isReductionQV() const { return Flags & IsReductionQV; } - bool isOverloadMultiVecCvt() const { return Flags & IsOverloadMultiVecCvt; } uint64_t getBits() const { return Flags; } bool isFlagSet(uint64_t Flag) const { return Flags & Flag; } }; diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 2c3be6e795f62ac..4c5c1b5603f181a 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -749,14 +749,14 @@ def SVCMPLS_WIDE_N : SInst<"svcmple_wide[_n_{d}]", "PPdj", "UcUsUi", MergeNone, //////////////////////////////////////////////////////////////////////////////// // While comparisons -def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILELE_S32 : SInst<"svwhilele_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELE_S64 : SInst<"svwhilele_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilele", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELO_U32 : SInst<"svwhilelt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELO_U64 : SInst<"svwhilelt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilelo", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELS_U32 : SInst<"svwhilele_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELS_U64 : SInst<"svwhilele_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilels", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELT_S32 : SInst<"svwhilelt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILELT_S64 : SInst<"svwhilelt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilelt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; //////////////////////////////////////////////////////////////////////////////// // Counting bit @@ -1336,14 +1336,14 @@ let TargetGuard = "sve2p1|sme2" in { //////////////////////////////////////////////////////////////////////////////// // SVE2 WhileGE/GT let TargetGuard = "sve2" in { -def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; -def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhile, IsStreamingCompatible]>; +def SVWHILEGE_S32 : SInst<"svwhilege_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEGE_S64 : SInst<"svwhilege_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilege", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEGT_S32 : SInst<"svwhilegt_{d}[_{1}]", "Pkk", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEGT_S64 : SInst<"svwhilegt_{d}[_{1}]", "Pll", "PcPsPiPl", MergeNone, "aarch64_sve_whilegt", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEHI_U32 : SInst<"svwhilegt_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEHI_U64 : SInst<"svwhilegt_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehi", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEHS_U32 : SInst<"svwhilege_{d}[_{1}]", "Pmm", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; +def SVWHILEHS_U64 : SInst<"svwhilege_{d}[_{1}]", "Pnn", "PUcPUsPUiPUl", MergeNone, "aarch64_sve_whilehs", [IsOverloadWhileOrMultiVecCvt, IsStreamingCompatible]>; } let TargetGuard = "sve2p1|sme2" in { @@ -2244,15 +2244,15 @@ let TargetGuard = "sme2" in { def SVCVT_F16_X2 : SInst<"svcvt_f16[_f32_x2]", "e2", "f", MergeNone, "aarch64_sve_fcvt_x2", [IsStreaming],[]>; def SVCVT_BF16_X2 : SInst<"svcvt_bf16[_f32_x2]", "$2", "f", MergeNone, "aarch64_sve_bfcvt_x2", [IsOverloadNone, IsStreaming],[]>; - def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_U32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_S32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "i", MergeNone, "aarch64_sve_fcvtzs_x2", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_F32_U32_X2 : SInst<"svcvt_{d}[_u32_x2]", "2.d2.u", "f", MergeNone, "aarch64_sve_ucvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_U32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_F32_S32_X2 : SInst<"svcvt_{d}[_s32_x2]", "2.d2.x", "f", MergeNone, "aarch64_sve_scvtf_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_S32_F32_X2 : SInst<"svcvt_{d}[_f32_x2]", "2.d2.M", "i", MergeNone, "aarch64_sve_fcvtzs_x2", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; - def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_U32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; - def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadMultiVecCvt], []>; + def SVCVT_F32_U32_X4 : SInst<"svcvt_{d}[_u32_x4]", "4.d4.u", "f", MergeNone, "aarch64_sve_ucvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_U32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "Ui", MergeNone, "aarch64_sve_fcvtzu_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_F32_S32_X4 : SInst<"svcvt_{d}[_s32_x4]", "4.d4.x", "f", MergeNone, "aarch64_sve_scvtf_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; + def SVCVT_S32_F32_X4 : SInst<"svcvt_{d}[_f32_x4]", "4.d4.M", "i", MergeNone, "aarch64_sve_fcvtzs_x4", [IsStreaming, IsOverloadWhileOrMultiVecCvt], []>; } // diff --git a/clang/include/clang/Basic/arm_sve_sme_incl.td b/clang/include/clang/Basic/arm_sve_sme_incl.td index 94ef66c53701930..e6ad84676053b5c 100644 --- a/clang/include/clang/Basic/arm_sve_sme_incl.td +++ b/clang/include/clang/Basic/arm_sve_sme_incl.td @@ -194,42 +194,41 @@ def FirstSplatOperand : FlagType<0x00000400>; // These flags are used to specify which scalar operand // needs to be duplicated/splatted into a vector. // : : -def SplatOperandMask : FlagType<0x00001C00>; -def IsLoad : FlagType<0x00002000>; -def IsStore : FlagType<0x00004000>; -def IsGatherLoad : FlagType<0x00008000>; -def IsScatterStore : FlagType<0x00010000>; -def IsStructLoad : FlagType<0x00020000>; -def IsStructStore : FlagType<0x00040000>; -def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default -def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. -def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. -def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. -def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. -def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. -def IsByteIndexed : FlagType<0x01000000>; -def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. -def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. -def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. -def IsGatherPrefetch : FlagType<0x10000000>; -def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. -def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. -def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. -def IsTupleCreate : FlagType<0x100000000>; -def IsTupleGet : FlagType<0x200000000>; -def IsTupleSet : FlagType<0x400000000>; -def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X. -def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X. -def IsStreaming : FlagType<0x2000000000>; -def IsStreamingCompatible : FlagType<0x4000000000>; -def IsReadZA : FlagType<0x8000000000>; -def IsWriteZA : FlagType<0x10000000000>; -def IsReductionQV : FlagType<0x20000000000>; -def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1. -def IsInZA : FlagType<0x80000000000>; -def IsOutZA : FlagType<0x100000000000>; -def IsInOutZA : FlagType<0x200000000000>; -def IsOverloadMultiVecCvt : FlagType<0x400000000000>; +def SplatOperandMask : FlagType<0x00001C00>; +def IsLoad : FlagType<0x00002000>; +def IsStore : FlagType<0x00004000>; +def IsGatherLoad : FlagType<0x00008000>; +def IsScatterStore : FlagType<0x00010000>; +def IsStructLoad : FlagType<0x00020000>; +def IsStructStore : FlagType<0x00040000>; +def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-extend by default +def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types. +def IsOverloadWhileOrMultiVecCvt : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types. +def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types. +def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types. +def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type. +def IsByteIndexed : FlagType<0x01000000>; +def IsAppendSVALL : FlagType<0x02000000>; // Appends SV_ALL as the last operand. +def IsInsertOp1SVALL : FlagType<0x04000000>; // Inserts SV_ALL as the second operand. +def IsPrefetch : FlagType<0x08000000>; // Contiguous prefetches. +def IsGatherPrefetch : FlagType<0x10000000>; +def ReverseCompare : FlagType<0x20000000>; // Compare operands must be swapped. +def ReverseUSDOT : FlagType<0x40000000>; // Unsigned/signed operands must be swapped. +def IsUndef : FlagType<0x80000000>; // Codegen `undef` of given type. +def IsTupleCreate : FlagType<0x100000000>; +def IsTupleGet : FlagType<0x200000000>; +def IsTupleSet : FlagType<0x400000000>; +def ReverseMergeAnyBinOp : FlagType<0x800000000>; // e.g. Implement SUBR_X using SUB_X. +def ReverseMergeAnyAccOp : FlagType<0x1000000000>; // e.g. Implement MSB_X using MLS_X. +def IsStreaming : FlagType<0x2000000000>; +def IsStreamingCompatible : FlagType<0x4000000000>; +def IsReadZA : FlagType<0x8000000000>; +def IsWriteZA : FlagType<0x10000000000>; +def IsReductionQV : FlagType<0x20000000000>; +def IsStreamingOrSVE2p1 : FlagType<0x40000000000>; // Use for intrinsics that are common between sme/sme2 and sve2p1. +def IsInZA : FlagType<0x80000000000>; +def IsOutZA : FlagType<0x100000000000>; +def IsInOutZA : FlagType<0x200000000000>; // These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h class ImmCheckType<int val> { diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 570b380297c9e69..7ef764b8e1ac80e 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -10056,7 +10056,7 @@ CodeGenFunction::getSVEOverloadTypes(const SVETypeFlags &TypeFlags, llvm::Type *DefaultType = getSVEType(TypeFlags); - if (TypeFlags.isOverloadWhile() || TypeFlags.isOverloadMultiVecCvt()) + if (TypeFlags.isOverloadWhileOrMultiVecCvt()) return {DefaultType, Ops[1]->getType()}; if (TypeFlags.isOverloadWhileRW()) _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits