https://github.com/paulwalker-arm created https://github.com/llvm/llvm-project/pull/170894
None >From 08a70da9f9ddc1f83eeb062bdb6299c011cb7ffb Mon Sep 17 00:00:00 2001 From: Paul Walker <[email protected]> Date: Tue, 2 Dec 2025 12:33:17 +0000 Subject: [PATCH] [LLVM][AArch64] Add "u" variants of sve.[s,u]hadd intrinsics --- clang/include/clang/Basic/arm_sve.td | 8 +- .../AArch64/sve2-intrinsics/acle_sve2_hsub.c | 64 ++-- .../AArch64/sve2-intrinsics/acle_sve2_hsubr.c | 64 ++-- llvm/include/llvm/IR/IntrinsicsAArch64.td | 2 + .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 11 +- llvm/lib/Target/AArch64/AArch64SchedA510.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseN2.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseN3.td | 2 +- .../Target/AArch64/AArch64SchedNeoverseV2.td | 2 +- .../AArch64/AArch64TargetTransformInfo.cpp | 8 + .../sve2-intrinsics-uniform-dsp-undef.ll | 296 ++++++++++++++++++ .../sve-intrinsic-comb-no-active-lanes.ll | 12 +- .../sve-intrinsic-simplify-to-u-form.ll | 44 +++ 13 files changed, 433 insertions(+), 84 deletions(-) diff --git a/clang/include/clang/Basic/arm_sve.td b/clang/include/clang/Basic/arm_sve.td index 41179207dd060..1b1350f1b2ec1 100644 --- a/clang/include/clang/Basic/arm_sve.td +++ b/clang/include/clang/Basic/arm_sve.td @@ -1280,10 +1280,10 @@ defm SVQSUB_S : SInstZPZZ<"svqsub", "csli", "aarch64_sve_sqsub", "aarch64 defm SVQSUB_U : SInstZPZZ<"svqsub", "UcUsUiUl", "aarch64_sve_uqsub", "aarch64_sve_uqsub_u">; defm SVQSUBR_S : SInstZPZZ<"svqsubr", "csli", "aarch64_sve_sqsubr", "aarch64_sve_sqsub_u", [ReverseMergeAnyBinOp]>; defm SVQSUBR_U : SInstZPZZ<"svqsubr", "UcUsUiUl", "aarch64_sve_uqsubr", "aarch64_sve_uqsub_u", [ReverseMergeAnyBinOp]>; -defm SVHSUB_S : SInstZPZZ<"svhsub", "csli", "aarch64_sve_shsub", "aarch64_sve_shsub">; -defm SVHSUB_U : SInstZPZZ<"svhsub", "UcUsUiUl", "aarch64_sve_uhsub", "aarch64_sve_uhsub">; -defm SVHSUBR_S : SInstZPZZ<"svhsubr", "csli", "aarch64_sve_shsubr", "aarch64_sve_shsubr">; -defm SVHSUBR_U : SInstZPZZ<"svhsubr", "UcUsUiUl", "aarch64_sve_uhsubr", "aarch64_sve_uhsubr">; +defm SVHSUB_S : SInstZPZZ<"svhsub", "csli", "aarch64_sve_shsub", "aarch64_sve_shsub_u">; +defm SVHSUB_U : SInstZPZZ<"svhsub", "UcUsUiUl", "aarch64_sve_uhsub", "aarch64_sve_uhsub_u">; +defm SVHSUBR_S : SInstZPZZ<"svhsubr", "csli", "aarch64_sve_shsubr", "aarch64_sve_shsub_u", [ReverseMergeAnyBinOp]>; +defm SVHSUBR_U : SInstZPZZ<"svhsubr", "UcUsUiUl", "aarch64_sve_uhsubr", "aarch64_sve_uhsub_u", [ReverseMergeAnyBinOp]>; defm SVQABS : SInstZPZ<"svqabs", "csil", "aarch64_sve_sqabs">; defm SVQNEG : SInstZPZ<"svqneg", "csil", "aarch64_sve_sqneg">; diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c index 22c11466a7288..e101dd2fe3399 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsub.c @@ -297,12 +297,12 @@ svuint64_t test_svhsub_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) // CHECK-LABEL: @test_svhsub_s8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svhsub_s8_xu10__SVBool_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svint8_t test_svhsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) @@ -313,13 +313,13 @@ svint8_t test_svhsub_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) // CHECK-LABEL: @test_svhsub_s16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_s16_xu10__SVBool_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svint16_t test_svhsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) @@ -330,13 +330,13 @@ svint16_t test_svhsub_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) // CHECK-LABEL: @test_svhsub_s32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_s32_xu10__SVBool_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svint32_t test_svhsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) @@ -347,13 +347,13 @@ svint32_t test_svhsub_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) // CHECK-LABEL: @test_svhsub_s64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_s64_xu10__SVBool_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svint64_t test_svhsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) @@ -363,12 +363,12 @@ svint64_t test_svhsub_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) // CHECK-LABEL: @test_svhsub_u8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z16test_svhsub_u8_xu10__SVBool_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svhsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) @@ -379,13 +379,13 @@ svuint8_t test_svhsub_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) // CHECK-LABEL: @test_svhsub_u16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_u16_xu10__SVBool_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svuint16_t test_svhsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) @@ -396,13 +396,13 @@ svuint16_t test_svhsub_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) // CHECK-LABEL: @test_svhsub_u32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_u32_xu10__SVBool_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svuint32_t test_svhsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) @@ -413,13 +413,13 @@ svuint32_t test_svhsub_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) // CHECK-LABEL: @test_svhsub_u64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z17test_svhsub_u64_xu10__SVBool_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svuint64_t test_svhsub_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) @@ -775,14 +775,14 @@ svuint64_t test_svhsub_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svhsub_n_s8_xu10__SVBool_tu10__SVInt8_ta( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) @@ -795,7 +795,7 @@ svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_s16_xu10__SVBool_tu11__SVInt16_ts( @@ -803,7 +803,7 @@ svint8_t test_svhsub_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) @@ -816,7 +816,7 @@ svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_s32_xu10__SVBool_tu11__SVInt32_ti( @@ -824,7 +824,7 @@ svint16_t test_svhsub_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) @@ -837,7 +837,7 @@ svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_s64_xu10__SVBool_tu11__SVInt64_tl( @@ -845,7 +845,7 @@ svint32_t test_svhsub_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svint64_t test_svhsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) @@ -857,14 +857,14 @@ svint64_t test_svhsub_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z18test_svhsub_n_u8_xu10__SVBool_tu11__SVUint8_th( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) @@ -877,7 +877,7 @@ svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_u16_xu10__SVBool_tu12__SVUint16_tt( @@ -885,7 +885,7 @@ svuint8_t test_svhsub_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) @@ -898,7 +898,7 @@ svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_u32_xu10__SVBool_tu12__SVUint32_tj( @@ -906,7 +906,7 @@ svuint16_t test_svhsub_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) @@ -919,7 +919,7 @@ svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z19test_svhsub_n_u64_xu10__SVBool_tu12__SVUint64_tm( @@ -927,7 +927,7 @@ svuint32_t test_svhsub_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svuint64_t test_svhsub_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) diff --git a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c index 236ee3b5320e1..0f4fd0d84aff4 100644 --- a/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c +++ b/clang/test/CodeGen/AArch64/sve2-intrinsics/acle_sve2_hsubr.c @@ -297,12 +297,12 @@ svuint64_t test_svhsubr_u64_m(svbool_t pg, svuint64_t op1, svuint64_t op2) // CHECK-LABEL: @test_svhsubr_s8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svhsubr_s8_xu10__SVBool_tu10__SVInt8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svint8_t test_svhsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) @@ -313,13 +313,13 @@ svint8_t test_svhsubr_s8_x(svbool_t pg, svint8_t op1, svint8_t op2) // CHECK-LABEL: @test_svhsubr_s16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_s16_xu10__SVBool_tu11__SVInt16_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svint16_t test_svhsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) @@ -330,13 +330,13 @@ svint16_t test_svhsubr_s16_x(svbool_t pg, svint16_t op1, svint16_t op2) // CHECK-LABEL: @test_svhsubr_s32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_s32_xu10__SVBool_tu11__SVInt32_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svint32_t test_svhsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) @@ -347,13 +347,13 @@ svint32_t test_svhsubr_s32_x(svbool_t pg, svint32_t op1, svint32_t op2) // CHECK-LABEL: @test_svhsubr_s64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_s64_xu10__SVBool_tu11__SVInt64_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svint64_t test_svhsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) @@ -363,12 +363,12 @@ svint64_t test_svhsubr_s64_x(svbool_t pg, svint64_t op1, svint64_t op2) // CHECK-LABEL: @test_svhsubr_u8_x( // CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z17test_svhsubr_u8_xu10__SVBool_tu11__SVUint8_tS0_( // CPP-CHECK-NEXT: entry: -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP2:%.*]], <vscale x 16 x i8> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svhsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) @@ -379,13 +379,13 @@ svuint8_t test_svhsubr_u8_x(svbool_t pg, svuint8_t op1, svuint8_t op2) // CHECK-LABEL: @test_svhsubr_u16_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_u16_xu10__SVBool_tu12__SVUint16_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP2:%.*]], <vscale x 8 x i16> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svuint16_t test_svhsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) @@ -396,13 +396,13 @@ svuint16_t test_svhsubr_u16_x(svbool_t pg, svuint16_t op1, svuint16_t op2) // CHECK-LABEL: @test_svhsubr_u32_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_u32_xu10__SVBool_tu12__SVUint32_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP2:%.*]], <vscale x 4 x i32> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svuint32_t test_svhsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) @@ -413,13 +413,13 @@ svuint32_t test_svhsubr_u32_x(svbool_t pg, svuint32_t op1, svuint32_t op2) // CHECK-LABEL: @test_svhsubr_u64_x( // CHECK-NEXT: entry: // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z18test_svhsubr_u64_xu10__SVBool_tu12__SVUint64_tS0_( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[OP2:%.*]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP2:%.*]], <vscale x 2 x i64> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svuint64_t test_svhsubr_u64_x(svbool_t pg, svuint64_t op1, svuint64_t op2) @@ -775,14 +775,14 @@ svuint64_t test_svhsubr_n_u64_m(svbool_t pg, svuint64_t op1, uint64_t op2) // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svhsubr_n_s8_xu10__SVBool_tu10__SVInt8_ta( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) @@ -795,7 +795,7 @@ svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s16_xu10__SVBool_tu11__SVInt16_ts( @@ -803,7 +803,7 @@ svint8_t test_svhsubr_n_s8_x(svbool_t pg, svint8_t op1, int8_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) @@ -816,7 +816,7 @@ svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s32_xu10__SVBool_tu11__SVInt32_ti( @@ -824,7 +824,7 @@ svint16_t test_svhsubr_n_s16_x(svbool_t pg, svint16_t op1, int16_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) @@ -837,7 +837,7 @@ svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_s64_xu10__SVBool_tu11__SVInt64_tl( @@ -845,7 +845,7 @@ svint32_t test_svhsubr_n_s32_x(svbool_t pg, svint32_t op1, int32_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svint64_t test_svhsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) @@ -857,14 +857,14 @@ svint64_t test_svhsubr_n_s64_x(svbool_t pg, svint64_t op1, int64_t op2) // CHECK-NEXT: entry: // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // // CPP-CHECK-LABEL: @_Z19test_svhsubr_n_u8_xu10__SVBool_tu11__SVUint8_th( // CPP-CHECK-NEXT: entry: // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 16 x i8> poison, i8 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 16 x i8> [[DOTSPLATINSERT]], <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsubr.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[OP1:%.*]], <vscale x 16 x i8> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> [[PG:%.*]], <vscale x 16 x i8> [[DOTSPLAT]], <vscale x 16 x i8> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]] // svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) @@ -877,7 +877,7 @@ svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u16_xu10__SVBool_tu12__SVUint16_tt( @@ -885,7 +885,7 @@ svuint8_t test_svhsubr_n_u8_x(svbool_t pg, svuint8_t op1, uint8_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv8i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 8 x i16> poison, i16 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i16> [[DOTSPLATINSERT]], <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsubr.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[OP1:%.*]], <vscale x 8 x i16> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> [[TMP0]], <vscale x 8 x i16> [[DOTSPLAT]], <vscale x 8 x i16> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP1]] // svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) @@ -898,7 +898,7 @@ svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u32_xu10__SVBool_tu12__SVUint32_tj( @@ -906,7 +906,7 @@ svuint16_t test_svhsubr_n_u16_x(svbool_t pg, svuint16_t op1, uint16_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[OP1:%.*]], <vscale x 4 x i32> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> [[TMP0]], <vscale x 4 x i32> [[DOTSPLAT]], <vscale x 4 x i32> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP1]] // svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) @@ -919,7 +919,7 @@ svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) // CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]]) // CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // // CPP-CHECK-LABEL: @_Z20test_svhsubr_n_u64_xu10__SVBool_tu12__SVUint64_tm( @@ -927,7 +927,7 @@ svuint32_t test_svhsubr_n_u32_x(svbool_t pg, svuint32_t op1, uint32_t op2) // CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x i1> @llvm.aarch64.sve.convert.from.svbool.nxv2i1(<vscale x 16 x i1> [[PG:%.*]]) // CPP-CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[OP2:%.*]], i64 0 // CPP-CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer -// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsubr.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[OP1:%.*]], <vscale x 2 x i64> [[DOTSPLAT]]) +// CPP-CHECK-NEXT: [[TMP1:%.*]] = tail call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> [[TMP0]], <vscale x 2 x i64> [[DOTSPLAT]], <vscale x 2 x i64> [[OP1:%.*]]) // CPP-CHECK-NEXT: ret <vscale x 2 x i64> [[TMP1]] // svuint64_t test_svhsubr_n_u64_x(svbool_t pg, svuint64_t op1, uint64_t op2) diff --git a/llvm/include/llvm/IR/IntrinsicsAArch64.td b/llvm/include/llvm/IR/IntrinsicsAArch64.td index b8f87a0517efa..170eff4b7db90 100644 --- a/llvm/include/llvm/IR/IntrinsicsAArch64.td +++ b/llvm/include/llvm/IR/IntrinsicsAArch64.td @@ -2435,6 +2435,7 @@ def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intri def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_shsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_shsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_sli : AdvSIMD_2VectorArgIndexed_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_sqabs : AdvSIMD_Merged1VectorArg_Intrinsic<[IntrSpeculatable]>; @@ -2467,6 +2468,7 @@ def int_aarch64_sve_suqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpecul def int_aarch64_sve_uaba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_uhadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_uhsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; +def int_aarch64_sve_uhsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_uhsubr : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_uqadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; def int_aarch64_sve_uqrshl : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index bbcffb0c44e85..32eede90e42b8 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -3838,12 +3838,15 @@ let Predicates = [HasSVE2_or_SME] in { // SVE2 integer halving add/subtract (predicated) defm SHADD_ZPmZ : sve2_int_arith_pred<0b100000, "shadd", AArch64shadd>; defm UHADD_ZPmZ : sve2_int_arith_pred<0b100010, "uhadd", AArch64uhadd>; - defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub>; - defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub>; + defm SHSUB_ZPmZ : sve2_int_arith_pred<0b100100, "shsub", int_aarch64_sve_shsub, "SHSUB_ZPZZ", DestructiveBinaryCommWithRev, "SHSUBR_ZPmZ">; + defm UHSUB_ZPmZ : sve2_int_arith_pred<0b100110, "uhsub", int_aarch64_sve_uhsub, "UHSUB_ZPZZ", DestructiveBinaryCommWithRev, "UHSUBR_ZPmZ">; defm SRHADD_ZPmZ : sve2_int_arith_pred<0b101000, "srhadd", AArch64srhadd>; defm URHADD_ZPmZ : sve2_int_arith_pred<0b101010, "urhadd", AArch64urhadd>; - defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr>; - defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr>; + defm SHSUBR_ZPmZ : sve2_int_arith_pred<0b101100, "shsubr", int_aarch64_sve_shsubr, "SHSUBR_ZPZZ", DestructiveBinaryCommWithRev, "SHSUB_ZPmZ", /*isReverseInstr*/ 1>; + defm UHSUBR_ZPmZ : sve2_int_arith_pred<0b101110, "uhsubr", int_aarch64_sve_uhsubr, "UHSUBR_ZPZZ", DestructiveBinaryCommWithRev, "UHSUB_ZPmZ", /*isReverseInstr*/ 1>; + + defm SHSUB_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_shsub_u>; + defm UHSUB_ZPZZ : sve_int_bin_pred_bhsd<int_aarch64_sve_uhsub_u>; // SVE2 integer pairwise add and accumulate long defm SADALP_ZPmZ : sve2_int_sadd_long_accum_pairwise<0, "sadalp", int_aarch64_sve_sadalp>; diff --git a/llvm/lib/Target/AArch64/AArch64SchedA510.td b/llvm/lib/Target/AArch64/AArch64SchedA510.td index 66f49f040ad12..6acac222a81c2 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedA510.td +++ b/llvm/lib/Target/AArch64/AArch64SchedA510.td @@ -652,7 +652,7 @@ def : InstRW<[CortexA510Write<3, CortexA510UnitVALU>], "^(ADD|SUB|SUBR)_ZI_[BHSD]", "^ADR_[SU]XTW_ZZZ_D_[0123]", "^ADR_LSL_ZZZ_[SD]_[0123]", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]")>; + "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]")>; def : InstRW<[CortexA510Write<4, CortexA510UnitVALU>], (instregex "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", "^SADDLBT_ZZZ_[HSD]", diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td index a02130f8390a7..dee6e50f960b1 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN2.td @@ -1662,7 +1662,7 @@ def : InstRW<[N2Write_2c_1V], "^ADR_LSL_ZZZ_[SD]_[0123]", "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", "^SADDLBT_ZZZ_[HSD]", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]", "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td index 22e6d1107a337..817ddcbdca6f2 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseN3.td @@ -1752,7 +1752,7 @@ def : InstRW<[N3Write_2c_1V], "^ADR_LSL_ZZZ_[SD]_[0123]", "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", "^SADDLBT_ZZZ_[HSD]", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]", "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex diff --git a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td index 2387f176f3051..94861253e22d3 100644 --- a/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td +++ b/llvm/lib/Target/AArch64/AArch64SchedNeoverseV2.td @@ -2125,7 +2125,7 @@ def : InstRW<[V2Write_2c_1V], "^ADR_LSL_ZZZ_[SD]_[0123]", "^[SU](ADD|SUB)[LW][BT]_ZZZ_[HSD]", "^SADDLBT_ZZZ_[HSD]", - "^[SU]H(ADD|SUB|SUBR)_ZPmZ_[BHSD]", + "^[SU]H(ADD|SUB|SUBR)_(ZPmZ|ZPZZ)_[BHSD]", "^SSUBL(BT|TB)_ZZZ_[HSD]")>; // Arithmetic, complex diff --git a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index b2a9f9cb75910..1df0a38983261 100644 --- a/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -1446,6 +1446,10 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { case Intrinsic::aarch64_sve_orr: return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_orr_u) .setMatchingIROpcode(Instruction::Or); + case Intrinsic::aarch64_sve_shsub: + return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_shsub_u); + case Intrinsic::aarch64_sve_shsubr: + return SVEIntrinsicInfo::defaultMergingOp(); case Intrinsic::aarch64_sve_sqrshl: return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqrshl_u); case Intrinsic::aarch64_sve_sqshl: @@ -1454,6 +1458,10 @@ static SVEIntrinsicInfo constructSVEIntrinsicInfo(IntrinsicInst &II) { return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_sqsub_u); case Intrinsic::aarch64_sve_srshl: return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_srshl_u); + case Intrinsic::aarch64_sve_uhsub: + return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uhsub_u); + case Intrinsic::aarch64_sve_uhsubr: + return SVEIntrinsicInfo::defaultMergingOp(); case Intrinsic::aarch64_sve_uqrshl: return SVEIntrinsicInfo::defaultMergingOp(Intrinsic::aarch64_sve_uqrshl_u); case Intrinsic::aarch64_sve_uqshl: diff --git a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll index a471625cd5ad8..5356bba94a30b 100644 --- a/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll +++ b/llvm/test/CodeGen/AArch64/sve2-intrinsics-uniform-dsp-undef.ll @@ -1,6 +1,154 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2 < %s | FileCheck %s +; +; SHSUB +; + +define <vscale x 16 x i8> @shsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: shsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: shsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @shsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: shsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: shsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @shsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: shsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: shsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @shsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: shsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: shsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; SHSUB (swapped operands) +; + +define <vscale x 16 x i8> @shsub_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: shsub_i8_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: shsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %a) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @shsub_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: shsub_i16_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: shsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %a) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @shsub_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: shsub_i32_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: shsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %a) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @shsub_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: shsub_i64_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: shsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %a) + ret <vscale x 2 x i64> %out +} + +; +; SHSUB (movprfx) +; + +define <vscale x 16 x i8> @shsub_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: shsub_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: shsub z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.shsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @shsub_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: shsub_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: shsub z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.shsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @shsub_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: shsub_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: shsub z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @shsub_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: shsub_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: shsub z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.shsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + ; ; SQRSHL ; @@ -641,6 +789,154 @@ define <vscale x 2 x i64> @srshl_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 ret <vscale x 2 x i64> %out } +; +; UHSUB +; + +define <vscale x 16 x i8> @uhsub_i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhsub_i8: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsub z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhsub_i16(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhsub_i16: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsub z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhsub_i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uhsub_i32: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsub z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhsub_i64(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhsub_i64: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsub z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + +; +; UHSUB (swapped operands) +; + +define <vscale x 16 x i8> @uhsub_i8_swapped_operands(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhsub_i8_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsubr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %b, + <vscale x 16 x i8> %a) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhsub_i16_swapped_operands(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhsub_i16_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsubr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %b, + <vscale x 8 x i16> %a) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhsub_i32_swapped_operands(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uhsub_i32_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsubr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %b, + <vscale x 4 x i32> %a) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhsub_i64_swapped_operands(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhsub_i64_swapped_operands: +; CHECK: // %bb.0: +; CHECK-NEXT: uhsubr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %b, + <vscale x 2 x i64> %a) + ret <vscale x 2 x i64> %out +} + +; +; UHSUB (movprfx) +; + +define <vscale x 16 x i8> @uhsub_i8_movprfx(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %unused, <vscale x 16 x i8> %a, <vscale x 16 x i8> %b) { +; CHECK-LABEL: uhsub_i8_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uhsub z0.b, p0/m, z0.b, z2.b +; CHECK-NEXT: ret + %out = call <vscale x 16 x i8> @llvm.aarch64.sve.uhsub.u.nxv16i8(<vscale x 16 x i1> %pg, + <vscale x 16 x i8> %a, + <vscale x 16 x i8> %b) + ret <vscale x 16 x i8> %out +} + +define <vscale x 8 x i16> @uhsub_i16_movprfx(<vscale x 8 x i1> %pg, <vscale x 8 x i16> %unused, <vscale x 8 x i16> %a, <vscale x 8 x i16> %b) { +; CHECK-LABEL: uhsub_i16_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uhsub z0.h, p0/m, z0.h, z2.h +; CHECK-NEXT: ret + %out = call <vscale x 8 x i16> @llvm.aarch64.sve.uhsub.u.nxv8i16(<vscale x 8 x i1> %pg, + <vscale x 8 x i16> %a, + <vscale x 8 x i16> %b) + ret <vscale x 8 x i16> %out +} + +define <vscale x 4 x i32> @uhsub_i32_movprfx(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %unused, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) { +; CHECK-LABEL: uhsub_i32_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uhsub z0.s, p0/m, z0.s, z2.s +; CHECK-NEXT: ret + %out = call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> %pg, + <vscale x 4 x i32> %a, + <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %out +} + +define <vscale x 2 x i64> @uhsub_i64_movprfx(<vscale x 2 x i1> %pg, <vscale x 2 x i64> %unused, <vscale x 2 x i64> %a, <vscale x 2 x i64> %b) { +; CHECK-LABEL: uhsub_i64_movprfx: +; CHECK: // %bb.0: +; CHECK-NEXT: movprfx z0, z1 +; CHECK-NEXT: uhsub z0.d, p0/m, z0.d, z2.d +; CHECK-NEXT: ret + %out = call <vscale x 2 x i64> @llvm.aarch64.sve.uhsub.u.nxv2i64(<vscale x 2 x i1> %pg, + <vscale x 2 x i64> %a, + <vscale x 2 x i64> %b) + ret <vscale x 2 x i64> %out +} + ; ; UQRSHL ; diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll index b5420e9111746..f082bddbf9fec 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-comb-no-active-lanes.ll @@ -741,8 +741,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @simplify_shsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsub_intrinsic ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; CHECK-NEXT: ret <vscale x 4 x i32> [[A]] ; %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %r @@ -752,8 +751,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, < define <vscale x 4 x i32> @simplify_shsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @simplify_shsubr_intrinsic ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; CHECK-NEXT: ret <vscale x 4 x i32> [[A]] ; %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %r @@ -1017,8 +1015,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <v define <vscale x 4 x i32> @simplify_uhsub_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsub_intrinsic ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; CHECK-NEXT: ret <vscale x 4 x i32> [[A]] ; %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %r @@ -1028,8 +1025,7 @@ declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, < define <vscale x 4 x i32> @simplify_uhsubr_intrinsic(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @simplify_uhsubr_intrinsic ; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) -; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; CHECK-NEXT: ret <vscale x 4 x i32> [[A]] ; %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> zeroinitializer, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) ret <vscale x 4 x i32> %r diff --git a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll index 96ac0efde8764..097ed87279eda 100644 --- a/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll +++ b/llvm/test/Transforms/InstCombine/AArch64/sve-intrinsic-simplify-to-u-form.ll @@ -324,6 +324,28 @@ define <vscale x 4 x i32> @replace_sdiv_intrinsic_i32(<vscale x 4 x i32> %a, <vs ret <vscale x 4 x i32> %r } +declare <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +define <vscale x 4 x i32> @replace_shsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { +; CHECK-LABEL: define <vscale x 4 x i32> @replace_shsub_intrinsic_i32 +; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; + %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %r +} + +declare <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +define <vscale x 4 x i32> @replace_shsubr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { +; CHECK-LABEL: define <vscale x 4 x i32> @replace_shsubr_intrinsic_i32 +; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; + %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.shsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %r +} + declare <vscale x 4 x i32> @llvm.aarch64.sve.smax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) define <vscale x 4 x i32> @replace_smax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_smax_intrinsic_i32 @@ -431,6 +453,28 @@ define <vscale x 4 x i32> @replace_udiv_intrinsic_i32(<vscale x 4 x i32> %a, <vs ret <vscale x 4 x i32> %r } +declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +define <vscale x 4 x i32> @replace_uhsub_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { +; CHECK-LABEL: define <vscale x 4 x i32> @replace_uhsub_intrinsic_i32 +; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.u.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; + %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsub.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %r +} + +declare <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) +define <vscale x 4 x i32> @replace_uhsubr_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { +; CHECK-LABEL: define <vscale x 4 x i32> @replace_uhsubr_intrinsic_i32 +; CHECK-SAME: (<vscale x 4 x i32> [[A:%.*]], <vscale x 4 x i32> [[B:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[R:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> [[A]], <vscale x 4 x i32> [[B]]) +; CHECK-NEXT: ret <vscale x 4 x i32> [[R]] +; + %r = tail call <vscale x 4 x i32> @llvm.aarch64.sve.uhsubr.nxv4i32(<vscale x 4 x i1> splat (i1 true), <vscale x 4 x i32> %a, <vscale x 4 x i32> %b) + ret <vscale x 4 x i32> %r +} + declare <vscale x 4 x i32> @llvm.aarch64.sve.umax.nxv4i32(<vscale x 4 x i1>, <vscale x 4 x i32>, <vscale x 4 x i32>) define <vscale x 4 x i32> @replace_umax_intrinsic_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) #0 { ; CHECK-LABEL: define <vscale x 4 x i32> @replace_umax_intrinsic_i32 _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
