https://github.com/E00N777 updated https://github.com/llvm/llvm-project/pull/197651
>From 60e9e65a4444882097d6283586380a7c0bbc9789 Mon Sep 17 00:00:00 2001 From: E0N777 <[email protected]> Date: Thu, 14 May 2026 18:47:00 +0800 Subject: [PATCH] [CIR][AArch64] Lower NEON vtrn intrinsics --- .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp | 24 +- .../fp8-intrinsics/acle_neon_fp8_untyped.c | 36 -- clang/test/CodeGen/AArch64/neon-perm.c | 383 ------------------ clang/test/CodeGen/AArch64/neon/perm.c | 372 +++++++++++++++++ 4 files changed, 394 insertions(+), 421 deletions(-) delete mode 100644 clang/test/CodeGen/AArch64/neon-perm.c diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp index c142b69f6be6e..18ec9e24722bd 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp @@ -2904,12 +2904,32 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr, case NEON::BI__builtin_neon_vst4q_v: case NEON::BI__builtin_neon_vst4_lane_v: case NEON::BI__builtin_neon_vst4q_lane_v: - case NEON::BI__builtin_neon_vtrn_v: - case NEON::BI__builtin_neon_vtrnq_v: cgm.errorNYI(expr->getSourceRange(), std::string("unimplemented AArch64 builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return mlir::Value{}; + case NEON::BI__builtin_neon_vtrn_v: + case NEON::BI__builtin_neon_vtrnq_v: { + ops[1] = builder.createBitcast(ops[1], ty); + ops[2] = builder.createBitcast(ops[2], ty); + // Adding a bitcast here as Ops[0] might be a void pointer. + mlir::Value baseAddr = + builder.createBitcast(ops[0], builder.getPointerTo(ty)); + mlir::Value sv; + + for (unsigned vi = 0; vi != 2; ++vi) { + llvm::SmallVector<int64_t, 16> indices; + for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) { + indices.push_back(i + vi); + indices.push_back(i + e + vi); + } + cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi); + mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx); + sv = builder.createVecShuffle(loc, ops[1], ops[2], indices); + (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr); + } + return sv; + } case NEON::BI__builtin_neon_vuzp_v: case NEON::BI__builtin_neon_vuzpq_v: { ops[1] = builder.createBitcast(ops[1], ty); diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c index 40635342b8949..16543e41fcccc 100644 --- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c +++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c @@ -483,42 +483,6 @@ mfloat8x16_t test_vrev16q_mf8(mfloat8x16_t a) { return vrev16q_mf8(a); } -// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vtrn_mf8( -// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x8x2_t test_vtrn_mf8(mfloat8x8_t a, mfloat8x8_t b) { - return vtrn_mf8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vtrnq_mf8( -// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -mfloat8x16x2_t test_vtrnq_mf8(mfloat8x16_t a, mfloat8x16_t b) { - return vtrnq_mf8(a, b); -} - // CHECK-LABEL: define dso_local void @test_vcopy_lane_mf8( // CHECK-SAME: <8 x i8> [[ARG_I8X8:%.*]]) #[[ATTR0]] { // CHECK-NEXT: [[ENTRY:.*:]] diff --git a/clang/test/CodeGen/AArch64/neon-perm.c b/clang/test/CodeGen/AArch64/neon-perm.c deleted file mode 100644 index df8b526e47a1a..0000000000000 --- a/clang/test/CodeGen/AArch64/neon-perm.c +++ /dev/null @@ -1,383 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5 -// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \ -// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | FileCheck %s - -// REQUIRES: aarch64-registered-target || arm-registered-target - -#include <arm_neon.h> - -// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vtrn_s8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { - return vtrn_s8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vtrn_s16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { - return vtrn_s16(a, b); -} - -// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vtrn_s32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]] -// -int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { - return vtrn_s32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vtrn_u8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { - return vtrn_u8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vtrn_u16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { - return vtrn_u16(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vtrn_u32( -// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 0, i32 2> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> [[TMP3]], <2 x i32> <i32 1, i32 3> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { - return vtrn_u32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vtrn_f32( -// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float> -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 0, i32 2> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> [[TMP5]], <2 x i32> <i32 1, i32 3> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT3]], <2 x float> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT4]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] [[TMP6]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] poison, <2 x float> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]] -// -float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { - return vtrn_f32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vtrn_p8( -// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { - return vtrn_p8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vtrn_p16( -// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { - return vtrn_p16(a, b); -} - -// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vtrnq_s8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { - return vtrnq_s8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vtrnq_s16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { - return vtrnq_s16(a, b); -} - -// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vtrnq_s32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { - return vtrnq_s32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vtrnq_u8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { - return vtrnq_u8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vtrnq_u16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { - return vtrnq_u16(a, b); -} - -// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vtrnq_u32( -// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { - return vtrnq_u32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vtrnq_f32( -// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8> -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float> -// CHECK-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x float> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT4]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] [[TMP6]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] poison, <4 x float> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]] -// -float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { - return vtrnq_f32(a, b); -} - -// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vtrnq_p8( -// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] [[TMP0]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]] -// -poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { - return vtrnq_p8(a, b); -} - -// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vtrnq_p16( -// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: [[ENTRY:.*:]] -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> -// CHECK-NEXT: [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1 -// CHECK-NEXT: [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT2]], 0 -// CHECK-NEXT: [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 0 -// CHECK-NEXT: [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] [[TMP4]], 1 -// CHECK-NEXT: [[DOTFCA_0_0_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0 -// CHECK-NEXT: [[DOTFCA_0_1_INSERT:%.*]] = insertvalue [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 0, 1 -// CHECK-NEXT: ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]] -// -poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { - return vtrnq_p16(a, b); -} diff --git a/clang/test/CodeGen/AArch64/neon/perm.c b/clang/test/CodeGen/AArch64/neon/perm.c index 419769ae3f0fa..aa8c7ccc4e7df 100644 --- a/clang/test/CodeGen/AArch64/neon/perm.c +++ b/clang/test/CodeGen/AArch64/neon/perm.c @@ -2363,3 +2363,375 @@ mfloat8x16_t test_vtrn2q_mf8(mfloat8x16_t a, mfloat8x16_t b) { // LLVM: ret <16 x i8> [[SHUFFLE]] return vtrn2q_mf8(a, b); } + +// LLVM-LABEL: @test_vtrn_s8( +// CIR-LABEL: @vtrn_s8( +int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) { +// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s8i>, !cir.ptr<!cir.vector<8 x !s8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s8i>, !cir.ptr<!cir.vector<8 x !s8i>> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_s8(a, b); +} + +// LLVM-LABEL: @test_vtrn_s16( +// CIR-LABEL: @vtrn_s16( +int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !s16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s16i>, !cir.ptr<!cir.vector<4 x !s16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !s16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s16i>, !cir.ptr<!cir.vector<4 x !s16i>> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_s16(a, b); +} + +// LLVM-LABEL: @test_vtrn_u8( +// CIR-LABEL: @vtrn_u8( +uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u8i>, !cir.ptr<!cir.vector<8 x !u8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u8i>, !cir.ptr<!cir.vector<8 x !u8i>> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_u8(a, b); +} + +// LLVM-LABEL: @test_vtrn_u16( +// CIR-LABEL: @vtrn_u16( +uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !u16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !u16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !u16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !u16i>, !cir.ptr<!cir.vector<4 x !u16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !u16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !u16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !u16i>, !cir.ptr<!cir.vector<4 x !u16i>> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_u16(a, b); +} + +// LLVM-LABEL: @test_vtrn_p8( +// CIR-LABEL: @vtrn_p8( +poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) { +// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s8i>, !cir.ptr<!cir.vector<8 x !s8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s8i>, !cir.ptr<!cir.vector<8 x !s8i>> + +// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_p8(a, b); +} + +// LLVM-LABEL: @test_vtrn_p16( +// CIR-LABEL: @vtrn_p16( +poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<4 x !s16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !s16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s16i>, !cir.ptr<!cir.vector<4 x !s16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !s16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s16i>, !cir.ptr<!cir.vector<4 x !s16i>> + +// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_p16(a, b); +} + +// LLVM-LABEL: @test_vtrn_s32( +// CIR-LABEL: @vtrn_s32( +int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !s32i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !s32i>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x !s32i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !s32i>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x !s32i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !s32i>, !cir.ptr<!cir.vector<2 x !s32i>> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x i32> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x i32> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_s32(a, b); +} + +// LLVM-LABEL: @test_vtrn_f32( +// CIR-LABEL: @vtrn_f32( +float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !cir.float> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x !cir.float> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !cir.float>, !cir.ptr<!cir.vector<2 x !cir.float>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !cir.float>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x !cir.float> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !cir.float>, !cir.ptr<!cir.vector<2 x !cir.float>> + +// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]], <2 x float> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 0, i32 2> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], <2 x i32> <i32 1, i32 3> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x float> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x float> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_f32(a, b); +} + +// LLVM-LABEL: @test_vtrn_u32( +// CIR-LABEL: @vtrn_u32( +uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<2 x !u32i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !u32i>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x !u32i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !u32i>, !cir.ptr<!cir.vector<2 x !u32i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x !u32i>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x !u32i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !u32i>, !cir.ptr<!cir.vector<2 x !u32i>> + +// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 0, i32 2> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 x i32> <i32 1, i32 3> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x i32> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x i32> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_u32(a, b); +} + +// LLVM-LABEL: @test_vtrn_mf8( +// CIR-LABEL: @vtrn_mf8( +mfloat8x8x2_t test_vtrn_mf8(mfloat8x8_t a, mfloat8x8_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> !cir.vector<8 x !u8i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u8i>, !cir.ptr<!cir.vector<8 x !u8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u8i>, !cir.ptr<!cir.vector<8 x !u8i>> + +// LLVM-SAME: <8 x i8> {{.*}}[[A:%.*]], <8 x i8> {{.*}}[[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrn_mf8(a, b); +} + +// LLVM-LABEL: @test_vtrnq_s8( +// CIR-LABEL: @vtrnq_s8( +int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) { +// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !s8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !s8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_s8(a, b); +} + +// LLVM-LABEL: @test_vtrnq_s16( +// CIR-LABEL: @vtrnq_s16( +int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_s16(a, b); +} + +// LLVM-LABEL: @test_vtrnq_s32( +// CIR-LABEL: @vtrnq_s32( +int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !s32i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s32i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !s32i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !s32i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !s32i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s32i>, !cir.ptr<!cir.vector<4 x !s32i>> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i32> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i32> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_s32(a, b); +} + +// LLVM-LABEL: @test_vtrnq_f32( +// CIR-LABEL: @vtrnq_f32( +float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !cir.float> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !cir.float>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !cir.float> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !cir.float>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !cir.float> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !cir.float>, !cir.ptr<!cir.vector<4 x !cir.float>> + +// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]], <4 x float> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x float> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x float> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_f32(a, b); +} + +// LLVM-LABEL: @test_vtrnq_u8( +// CIR-LABEL: @vtrnq_u8( +uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !u8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !u8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_u8(a, b); +} + +// LLVM-LABEL: @test_vtrnq_u16( +// CIR-LABEL: @vtrnq_u16( +uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !u16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u16i>, !cir.ptr<!cir.vector<8 x !u16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !u16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u16i>, !cir.ptr<!cir.vector<8 x !u16i>> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_u16(a, b); +} + +// LLVM-LABEL: @test_vtrnq_u32( +// CIR-LABEL: @vtrnq_u32( +uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<4 x !u32i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !u32i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, #cir.int<6> : !s32i] : !cir.vector<4 x !u32i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x !u32i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, #cir.int<7> : !s32i] : !cir.vector<4 x !u32i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !u32i>, !cir.ptr<!cir.vector<4 x !u32i>> + +// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 0, i32 4, i32 2, i32 6> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 x i32> <i32 1, i32 5, i32 3, i32 7> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i32> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i32> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_u32(a, b); +} + +// LLVM-LABEL: @test_vtrnq_p8( +// CIR-LABEL: @vtrnq_p8( +poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) { +// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !s8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !s8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>> + +// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_p8(a, b); +} + +// LLVM-LABEL: @test_vtrnq_p16( +// CIR-LABEL: @vtrnq_p16( +poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s16i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s16i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>> + +// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_p16(a, b); +} + +// LLVM-LABEL: @test_vtrnq_mf8( +// CIR-LABEL: @vtrnq_mf8( +mfloat8x16x2_t test_vtrnq_mf8(mfloat8x16_t a, mfloat8x16_t b) { +// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i> +// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i> +// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !u8i> +// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>> +// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !u8i> +// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>> + +// LLVM-SAME: <16 x i8> {{.*}}[[A:%.*]], <16 x i8> {{.*}}[[B:%.*]]) {{.*}} { +// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30> +// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31> +// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0 +// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1 +// LLVM: ret [[RTY]] [[RES1]] + return vtrnq_mf8(a, b); +} _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
