https://github.com/E00N777 updated 
https://github.com/llvm/llvm-project/pull/197651

>From 60e9e65a4444882097d6283586380a7c0bbc9789 Mon Sep 17 00:00:00 2001
From: E0N777 <[email protected]>
Date: Thu, 14 May 2026 18:47:00 +0800
Subject: [PATCH] [CIR][AArch64] Lower NEON vtrn intrinsics

---
 .../lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp  |  24 +-
 .../fp8-intrinsics/acle_neon_fp8_untyped.c    |  36 --
 clang/test/CodeGen/AArch64/neon-perm.c        | 383 ------------------
 clang/test/CodeGen/AArch64/neon/perm.c        | 372 +++++++++++++++++
 4 files changed, 394 insertions(+), 421 deletions(-)
 delete mode 100644 clang/test/CodeGen/AArch64/neon-perm.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
index c142b69f6be6e..18ec9e24722bd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinAArch64.cpp
@@ -2904,12 +2904,32 @@ CIRGenFunction::emitAArch64BuiltinExpr(unsigned builtinID, const CallExpr *expr,
   case NEON::BI__builtin_neon_vst4q_v:
   case NEON::BI__builtin_neon_vst4_lane_v:
   case NEON::BI__builtin_neon_vst4q_lane_v:
-  case NEON::BI__builtin_neon_vtrn_v:
-  case NEON::BI__builtin_neon_vtrnq_v:
     cgm.errorNYI(expr->getSourceRange(),
                  std::string("unimplemented AArch64 builtin call: ") +
                      getContext().BuiltinInfo.getName(builtinID));
     return mlir::Value{};
+  case NEON::BI__builtin_neon_vtrn_v:
+  case NEON::BI__builtin_neon_vtrnq_v: {
+    ops[1] = builder.createBitcast(ops[1], ty);
+    ops[2] = builder.createBitcast(ops[2], ty);
+    // Adding a bitcast here as ops[0] might be a void pointer.
+    mlir::Value baseAddr =
+        builder.createBitcast(ops[0], builder.getPointerTo(ty));
+    mlir::Value sv;
+
+    for (unsigned vi = 0; vi != 2; ++vi) {
+      llvm::SmallVector<int64_t, 16> indices;
+      for (unsigned i = 0, e = ty.getSize(); i != e; i += 2) {
+        indices.push_back(i + vi);
+        indices.push_back(i + e + vi);
+      }
+      cir::ConstantOp idx = builder.getConstInt(loc, builder.getSInt32Ty(), vi);
+      mlir::Value addr = builder.createPtrStride(loc, baseAddr, idx);
+      sv = builder.createVecShuffle(loc, ops[1], ops[2], indices);
+      (void)builder.CIRBaseBuilderTy::createStore(loc, sv, addr);
+    }
+    return sv;
+  }
   case NEON::BI__builtin_neon_vuzp_v:
   case NEON::BI__builtin_neon_vuzpq_v: {
     ops[1] = builder.createBitcast(ops[1], ty);
diff --git a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c
index 40635342b8949..16543e41fcccc 100644
--- a/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c
+++ b/clang/test/CodeGen/AArch64/fp8-intrinsics/acle_neon_fp8_untyped.c
@@ -483,42 +483,6 @@ mfloat8x16_t test_vrev16q_mf8(mfloat8x16_t a) {
   return vrev16q_mf8(a);
 }
 
-// CHECK-LABEL: define dso_local %struct.mfloat8x8x2_t @test_vtrn_mf8(
-// CHECK-SAME: <8 x i8> [[A:%.*]], <8 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_MFLOAT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x8x2_t test_vtrn_mf8(mfloat8x8_t a, mfloat8x8_t b) {
-  return vtrn_mf8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.mfloat8x16x2_t @test_vtrnq_mf8(
-// CHECK-SAME: <16 x i8> [[A:%.*]], <16 x i8> [[B:%.*]]) #[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, 
i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, 
i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_MFLOAT8X16X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> 
[[DOTFCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT:    ret [[STRUCT_MFLOAT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-mfloat8x16x2_t test_vtrnq_mf8(mfloat8x16_t a, mfloat8x16_t b) {
-  return vtrnq_mf8(a, b);
-}
-
 // CHECK-LABEL: define dso_local void @test_vcopy_lane_mf8(
 // CHECK-SAME: <8 x i8> [[ARG_I8X8:%.*]]) #[[ATTR0]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
diff --git a/clang/test/CodeGen/AArch64/neon-perm.c b/clang/test/CodeGen/AArch64/neon-perm.c
deleted file mode 100644
index df8b526e47a1a..0000000000000
--- a/clang/test/CodeGen/AArch64/neon-perm.c
+++ /dev/null
@@ -1,383 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py 
UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon \
-// RUN: -disable-O0-optnone -emit-llvm -o - %s | opt -S -passes=mem2reg,sroa | 
FileCheck %s
-
-// REQUIRES: aarch64-registered-target || arm-registered-target
-
-#include <arm_neon.h>
-
-// CHECK-LABEL: define dso_local %struct.int8x8x2_t @test_vtrn_s8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0:[0-9]+]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
-  return vtrn_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.int16x4x2_t @test_vtrn_s16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> 
[[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x 
i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X4X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
-  return vtrn_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.int32x2x2_t @test_vtrn_s32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> 
[[TMP3]], <2 x i32> <i32 0, i32 2>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x 
i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X2X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
-  return vtrn_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint8x8x2_t @test_vtrn_u8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 
1
-// CHECK-NEXT:    ret [[STRUCT_UINT8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
-  return vtrn_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint16x4x2_t @test_vtrn_u16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> 
[[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x 
i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X4X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_UINT16X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
-  return vtrn_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint32x2x2_t @test_vtrn_u32(
-// CHECK-SAME: <2 x i32> noundef [[A:%.*]], <2 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x i32> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i32> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <2 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <2 x i32>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x i32> 
[[TMP3]], <2 x i32> <i32 0, i32 2>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <2 x i32> [[TMP2]], <2 x 
i32> [[TMP3]], <2 x i32> <i32 1, i32 3>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT32X2X2_T:%.*]] poison, <2 x i32> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT1]], <2 x i32> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X2X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x i32>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT32X2X2_T]] poison, <2 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x i32> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_UINT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
-  return vtrn_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.float32x2x2_t @test_vtrn_f32(
-// CHECK-SAME: <2 x float> noundef [[A:%.*]], <2 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <2 x float> [[A]] to <2 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x float> [[B]] to <2 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <2 x i32> [[TMP0]] to <8 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <2 x i32> [[TMP1]] to <8 x i8>
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <2 x float>
-// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <2 x float>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x 
float> [[TMP5]], <2 x i32> <i32 0, i32 2>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x 
float> [[TMP5]], <2 x i32> <i32 1, i32 3>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue 
[[STRUCT_FLOAT32X2X2_T:%.*]] poison, <2 x float> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue 
[[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT3]], <2 x float> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X2X2_T]] 
[[DOTFCA_0_1_INSERT4]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] 
[[TMP6]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <2 x float>] 
[[TMP6]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_FLOAT32X2X2_T]] poison, <2 x float> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_0_INSERT]], <2 x float> 
[[DOTFCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT:    ret [[STRUCT_FLOAT32X2X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
-  return vtrn_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.poly8x8x2_t @test_vtrn_p8(
-// CHECK-SAME: <8 x i8> noundef [[A:%.*]], <8 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> 
[[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_POLY8X8X2_T:%.*]] poison, <8 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY8X8X2_T]] poison, <8 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i8> [[DOTFCA_1_EXTRACT]], 0, 
1
-// CHECK-NEXT:    ret [[STRUCT_POLY8X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
-  return vtrn_p8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.poly16x4x2_t @test_vtrn_p16(
-// CHECK-SAME: <4 x i16> noundef [[A:%.*]], <4 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i16> [[B]] to <8 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> 
[[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x 
i16> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_POLY16X4X2_T:%.*]] poison, <4 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X4X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY16X4X2_T]] poison, <4 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_POLY16X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
-  return vtrn_p16(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.int8x16x2_t @test_vtrnq_s8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, 
i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, 
i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_INT8X16X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
-  return vtrnq_s8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.int16x8x2_t @test_vtrnq_s16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> 
[[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x 
i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, 
i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_INT16X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
-  return vtrnq_s16(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.int32x4x2_t @test_vtrnq_s32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> 
[[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x 
i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_INT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_INT32X4X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_INT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_INT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_INT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
-  return vtrnq_s32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint8x16x2_t @test_vtrnq_u8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, 
i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, 
i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_UINT8X16X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_UINT8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
-  return vtrnq_u8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint16x8x2_t @test_vtrnq_u16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> 
[[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x 
i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, 
i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_UINT16X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_UINT16X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
-  return vtrnq_u16(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.uint32x4x2_t @test_vtrnq_u32(
-// CHECK-SAME: <4 x i32> noundef [[A:%.*]], <4 x i32> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x i32> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x i32> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <4 x i32>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <4 x i32>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> 
[[TMP3]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x 
i32> [[TMP3]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_UINT32X4X2_T:%.*]] poison, <4 x i32> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT1]], <4 x i32> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_UINT32X4X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x i32>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT32X4X2_T]] poison, <4 x i32> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x i32> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_UINT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
-  return vtrnq_u32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.float32x4x2_t @test_vtrnq_f32(
-// CHECK-SAME: <4 x float> noundef [[A:%.*]], <4 x float> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <4 x float> [[A]] to <4 x i32>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <4 x float> [[B]] to <4 x i32>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <4 x i32> [[TMP0]] to <16 x i8>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i32> [[TMP1]] to <16 x i8>
-// CHECK-NEXT:    [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <4 x float>
-// CHECK-NEXT:    [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <4 x float>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x 
float> [[TMP5]], <4 x i32> <i32 0, i32 4, i32 2, i32 6>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <4 x float> [[TMP4]], <4 x 
float> [[TMP5]], <4 x i32> <i32 1, i32 5, i32 3, i32 7>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT3:%.*]] = insertvalue 
[[STRUCT_FLOAT32X4X2_T:%.*]] poison, <4 x float> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT4:%.*]] = insertvalue 
[[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT3]], <4 x float> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP6:%.*]] = extractvalue [[STRUCT_FLOAT32X4X2_T]] 
[[DOTFCA_0_1_INSERT4]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] 
[[TMP6]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <4 x float>] 
[[TMP6]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_FLOAT32X4X2_T]] poison, <4 x float> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_0_INSERT]], <4 x float> 
[[DOTFCA_1_EXTRACT]], 0, 1
-// CHECK-NEXT:    ret [[STRUCT_FLOAT32X4X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
-  return vtrnq_f32(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.poly8x16x2_t @test_vtrnq_p8(
-// CHECK-SAME: <16 x i8> noundef [[A:%.*]], <16 x i8> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, 
i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> 
[[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, 
i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_POLY8X16X2_T:%.*]] poison, <16 x i8> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT1]], <16 x i8> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP0:%.*]] = extractvalue [[STRUCT_POLY8X16X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <16 x i8>] 
[[TMP0]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY8X16X2_T]] poison, <16 x i8> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_0_INSERT]], <16 x i8> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_POLY8X16X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
-  return vtrnq_p8(a, b);
-}
-
-// CHECK-LABEL: define dso_local %struct.poly16x8x2_t @test_vtrnq_p16(
-// CHECK-SAME: <8 x i16> noundef [[A:%.*]], <8 x i16> noundef [[B:%.*]]) 
#[[ATTR0]] {
-// CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8>
-// CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> [[B]] to <16 x i8>
-// CHECK-NEXT:    [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x i16>
-// CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16>
-// CHECK-NEXT:    [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x i16> 
[[TMP3]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
-// CHECK-NEXT:    [[VTRN1_I:%.*]] = shufflevector <8 x i16> [[TMP2]], <8 x 
i16> [[TMP3]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, 
i32 15>
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT1:%.*]] = insertvalue 
[[STRUCT_POLY16X8X2_T:%.*]] poison, <8 x i16> [[VTRN_I]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT2:%.*]] = insertvalue 
[[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT1]], <8 x i16> [[VTRN1_I]], 0, 1
-// CHECK-NEXT:    [[TMP4:%.*]] = extractvalue [[STRUCT_POLY16X8X2_T]] 
[[DOTFCA_0_1_INSERT2]], 0
-// CHECK-NEXT:    [[DOTFCA_0_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 0
-// CHECK-NEXT:    [[DOTFCA_1_EXTRACT:%.*]] = extractvalue [2 x <8 x i16>] 
[[TMP4]], 1
-// CHECK-NEXT:    [[DOTFCA_0_0_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY16X8X2_T]] poison, <8 x i16> [[DOTFCA_0_EXTRACT]], 0, 0
-// CHECK-NEXT:    [[DOTFCA_0_1_INSERT:%.*]] = insertvalue 
[[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_0_INSERT]], <8 x i16> [[DOTFCA_1_EXTRACT]], 
0, 1
-// CHECK-NEXT:    ret [[STRUCT_POLY16X8X2_T]] [[DOTFCA_0_1_INSERT]]
-//
-poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
-  return vtrnq_p16(a, b);
-}
diff --git a/clang/test/CodeGen/AArch64/neon/perm.c 
b/clang/test/CodeGen/AArch64/neon/perm.c
index 419769ae3f0fa..aa8c7ccc4e7df 100644
--- a/clang/test/CodeGen/AArch64/neon/perm.c
+++ b/clang/test/CodeGen/AArch64/neon/perm.c
@@ -2363,3 +2363,375 @@ mfloat8x16_t test_vtrn2q_mf8(mfloat8x16_t a, 
mfloat8x16_t b) {
 // LLVM: ret <16 x i8> [[SHUFFLE]]
   return vtrn2q_mf8(a, b);
 }
+
+// LLVM-LABEL: @test_vtrn_s8(
+// CIR-LABEL: @vtrn_s8(
+int8x8x2_t test_vtrn_s8(int8x8_t a, int8x8_t b) {
+// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) 
[#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : 
!s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, 
#cir.int<14> : !s32i] : !cir.vector<8 x !s8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s8i>, 
!cir.ptr<!cir.vector<8 x !s8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) 
[#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : 
!s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, 
#cir.int<15> : !s32i] : !cir.vector<8 x !s8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s8i>, 
!cir.ptr<!cir.vector<8 x !s8i>>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 
1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_s16(
+// CIR-LABEL: @vtrn_s16(
+int16x4x2_t test_vtrn_s16(int16x4_t a, int16x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !s16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s16i>, 
!cir.ptr<!cir.vector<4 x !s16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !s16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s16i>, 
!cir.ptr<!cir.vector<4 x !s16i>>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_u8(
+// CIR-LABEL: @vtrn_u8(
+uint8x8x2_t test_vtrn_u8(uint8x8_t a, uint8x8_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<8 x !u8i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<8 x !u8i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, 
#cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : 
!s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u8i>, 
!cir.ptr<!cir.vector<8 x !u8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, 
#cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : 
!s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u8i>, 
!cir.ptr<!cir.vector<8 x !u8i>>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 
1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_u16(
+// CIR-LABEL: @vtrn_u16(
+uint16x4x2_t test_vtrn_u16(uint16x4_t a, uint16x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !u16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !u16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!u16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !u16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !u16i>, 
!cir.ptr<!cir.vector<4 x !u16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!u16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !u16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !u16i>, 
!cir.ptr<!cir.vector<4 x !u16i>>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_p8(
+// CIR-LABEL: @vtrn_p8(
+poly8x8x2_t test_vtrn_p8(poly8x8_t a, poly8x8_t b) {
+// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) 
[#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : 
!s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, 
#cir.int<14> : !s32i] : !cir.vector<8 x !s8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s8i>, 
!cir.ptr<!cir.vector<8 x !s8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s8i>) 
[#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : 
!s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, 
#cir.int<15> : !s32i] : !cir.vector<8 x !s8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s8i>, 
!cir.ptr<!cir.vector<8 x !s8i>>
+
+// LLVM-SAME: <8 x i8> {{.*}} [[A:%.*]], <8 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 
1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_p8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_p16(
+// CIR-LABEL: @vtrn_p16(
+poly16x4x2_t test_vtrn_p16(poly16x4_t a, poly16x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<4 x !s16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !s16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s16i>, 
!cir.ptr<!cir.vector<4 x !s16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s16i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !s16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s16i>, 
!cir.ptr<!cir.vector<4 x !s16i>>
+
+// LLVM-SAME: <4 x i16> {{.*}} [[A:%.*]], <4 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i16> [[A]], <4 x i16> [[B]], <4 
x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i16> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i16> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_p16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_s32(
+// CIR-LABEL: @vtrn_s32(
+int32x2x2_t test_vtrn_s32(int32x2_t a, int32x2_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !s32i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !s32i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!s32i>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x !s32i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !s32i>, 
!cir.ptr<!cir.vector<2 x !s32i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!s32i>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x !s32i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !s32i>, 
!cir.ptr<!cir.vector<2 x !s32i>>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 
x i32> <i32 0, i32 2>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 
x i32> <i32 1, i32 3>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x i32> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x i32> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_f32(
+// CIR-LABEL: @vtrn_f32(
+float32x2x2_t test_vtrn_f32(float32x2_t a, float32x2_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !cir.float>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !cir.float>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x 
!cir.float>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !cir.float>, 
!cir.ptr<!cir.vector<2 x !cir.float>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!cir.float>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x 
!cir.float>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !cir.float>, 
!cir.ptr<!cir.vector<2 x !cir.float>>
+
+// LLVM-SAME: <2 x float> {{.*}} [[A:%.*]], <2 x float> {{.*}} [[B:%.*]]) 
{{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], 
<2 x i32> <i32 0, i32 2>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x float> [[A]], <2 x float> [[B]], 
<2 x i32> <i32 1, i32 3>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x float> 
[[VTRN_LO]], 0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x float> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_u32(
+// CIR-LABEL: @vtrn_u32(
+uint32x2x2_t test_vtrn_u32(uint32x2_t a, uint32x2_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !u32i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<2 x !u32i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!u32i>) [#cir.int<0> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x !u32i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<2 x !u32i>, 
!cir.ptr<!cir.vector<2 x !u32i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<2 x 
!u32i>) [#cir.int<1> : !s32i, #cir.int<3> : !s32i] : !cir.vector<2 x !u32i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<2 x !u32i>, 
!cir.ptr<!cir.vector<2 x !u32i>>
+
+// LLVM-SAME: <2 x i32> {{.*}} [[A:%.*]], <2 x i32> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 
x i32> <i32 0, i32 2>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <2 x i32> [[A]], <2 x i32> [[B]], <2 
x i32> <i32 1, i32 3>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <2 x i32> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <2 x i32> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrn_mf8(
+// CIR-LABEL: @vtrn_mf8(
+mfloat8x8x2_t test_vtrn_mf8(mfloat8x8_t a, mfloat8x8_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<8 x !u8i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<8 x !s8i> -> 
!cir.vector<8 x !u8i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u8i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, 
#cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : 
!s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u8i>, 
!cir.ptr<!cir.vector<8 x !u8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u8i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, 
#cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : 
!s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u8i>, 
!cir.ptr<!cir.vector<8 x !u8i>>
+
+// LLVM-SAME: <8 x i8> {{.*}}[[A:%.*]], <8 x i8> {{.*}}[[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i8> [[A]], <8 x i8> [[B]], <8 x 
i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i8> [[VTRN_HI]], 0, 
1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrn_mf8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_s8(
+// CIR-LABEL: @vtrnq_s8(
+int8x16x2_t test_vtrnq_s8(int8x16_t a, int8x16_t b) {
+// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, 
#cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : 
!s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, 
#cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> 
: !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !s8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !s8i>, 
!cir.ptr<!cir.vector<16 x !s8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, 
#cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : 
!s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, 
#cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> 
: !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !s8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !s8i>, 
!cir.ptr<!cir.vector<16 x !s8i>>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 
x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 
24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 
x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 
25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_s8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_s16(
+// CIR-LABEL: @vtrnq_s16(
+int16x8x2_t test_vtrnq_s16(int16x8_t a, int16x8_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<8 x !s16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<8 x !s16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, 
#cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : 
!s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s16i>, 
!cir.ptr<!cir.vector<8 x !s16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!s16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, 
#cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : 
!s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s16i>, 
!cir.ptr<!cir.vector<8 x !s16i>>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 
x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 
x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_s16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_s32(
+// CIR-LABEL: @vtrnq_s32(
+int32x4x2_t test_vtrnq_s32(int32x4_t a, int32x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !s32i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !s32i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s32i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !s32i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !s32i>, 
!cir.ptr<!cir.vector<4 x !s32i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!s32i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !s32i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !s32i>, 
!cir.ptr<!cir.vector<4 x !s32i>>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 
x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 
x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i32> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i32> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_s32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_f32(
+// CIR-LABEL: @vtrnq_f32(
+float32x4x2_t test_vtrnq_f32(float32x4_t a, float32x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !cir.float>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !cir.float>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !cir.float>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !cir.float>, 
!cir.ptr<!cir.vector<4 x !cir.float>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!cir.float>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !cir.float>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !cir.float>, 
!cir.ptr<!cir.vector<4 x !cir.float>>
+
+// LLVM-SAME: <4 x float> {{.*}} [[A:%.*]], <4 x float> {{.*}} [[B:%.*]]) 
{{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], 
<4 x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x float> [[A]], <4 x float> [[B]], 
<4 x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x float> 
[[VTRN_LO]], 0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x float> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_f32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_u8(
+// CIR-LABEL: @vtrnq_u8(
+uint8x16x2_t test_vtrnq_u8(uint8x16_t a, uint8x16_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<16 x !u8i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<16 x !u8i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x 
!u8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, 
#cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : 
!s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, 
#cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> 
: !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !u8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !u8i>, 
!cir.ptr<!cir.vector<16 x !u8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x 
!u8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, 
#cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : 
!s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, 
#cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> 
: !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !u8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !u8i>, 
!cir.ptr<!cir.vector<16 x !u8i>>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 
x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 
24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 
x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 
25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_u8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_u16(
+// CIR-LABEL: @vtrnq_u16(
+uint16x8x2_t test_vtrnq_u16(uint16x8_t a, uint16x8_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<8 x !u16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<8 x !u16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, 
#cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : 
!s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !u16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !u16i>, 
!cir.ptr<!cir.vector<8 x !u16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x 
!u16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, 
#cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : 
!s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !u16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !u16i>, 
!cir.ptr<!cir.vector<8 x !u16i>>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 
x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 
x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_u16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_u32(
+// CIR-LABEL: @vtrnq_u32(
+uint32x4x2_t test_vtrnq_u32(uint32x4_t a, uint32x4_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !u32i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> 
!cir.vector<4 x !u32i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!u32i>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !u32i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<4 x !u32i>, 
!cir.ptr<!cir.vector<4 x !u32i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<4 x 
!u32i>) [#cir.int<1> : !s32i, #cir.int<5> : !s32i, #cir.int<3> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<4 x !u32i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<4 x !u32i>, 
!cir.ptr<!cir.vector<4 x !u32i>>
+
+// LLVM-SAME: <4 x i32> {{.*}} [[A:%.*]], <4 x i32> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 
x i32> <i32 0, i32 4, i32 2, i32 6>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <4 x i32> [[A]], <4 x i32> [[B]], <4 
x i32> <i32 1, i32 5, i32 3, i32 7>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <4 x i32> [[VTRN_LO]], 
0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <4 x i32> [[VTRN_HI]], 
0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_u32(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_p8(
+// CIR-LABEL: @vtrnq_p8(
+poly8x16x2_t test_vtrnq_p8(poly8x16_t a, poly8x16_t b) {
+// CIR: [[LO:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !s8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !s8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !s8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !s8i>, !cir.ptr<!cir.vector<16 x !s8i>>
+
+// LLVM-SAME: <16 x i8> {{.*}} [[A:%.*]], <16 x i8> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_p8(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_p16(
+// CIR-LABEL: @vtrnq_p16(
+poly16x8x2_t test_vtrnq_p16(poly16x8_t a, poly16x8_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<8 x !s16i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<2> : !s32i, #cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : !s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !s16i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<8 x !s16i>) [#cir.int<1> : !s32i, #cir.int<9> : !s32i, #cir.int<3> : !s32i, #cir.int<11> : !s32i, #cir.int<5> : !s32i, #cir.int<13> : !s32i, #cir.int<7> : !s32i, #cir.int<15> : !s32i] : !cir.vector<8 x !s16i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<8 x !s16i>, !cir.ptr<!cir.vector<8 x !s16i>>
+
+// LLVM-SAME: <8 x i16> {{.*}} [[A:%.*]], <8 x i16> {{.*}} [[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <8 x i16> [[A]], <8 x i16> [[B]], <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <8 x i16> [[VTRN_LO]], 0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <8 x i16> [[VTRN_HI]], 0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_p16(a, b);
+}
+
+// LLVM-LABEL: @test_vtrnq_mf8(
+// CIR-LABEL: @vtrnq_mf8(
+mfloat8x16x2_t test_vtrnq_mf8(mfloat8x16_t a, mfloat8x16_t b) {
+// CIR: [[A_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+// CIR: [[B_CAST:%.*]] = cir.cast bitcast %{{.*}} : !cir.vector<16 x !s8i> -> !cir.vector<16 x !u8i>
+// CIR: [[LO:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<0> : !s32i, #cir.int<16> : !s32i, #cir.int<2> : !s32i, #cir.int<18> : !s32i, #cir.int<4> : !s32i, #cir.int<20> : !s32i, #cir.int<6> : !s32i, #cir.int<22> : !s32i, #cir.int<8> : !s32i, #cir.int<24> : !s32i, #cir.int<10> : !s32i, #cir.int<26> : !s32i, #cir.int<12> : !s32i, #cir.int<28> : !s32i, #cir.int<14> : !s32i, #cir.int<30> : !s32i] : !cir.vector<16 x !u8i>
+// CIR: cir.store [[LO]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>>
+// CIR: [[HI:%.*]] = cir.vec.shuffle([[A_CAST]], [[B_CAST]] : !cir.vector<16 x !u8i>) [#cir.int<1> : !s32i, #cir.int<17> : !s32i, #cir.int<3> : !s32i, #cir.int<19> : !s32i, #cir.int<5> : !s32i, #cir.int<21> : !s32i, #cir.int<7> : !s32i, #cir.int<23> : !s32i, #cir.int<9> : !s32i, #cir.int<25> : !s32i, #cir.int<11> : !s32i, #cir.int<27> : !s32i, #cir.int<13> : !s32i, #cir.int<29> : !s32i, #cir.int<15> : !s32i, #cir.int<31> : !s32i] : !cir.vector<16 x !u8i>
+// CIR: cir.store [[HI]], %{{.*}} : !cir.vector<16 x !u8i>, !cir.ptr<!cir.vector<16 x !u8i>>
+
+// LLVM-SAME: <16 x i8> {{.*}}[[A:%.*]], <16 x i8> {{.*}}[[B:%.*]]) {{.*}} {
+// LLVM: [[VTRN_LO:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 0, i32 16, i32 2, i32 18, i32 4, i32 20, i32 6, i32 22, i32 8, i32 24, i32 10, i32 26, i32 12, i32 28, i32 14, i32 30>
+// LLVM: [[VTRN_HI:%.*]] = shufflevector <16 x i8> [[A]], <16 x i8> [[B]], <16 x i32> <i32 1, i32 17, i32 3, i32 19, i32 5, i32 21, i32 7, i32 23, i32 9, i32 25, i32 11, i32 27, i32 13, i32 29, i32 15, i32 31>
+// LLVM: [[RES0:%.*]] = insertvalue [[RTY:%.*]] poison, <16 x i8> [[VTRN_LO]], 0, 0
+// LLVM: [[RES1:%.*]] = insertvalue [[RTY]] [[RES0]], <16 x i8> [[VTRN_HI]], 0, 1
+// LLVM: ret [[RTY]] [[RES1]]
+  return vtrnq_mf8(a, b);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to