[clang] 6bdb838 - [CIR] Upstream vec shuffle builtins in CIR codegen (#169178)

via cfe-commits Tue, 02 Dec 2025 15:29:30 -0800

Author: Thibault Monnier
Date: 2025-12-02T15:29:12-08:00
New Revision: 6bdb838a05bb7c6f293e53800f46ba182a22f571


URL: 
https://github.com/llvm/llvm-project/commit/6bdb838a05bb7c6f293e53800f46ba182a22f571
DIFF: 
https://github.com/llvm/llvm-project/commit/6bdb838a05bb7c6f293e53800f46ba182a22f571.diff

LOG: [CIR] Upstream vec shuffle builtins in CIR codegen (#169178)

This PR is part of #167752. It upstreams the codegen and tests for the
shuffle builtins implemented in the incubator, including:
- `vinsert` + `insert`
- `pblend` + `blend`
- `vpermilp`
- `pshuf` + `shufp`
- `palignr`

It does NOT upstream the `perm`, `vperm2`, `vpshuf`, `shuf_i` / `shuf_f`
and `align` builtins, which are not yet implemented in the incubator.

This _is_ a large commit, but most of it is tests.

The `pshufd` / `vpermilp` builtins seem to have no test coverage in the
incubator, what should I do?

Added: 
    clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c

Modified: 
    clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
    clang/lib/CIR/CodeGen/CIRGenFunction.h
    clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c
    clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c

Removed: 
    


################################################################################
diff  --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 224a182ed17d1..b2e498a0fea64 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -85,6 +85,69 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, 
mlir::Location loc,
   return maskVec;
 }
 
+// Builds the VecShuffleOp for pshuflw and pshufhw x86 builtins.
+//
+// The vector is split into lanes of 8 word elements (16 bits). The lower or
+// upper half of each lane, controlled by `isLow`, is shuffled in the following
+// way: The immediate is truncated to 8 bits, separated into 4 2-bit fields. 
The
+// i-th field's value represents the resulting index of the i-th element in the
+// half lane after shuffling. The other half of the lane remains unchanged.
+static cir::VecShuffleOp emitPshufWord(CIRGenBuilderTy &builder,
+                                       const mlir::Value vec,
+                                       const mlir::Value immediate,
+                                       const mlir::Location loc,
+                                       const bool isLow) {
+  uint32_t imm = CIRGenFunction::getZExtIntValueFromConstOp(immediate);
+
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+
+  unsigned firstHalfStart = isLow ? 0 : 4;
+  unsigned secondHalfStart = 4 - firstHalfStart;
+
+  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+  imm = (imm & 0xff) * 0x01010101;
+
+  int64_t indices[32];
+  for (unsigned l = 0; l != numElts; l += 8) {
+    for (unsigned i = firstHalfStart; i != firstHalfStart + 4; ++i) {
+      indices[l + i] = l + (imm & 3) + firstHalfStart;
+      imm >>= 2;
+    }
+    for (unsigned i = secondHalfStart; i != secondHalfStart + 4; ++i)
+      indices[l + i] = l + i;
+  }
+
+  return builder.createVecShuffle(loc, vec, ArrayRef(indices, numElts));
+}
+
+// Builds the shuffle mask for pshufd and shufpd/shufps x86 builtins.
+// The shuffle mask is written to outIndices.
+static void
+computeFullLaneShuffleMask(CIRGenFunction &cgf, const mlir::Value vec,
+                           uint32_t imm, const bool isShufP,
+                           llvm::SmallVectorImpl<int64_t> &outIndices) {
+  auto vecTy = cast<cir::VectorType>(vec.getType());
+  unsigned numElts = vecTy.getSize();
+  unsigned numLanes = cgf.cgm.getDataLayout().getTypeSizeInBits(vecTy) / 128;
+  unsigned numLaneElts = numElts / numLanes;
+
+  // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+  imm = (imm & 0xff) * 0x01010101;
+
+  for (unsigned l = 0; l != numElts; l += numLaneElts) {
+    for (unsigned i = 0; i != numLaneElts; ++i) {
+      uint32_t idx = imm % numLaneElts;
+      imm /= numLaneElts;
+      if (isShufP && i >= (numLaneElts / 2))
+        idx += numElts;
+      outIndices[l + i] = l + idx;
+    }
+  }
+
+  outIndices.resize(numElts);
+}
+
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
                                        mlir::Location loc,
                                        const std::string &intrinsicName,
@@ -270,9 +333,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_vec_ext_v4di: {
     unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
 
-    uint64_t index =
-        ops[1].getDefiningOp<cir::ConstantOp>().getIntValue().getZExtValue();
-
+    uint64_t index = getZExtIntValueFromConstOp(ops[1]);
     index &= numElts - 1;
 
     cir::ConstantOp indexVal =
@@ -728,12 +789,20 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_pblendw256:
   case X86::BI__builtin_ia32_pblendd128:
   case X86::BI__builtin_ia32_pblendd256:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_pshuflw:
   case X86::BI__builtin_ia32_pshuflw256:
   case X86::BI__builtin_ia32_pshuflw512:
+    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+                         true);
   case X86::BI__builtin_ia32_pshufhw:
   case X86::BI__builtin_ia32_pshufhw256:
   case X86::BI__builtin_ia32_pshufhw512:
+    return emitPshufWord(builder, ops[0], ops[1], getLoc(expr->getExprLoc()),
+                         false);
   case X86::BI__builtin_ia32_pshufd:
   case X86::BI__builtin_ia32_pshufd256:
   case X86::BI__builtin_ia32_pshufd512:
@@ -742,13 +811,28 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_vpermilpd256:
   case X86::BI__builtin_ia32_vpermilps256:
   case X86::BI__builtin_ia32_vpermilpd512:
-  case X86::BI__builtin_ia32_vpermilps512:
+  case X86::BI__builtin_ia32_vpermilps512: {
+    const uint32_t imm = getSExtIntValueFromConstOp(ops[1]);
+
+    llvm::SmallVector<int64_t, 16> mask(16);
+    computeFullLaneShuffleMask(*this, ops[0], imm, false, mask);
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], mask);
+  }
   case X86::BI__builtin_ia32_shufpd:
   case X86::BI__builtin_ia32_shufpd256:
   case X86::BI__builtin_ia32_shufpd512:
   case X86::BI__builtin_ia32_shufps:
   case X86::BI__builtin_ia32_shufps256:
-  case X86::BI__builtin_ia32_shufps512:
+  case X86::BI__builtin_ia32_shufps512: {
+    const uint32_t imm = getZExtIntValueFromConstOp(ops[2]);
+
+    llvm::SmallVector<int64_t, 16> mask(16);
+    computeFullLaneShuffleMask(*this, ops[0], imm, true, mask);
+
+    return builder.createVecShuffle(getLoc(expr->getExprLoc()), ops[0], ops[1],
+                                    mask);
+  }
   case X86::BI__builtin_ia32_permdi256:
   case X86::BI__builtin_ia32_permdf256:
   case X86::BI__builtin_ia32_permdi512:

diff  --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h 
b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index b6926bb88ac85..ada16ff187430 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -203,6 +203,22 @@ class CIRGenFunction : public CIRGenTypeCache {
     return convertType(getContext().getTypeDeclType(t));
   }
 
+  /// Get integer from a mlir::Value that is an int constant or a constant op.
+  static int64_t getSExtIntValueFromConstOp(mlir::Value val) {
+    auto constOp = val.getDefiningOp<cir::ConstantOp>();
+    assert(constOp && "getIntValueFromConstOp call with non ConstantOp");
+    return constOp.getIntValue().getSExtValue();
+  }
+
+  /// Get zero-extended integer from a mlir::Value that is an int constant or a
+  /// constant op.
+  static int64_t getZExtIntValueFromConstOp(mlir::Value val) {
+    auto constOp = val.getDefiningOp<cir::ConstantOp>();
+    assert(constOp &&
+           "getZeroExtendedIntValueFromConstOp call with non ConstantOp");
+    return constOp.getIntValue().getZExtValue();
+  }
+
   ///  Return the cir::TypeEvaluationKind of QualType \c type.
   static cir::TypeEvaluationKind getEvaluationKind(clang::QualType type);
 
@@ -1816,7 +1832,7 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   mlir::LogicalResult emitWhileStmt(const clang::WhileStmt &s);
 
-  mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *e);
+  mlir::Value emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr);
 
   /// Given an assignment `*lhs = rhs`, emit a test that checks if \p rhs is
   /// nonnull, if 1\p LHS is marked _Nonnull.

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
index 82fa4358dc400..d9a8771023fa7 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx-builtins.c
@@ -73,4 +73,76 @@ __m256i test_mm256_undefined_si256(void) {
   // OGCG-LABEL: test_mm256_undefined_si256
   // OGCG: ret <4 x i64> zeroinitializer
   return _mm256_undefined_si256();
-}
\ No newline at end of file
+}
+
+__m256d test_mm256_shuffle_pd(__m256d A, __m256d B) {
+  // CIR-LABEL: test_mm256_shuffle_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<2> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<4 x !cir.double>
+
+  // LLVM-LABEL: test_mm256_shuffle_pd
+  // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 0, i32 4, i32 2, i32 6>
+
+  // OGCG-LABEL: test_mm256_shuffle_pd
+  // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> 
<i32 0, i32 4, i32 2, i32 6>
+  return _mm256_shuffle_pd(A, B, 0);
+}
+
+__m256 test_mm256_shuffle_ps(__m256 A, __m256 B) {
+  // CIR-LABEL: test_mm256_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<8> : !s32i, 
#cir.int<8> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : 
!s32i, #cir.int<12> : !s32i] : !cir.vector<8 x !cir.float>
+
+  // LLVM-LABEL: test_mm256_shuffle_ps
+  // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
+
+  // OGCG-LABEL: test_mm256_shuffle_ps
+  // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> 
<i32 0, i32 0, i32 8, i32 8, i32 4, i32 4, i32 12, i32 12>
+  return _mm256_shuffle_ps(A, B, 0);
+}
+
+__m128 test_mm_permute_ps(__m128 A) {
+    // CIR-LABEL: test_mm_permute_ps
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} :  !cir.vector<4 x !cir.float>) 
[#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : 
!s32i] : !cir.vector<4 x !cir.float>
+
+       // LLVM-LABEL: test_mm_permute_ps
+    // LLVM: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x i32> 
<i32 2, i32 3, i32 0, i32 1>
+
+    // OGCG-LABEL: test_mm_permute_ps
+       // OGCG: shufflevector <4 x float> %{{.*}}, <4 x float> poison, <4 x 
i32> <i32 2, i32 3, i32 0, i32 1>
+    return _mm_permute_ps(A, 0x4E);
+}
+
+__m256 test_mm256_permute_ps(__m256 A) {
+    // CIR-LABEL: test_mm256_permute_ps
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} :  !cir.vector<8 x !cir.float>) 
[#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : 
!s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<4> : !s32i, 
#cir.int<5> : !s32i] : !cir.vector<8 x !cir.float>
+
+    // LLVM-LABEL: test_mm256_permute_ps
+    // LLVM: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> 
<i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+
+    // OGCG-LABEL: test_mm256_permute_ps
+    // OGCG: shufflevector <8 x float> %{{.*}}, <8 x float> poison, <8 x i32> 
<i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5>
+    return _mm256_permute_ps(A, 0x4E);
+}
+
+__m128d test_mm_permute_pd(__m128d A) {
+    // CIR-LABEL: test_mm_permute_pd
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} :  !cir.vector<2 x !cir.double>) 
[#cir.int<1> : !s32i, #cir.int<0> : !s32i] : !cir.vector<2 x !cir.double>
+
+    // LLVM-LABEL: test_mm_permute_pd
+    // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x 
i32> <i32 1, i32 0>
+
+    // OGCG-LABEL: test_mm_permute_pd
+    // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> poison, <2 x 
i32> <i32 1, i32 0>
+    return _mm_permute_pd(A, 0x1);
+}
+
+__m256d test_mm256_permute_pd(__m256d A) {
+    // CIR-LABEL: test_mm256_permute_pd
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} :  !cir.vector<4 x !cir.double>) 
[#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : 
!s32i] : !cir.vector<4 x !cir.double>
+
+    // LLVM-LABEL: test_mm256_permute_pd
+    // LLVM: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x 
i32> <i32 1, i32 0, i32 3, i32 2>
+
+    // OGCG-LABEL: test_mm256_permute_pd
+    // OGCG: shufflevector <4 x double> %{{.*}}, <4 x double> poison, <4 x 
i32> <i32 1, i32 0, i32 3, i32 2>
+    return _mm256_permute_pd(A, 0x5);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
new file mode 100644
index 0000000000000..b7497c2053b2d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx2-builtins.c
@@ -0,0 +1,53 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefixes=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// This test mimics clang/test/CodeGen/X86/avx2-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+#include <immintrin.h>
+
+__m256i test_mm256_shufflelo_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : !s32i, 
#cir.int<1> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : !s32i, 
#cir.int<9> : !s32i, #cir.int<9> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : 
!s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i] : !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: test_mm256_shufflelo_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
+
+  // OGCG-LABEL: test_mm256_shufflelo_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 3, i32 0, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 9, 
i32 9, i32 12, i32 13, i32 14, i32 15>
+  return _mm256_shufflelo_epi16(a, 83);
+}
+
+__m256i test_mm256_shufflehi_epi16(__m256i a) {
+  // CIR-LABEL: _mm256_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<7> : !s32i, #cir.int<6> : !s32i, #cir.int<6> : 
!s32i, #cir.int<5> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, 
#cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<15> : !s32i, #cir.int<14> 
: !s32i, #cir.int<14> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !s16i>
+
+  // LLVM-LABEL: test_mm256_shufflehi_epi16
+  // LLVM: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
+
+  // OGCG-LABEL: test_mm256_shufflehi_epi16
+  // OGCG: shufflevector <16 x i16> %{{.*}}, <16 x i16> poison, <16 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 6, i32 5, i32 8, i32 9, i32 10, 
i32 11, i32 15, i32 14, i32 14, i32 13>
+  return _mm256_shufflehi_epi16(a, 107);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 774e1452d10fa..48a89769ea10f 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -1,15 +1,32 @@
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-fclangir -emit-llvm -o %t.ll -Wall -Werror
-// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror | FileCheck %s -check-prefix=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw  -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux  -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw  -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux  -target-feature +avx512bw -fno-signed-char  
-fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefix=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -fno-signed-char 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
 
 // This test mimics clang/test/CodeGen/X86/avx512bw-builtins.c, which 
eventually
 // CIR shall be able to support fully.
@@ -116,7 +133,6 @@ __mmask32 test_kshiftri_mask32_out_of_range(__mmask32 A) {
   return _kshiftri_mask32(A, 33);
 }
 
-
 __mmask32 test_kadd_mask32(__mmask32 A, __mmask32 B) {
   // CIR-LABEL: _kadd_mask32
   // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
@@ -519,3 +535,27 @@ __mmask64 test_mm512_kunpackd(__mmask64 A, __mmask64 B) {
   // OGCG: bitcast <64 x i1> [[RES]] to i64
   return _mm512_kunpackd(A, B);
 }
+
+__m512i test_mm512_shufflelo_epi16(__m512i __A) {
+  // CIR-LABEL: _mm512_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x 
!s16i>) [#cir.int<1> : !s32i, #cir.int<1> : !s32i, #cir.int<0> : !s32i, 
#cir.int<0> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i, #cir.int<9> : !s32i, #cir.int<9> : !s32i, 
#cir.int<8> : !s32i, #cir.int<8> : !s32i, #cir.int<12> : !s32i, #cir.int<13> : 
!s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, #cir.int<17> : !s32i, 
#cir.int<17> : !s32i, #cir.int<16> : !s32i, #cir.int<16> : !s32i, #cir.int<20> 
: !s32i, #cir.int<21> : !s32i, #cir.int<22> : !s32i, #cir.int<23> : !s32i, 
#cir.int<25> : !s32i, #cir.int<25> : !s32i, #cir.int<24> : !s32i, #cir.int<24> 
: !s32i, #cir.int<28> : !s32i, #cir.int<29> : !s32i, #cir.int<30> : !s32i, 
#cir.int<31> : !s32i] : !cir.vector<32 x !s16i>
+
+  // LLVM-LABEL: test_mm512_shufflelo_epi16
+  // LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> 
<i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, 
i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, 
i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, 
i32 31>
+
+  // OGCG-LABEL: test_mm512_shufflelo_epi16
+  // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> 
<i32 1, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 8, 
i32 8, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 16, i32 16, i32 20, 
i32 21, i32 22, i32 23, i32 25, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, 
i32 31>
+  return _mm512_shufflelo_epi16(__A, 5);
+}
+
+__m512i test_mm512_shufflehi_epi16(__m512i __A) {
+  // CIR-LABEL: _mm512_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<32 x 
!s16i>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<5> : !s32i, #cir.int<5> : !s32i, #cir.int<4> : 
!s32i, #cir.int<4> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, 
#cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<13> : !s32i, #cir.int<13> 
: !s32i, #cir.int<12> : !s32i, #cir.int<12> : !s32i, #cir.int<16> : !s32i, 
#cir.int<17> : !s32i, #cir.int<18> : !s32i, #cir.int<19> : !s32i, #cir.int<21> 
: !s32i, #cir.int<21> : !s32i, #cir.int<20> : !s32i, #cir.int<20> : !s32i, 
#cir.int<24> : !s32i, #cir.int<25> : !s32i, #cir.int<26> : !s32i, #cir.int<27> 
: !s32i, #cir.int<29> : !s32i, #cir.int<29> : !s32i, #cir.int<28> : !s32i, 
#cir.int<28> : !s32i] : !cir.vector<32 x !s16i>
+
+  // LLVM-LABEL: test_mm512_shufflehi_epi16
+  // LLVM: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, 
i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, 
i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, 
i32 28>
+
+  // OGCG-LABEL: test_mm512_shufflehi_epi16
+  // OGCG: shufflevector <32 x i16> %{{.*}}, <32 x i16> poison, <32 x i32> 
<i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 4, i32 4, i32 8, i32 9, i32 10, 
i32 11, i32 13, i32 13, i32 12, i32 12, i32 16, i32 17, i32 18, i32 19, i32 21, 
i32 21, i32 20, i32 20, i32 24, i32 25, i32 26, i32 27, i32 29, i32 29, i32 28, 
i32 28>
+  return _mm512_shufflehi_epi16(__A, 5);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index e03109510a931..7b9579ec4a238 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -78,6 +78,54 @@ __m512i test_mm512_undefined_epi32(void) {
   return _mm512_undefined_epi32();
 }
 
+__m512d test_mm512_shuffle_pd(__m512d __M, __m512d __V) {
+  // CIR-LABEL: test_mm512_shuffle_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x 
!cir.double>) [#cir.int<0> : !s32i, #cir.int<8> : !s32i, #cir.int<3> : !s32i, 
#cir.int<10> : !s32i, #cir.int<4> : !s32i, #cir.int<12> : !s32i, #cir.int<6> : 
!s32i, #cir.int<14> : !s32i] : !cir.vector<8 x !cir.double>
+
+  // LLVM-LABEL: test_mm512_shuffle_pd
+  // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> 
<i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
+
+  // OGCG-LABEL: test_mm512_shuffle_pd
+  // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> %{{.*}}, <8 x i32> 
<i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
+  return _mm512_shuffle_pd(__M, __V, 4);
+}
+
+__m512 test_mm512_shuffle_ps(__m512 __M, __m512 __V) {
+  // CIR-LABEL: test_mm512_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<16> : !s32i, 
#cir.int<16> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<20> : 
!s32i, #cir.int<20> : !s32i, #cir.int<8> : !s32i, #cir.int<9> : !s32i, 
#cir.int<24> : !s32i, #cir.int<24> : !s32i, #cir.int<12> : !s32i, #cir.int<13> 
: !s32i, #cir.int<28> : !s32i, #cir.int<28> : !s32i] : !cir.vector<16 x 
!cir.float>
+
+  // LLVM-LABEL: test_mm512_shuffle_ps
+  // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x 
i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, 
i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+
+  // OGCG-LABEL: test_mm512_shuffle_ps
+  // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> %{{.*}}, <16 x 
i32> <i32 0, i32 1, i32 16, i32 16, i32 4, i32 5, i32 20, i32 20, i32 8, i32 9, 
i32 24, i32 24, i32 12, i32 13, i32 28, i32 28>
+  return _mm512_shuffle_ps(__M, __V, 4);
+}
+
+__m512 test_mm512_permute_ps(__m512 A) {
+    // CIR-LABEL: test_mm512_permute_ps
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<16 x !cir.float>) 
[#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, #cir.int<1> : 
!s32i, #cir.int<6> : !s32i, #cir.int<7> : !s32i, #cir.int<4> : !s32i, 
#cir.int<5> : !s32i, #cir.int<10> : !s32i, #cir.int<11> : !s32i, #cir.int<8> : 
!s32i, #cir.int<9> : !s32i, #cir.int<14> : !s32i, #cir.int<15> : !s32i, 
#cir.int<12> : !s32i, #cir.int<13> : !s32i] : !cir.vector<16 x !cir.float>
+
+    // LLVM-LABEL: test_mm512_permute_ps
+    // LLVM: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x 
i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, 
i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+
+    // OGCG-LABEL: test_mm512_permute_ps
+    // OGCG: shufflevector <16 x float> %{{.*}}, <16 x float> poison, <16 x 
i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, 
i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
+    return _mm512_permute_ps(A, 0x4E);
+}
+
+__m512d test_mm512_permute_pd(__m512d A) {
+    // CIR-LABEL: test_mm512_permute_pd
+    // CIR: cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !cir.double>) 
[#cir.int<1> : !s32i, #cir.int<0> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : 
!s32i, #cir.int<5> : !s32i, #cir.int<4> : !s32i, #cir.int<7> : !s32i, 
#cir.int<6> : !s32i] : !cir.vector<8 x !cir.double>
+
+    // LLVM-LABEL: test_mm512_permute_pd
+    // LLVM: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x 
i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+
+    // OGCG-LABEL: test_mm512_permute_pd
+    // OGCG: shufflevector <8 x double> %{{.*}}, <8 x double> poison, <8 x 
i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
+    return _mm512_permute_pd(A, 0x55);
+}
+
 __mmask16 test_mm512_kand(__mmask16 A, __mmask16 B) {
   // CIR-LABEL: _mm512_kand
   // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c
index c893859b297cc..db52021d1aa9f 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse-builtins.c
@@ -71,3 +71,15 @@ __m128 test_mm_undefined_ps(void) {
   // OGCG: ret <4 x float> zeroinitializer
   return _mm_undefined_ps();
 }
+
+__m128 test_mm_shuffle_ps(__m128 A, __m128 B) {
+  // CIR-LABEL: _mm_shuffle_ps
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<4 x 
!cir.float>) [#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<4> : !s32i, 
#cir.int<4> : !s32i] : !cir.vector<4 x !cir.float>
+
+  // LLVM-LABEL: test_mm_shuffle_ps
+  // LLVM: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> 
<i32 0, i32 0, i32 4, i32 4>
+
+  // OGCG-LABEL: test_mm_shuffle_ps
+  // OGCG: shufflevector <4 x float> {{.*}}, <4 x float> {{.*}}, <4 x i32> 
<i32 0, i32 0, i32 4, i32 4>
+  return _mm_shuffle_ps(A, B, 0);
+}

diff  --git a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
index f5e07cdc28ccd..4bb17e9d20bc6 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/sse2-builtins.c
@@ -8,8 +8,11 @@
 // RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse2 -fno-signed-char -fclangir 
-emit-llvm -o %t.ll -Wall -Werror
 // RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
 
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror 
| FileCheck %s -check-prefix=OGCG
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +sse -emit-llvm -o - -Wall -Werror 
| FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse2 -emit-llvm -o - -Wall -Werror 
| FileCheck %s --check-prefixes=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +sse2 -fno-signed-char -emit-llvm 
-o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
 
 // This test mimics clang/test/CodeGen/X86/sse2-builtins.c, which eventually
 // CIR shall be able to support fully.
@@ -108,3 +111,51 @@ void test_mm_pause(void) {
   // LLVM: call void @llvm.x86.sse2.pause()
   // OGCG: call void @llvm.x86.sse2.pause()
 }
+
+__m128i test_mm_shufflelo_epi16(__m128i A) {
+  // CIR-LABEL: _mm_shufflelo_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) 
[#cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : !s32i, #cir.int<0> : 
!s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : !s32i, 
#cir.int<7> : !s32i] : !cir.vector<8 x !s16i>
+
+  // LLVM-LABEL: test_mm_shufflelo_epi16
+  // LLVM: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 
0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+
+  // OGCG-LABEL: test_mm_shufflelo_epi16
+  // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 
0, i32 0, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
+  return _mm_shufflelo_epi16(A, 0);
+}
+
+__m128i test_mm_shufflehi_epi16(__m128i A) {
+  // CIR-LABEL: _mm_shufflehi_epi16
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<8 x !s16i>) 
[#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : 
!s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, #cir.int<4> : !s32i, 
#cir.int<4> : !s32i] : !cir.vector<8 x !s16i>
+
+  // LLVM-LABEL: test_mm_shufflehi_epi16
+  // LLVM: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+
+  // OGCG-LABEL: test_mm_shufflehi_epi16
+  // OGCG: shufflevector <8 x i16> %{{.*}}, <8 x i16> poison, <8 x i32> <i32 
0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 4>
+  return _mm_shufflehi_epi16(A, 0);
+}
+
+__m128d test_mm_shuffle_pd(__m128d A, __m128d B) {
+  // CIR-LABEL: test_mm_shuffle_pd
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<2 x 
!cir.double>) [#cir.int<1> : !s32i, #cir.int<2> : !s32i] : !cir.vector<2 x 
!cir.double>
+
+  // LLVM-LABEL: test_mm_shuffle_pd
+  // LLVM: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> 
<i32 1, i32 2>
+
+  // OGCG-LABEL: test_mm_shuffle_pd
+  // OGCG: shufflevector <2 x double> %{{.*}}, <2 x double> %{{.*}}, <2 x i32> 
<i32 1, i32 2>
+  return _mm_shuffle_pd(A, B, 1);
+}
+
+__m128i test_mm_shuffle_epi32(__m128i A) {
+       // CIR-LABEL: test_mm_shuffle_epi32
+       // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}}: !cir.vector<4 x 
!s32i>) [#cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<0> : !s32i, 
#cir.int<1> : !s32i] : !cir.vector<4 x !s32i>
+
+    // LLVM-LABEL: test_mm_shuffle_epi32
+       // LLVM: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> 
<i32 2, i32 3, i32 0, i32 1>
+
+       // OGCG-LABEL: test_mm_shuffle_epi32
+    // OGCG: shufflevector <4 x i32> %{{.*}}, <4 x i32> poison, <4 x i32> <i32 
2, i32 3, i32 0, i32 1>
+    return _mm_shuffle_epi32(A, 0x4E);
+}


        
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] 6bdb838 - [CIR] Upstream vec shuffle builtins in CIR codegen (#169178)

Reply via email to