[clang] [llvm] [CIR] Support x86 builtin rotate (PR #169566)

Omar Hossam via cfe-commits Fri, 28 Nov 2025 21:37:10 -0800

https://github.com/moar55 updated 
https://github.com/llvm/llvm-project/pull/169566


>From b660675acc3c0131b9714e1779ff1192ace62d46 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Thu, 20 Nov 2025 23:12:40 +0100
Subject: [PATCH 1/8] [CIR] Implement x86 rotate builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 28 +++++++++++++++++++---
 shell.nix                                  | 14 +++++++++++
 2 files changed, 39 insertions(+), 3 deletions(-)
 create mode 100644 shell.nix

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index e7aa8a234efd9..ae241de90abee 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -90,6 +90,25 @@ static mlir::Value getMaskVecValue(CIRGenFunction &cgf, const CallExpr *expr,
   return maskVec;
 }
 
+static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, const CallExpr *e,
+                                      mlir::Value &op0, mlir::Value &op1,
+                                      mlir::Value &amt, bool isRight) {
+  auto ty = op0.getType();
+
+  // Amount may be scalar immediate, in which case create a splat vector.
+  // Funnel shifts amounts are treated as modulo and types are all power-of-2
+  // so we only care about the lowest log2 bits anyway.
+  if (amt.getType() != ty) {
+    amt = cgf.getBuilder().createIntCast(
+        amt, mlir::cast<cir::VectorType>(ty).getElementType());
+    amt = cir::VecSplatOp::create(cgf.getBuilder(), cgf.getLoc(e->getExprLoc()),
+                                  ty, amt);
+  }
+
+  const std::string intrinsicName = isRight ? "fshr" : "fshl";
+  return emitIntrinsicCallOp(cgf, e, intrinsicName, ty, op0, op1, amt);
+}
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -109,14 +128,15 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // evaluation.
   assert(!cir::MissingFeatures::msvcBuiltins());
 
-  // Find out if any arguments are required to be integer constant expressions.
+  // Find out if any arguments are required to be integer constant
+  // expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
   // The operands of the builtin call
   llvm::SmallVector<mlir::Value> ops;
 
-  // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
-  // is required to be a constant integer expression.
+  // `ICEArguments` is a bitmap indicating whether the argument at the i-th
+  // bit is required to be a constant integer expression.
   unsigned iceArguments = 0;
   ASTContext::GetBuiltinTypeError error;
   getContext().GetBuiltinType(builtinID, error, &iceArguments);
@@ -661,12 +681,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_prolq128:
   case X86::BI__builtin_ia32_prolq256:
   case X86::BI__builtin_ia32_prolq512:
+    return emitX86FunnelShift(*this, e, ops[0], ops[1], ops[1], false);
   case X86::BI__builtin_ia32_prord128:
   case X86::BI__builtin_ia32_prord256:
   case X86::BI__builtin_ia32_prord512:
   case X86::BI__builtin_ia32_prorq128:
   case X86::BI__builtin_ia32_prorq256:
   case X86::BI__builtin_ia32_prorq512:
+    return emitX86FunnelShift(*this, e, ops[0], ops[1], ops[1], true);
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
   case X86::BI__builtin_ia32_selectb_512:
diff --git a/shell.nix b/shell.nix
new file mode 100644
index 0000000000000..c30f6dc7b6928
--- /dev/null
+++ b/shell.nix
@@ -0,0 +1,14 @@
+let
+  nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-24.05";
+  pkgs = import nixpkgs { config = {}; overlays = []; };
+in
+
+
+pkgs.mkShellNoCC {
+  packages = with pkgs; [
+    cmake
+    ninja
+    llvmPackages_latest.llvm
+  ];
+stdenv = pkgs.clangStdenv;
+}

>From aec5122eb0f4a45bea83cf4f09de973aa97dc694 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Sun, 23 Nov 2025 15:36:45 +0100
Subject: [PATCH 2/8] update and add test

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  23 +-
 .../CIR/CodeGen/X86/builtin_test_helpers.h    | 304 ++++++++++++++++++
 clang/test/CIR/CodeGen/X86/xop-builtin.c      |  82 +++++
 3 files changed, 401 insertions(+), 8 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/X86/builtin_test_helpers.h
 create mode 100644 clang/test/CIR/CodeGen/X86/xop-builtin.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index ae241de90abee..f5039a8a016a0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -13,6 +13,7 @@
 
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
 #include "clang/CIR/MissingFeatures.h"
@@ -21,12 +22,12 @@ using namespace clang;
 using namespace clang::CIRGen;
 
 template <typename... Operands>
-static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
-                                       const std::string &str,
-                                       const mlir::Type &resTy,
-                                       Operands &&...op) {
+static mlir::Value
+emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *expr,
+                    const std::string &str, const mlir::Type &resTy,
+                    Operands &&...op) {
   CIRGenBuilderTy &builder = cgf.getBuilder();
-  mlir::Location location = cgf.getLoc(e->getExprLoc());
+  mlir::Location location = cgf.getLoc(expr->getExprLoc());
   return cir::LLVMIntrinsicCallOp::create(builder, location,
                                           builder.getStringAttr(str), resTy,
                                           std::forward<Operands>(op)...)
@@ -106,7 +107,8 @@ static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, const CallExpr *e,
   }
 
   const std::string intrinsicName = isRight ? "fshr" : "fshl";
-  return emitIntrinsicCallOp(cgf, e, intrinsicName, ty, op0, op1, amt);
+  return emitIntrinsicCallOp(cgf, e, intrinsicName, ty,
+                             mlir::ValueRange{op0, op1, amt});
 }
 
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
@@ -671,6 +673,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
         builder.createVecShuffle(getLoc(expr->getExprLoc()), in, zero, indices);
     return builder.createBitcast(sv, ops[0].getType());
   }
+  case X86::BI__builtin_ia32_kshiftridi:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_vprotbi:
   case X86::BI__builtin_ia32_vprotwi:
   case X86::BI__builtin_ia32_vprotdi:
@@ -681,14 +688,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_prolq128:
   case X86::BI__builtin_ia32_prolq256:
   case X86::BI__builtin_ia32_prolq512:
-    return emitX86FunnelShift(*this, e, ops[0], ops[1], ops[1], false);
+    return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], false);
   case X86::BI__builtin_ia32_prord128:
   case X86::BI__builtin_ia32_prord256:
   case X86::BI__builtin_ia32_prord512:
   case X86::BI__builtin_ia32_prorq128:
   case X86::BI__builtin_ia32_prorq256:
   case X86::BI__builtin_ia32_prorq512:
-    return emitX86FunnelShift(*this, e, ops[0], ops[1], ops[1], true);
+    return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], true);
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
   case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h b/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h
new file mode 100644
index 0000000000000..fcaf360626a2d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h
@@ -0,0 +1,304 @@
+/* Helper methods for builtin intrinsic tests */
+
+#include <immintrin.h>
+
+#if defined(__cplusplus) && (__cplusplus >= 201103L)
+
+constexpr bool match_m64(__m64 _v, unsigned long long a) {
+  __v1du v = (__v1du)_v;
+  return v[0] == a;
+}
+
+constexpr bool match_v1di(__m64 v, long long a) {
+  return v[0] == a;
+}
+
+constexpr bool match_v1du(__m64 _v, unsigned long long a) {
+  __v1du v = (__v1du)_v;
+  return v[0] == a;
+}
+
+constexpr bool match_v2si(__m64 _v, int a, int b) {
+  __v2si v = (__v2si)_v;
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v2su(__m64 _v, unsigned a, unsigned b) {
+  __v2su v = (__v2su)_v;
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) {
+  __v4hi v = (__v4hi)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v4hu(__m64 _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d) {
+  __v4hu v = (__v4hu)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v8qi(__m64 _v, signed char a, signed char b, signed char 
c, signed char d, signed char e, signed char f, signed char g, signed char h) {
+  __v8qs v = (__v8qs)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v8qu(__m64 _v, unsigned char a, unsigned char b, unsigned 
char c, unsigned char d, unsigned char e, unsigned char f, unsigned char g, 
unsigned char h) {
+  __v8qu v = (__v8qu)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_m128(__m128 _v, float a, float b, float c, float d) {
+  __v4su v = (__v4su)_v;
+  return v[0] == __builtin_bit_cast(unsigned, a) && v[1] == 
__builtin_bit_cast(unsigned, b) && v[2] == __builtin_bit_cast(unsigned, c) && 
v[3] == __builtin_bit_cast(unsigned, d);
+}
+
+constexpr bool match_m128d(__m128d _v, double a, double b) {
+  __v2du v = (__v2du)_v;
+  return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == 
__builtin_bit_cast(unsigned long long, b);
+}
+
+#ifdef __SSE2__
+constexpr bool match_m128h(__m128h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07) {
+  __v8hu v = (__v8hu)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07);
+}
+#endif
+
+constexpr bool match_m128i(__m128i _v, unsigned long long a, unsigned long 
long b) {
+  __v2du v = (__v2du)_v;
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v2di(__m128i v, long long a, long long b) {
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v2du(__m128i _v, unsigned long long a, unsigned long long 
b) {
+  __v2du v = (__v2du)_v;
+  return v[0] == a && v[1] == b;
+}
+
+constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
+  __v4si v = (__v4si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v4su(__m128i _v, unsigned a, unsigned b, unsigned c, 
unsigned d) {
+  __v4su v = (__v4su)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, 
short e, short f, short g, short h) {
+  __v8hi v = (__v8hi)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v8hu(__m128i _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d, unsigned short e, unsigned short f, 
unsigned short g, unsigned short h) {
+  __v8hu v = (__v8hu)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16qi(__m128i _v, signed char a, signed char b, signed 
char c, signed char d, signed char e, signed char f, signed char g, signed char 
h, signed char i, signed char j, signed char k, signed char l, signed char m, 
signed char n, signed char o, signed char p) {
+  __v16qs v = (__v16qs)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v16qu(__m128i _v, unsigned char a, unsigned char b, 
unsigned char c, unsigned char d, unsigned char e, unsigned char f, unsigned 
char g, unsigned char h, unsigned char i, unsigned char j, unsigned char k, 
unsigned char l, unsigned char m, unsigned char n, unsigned char o, unsigned 
char p) {
+  __v16qu v = (__v16qu)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_m256(__m256 _v, float __e00, float __e01, float __e02, 
float __e03, float __e04, float __e05, float __e06, float __e07) {
+  __v8su v = (__v8su)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, 
__e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, 
__e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07);
+}
+
+constexpr bool match_m256d(__m256d _v, double a, double b, double c, double d) 
{
+  __v4du v = (__v4du)_v;
+  return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == 
__builtin_bit_cast(unsigned long long, b) && v[2] == 
__builtin_bit_cast(unsigned long long, c) && v[3] == 
__builtin_bit_cast(unsigned long long, d);
+}
+
+#ifdef __SSE2__
+constexpr bool match_m256h(__m256h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07,
+                                       _Float16 __e08, _Float16 __e09, 
_Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, 
_Float16 __e15) {
+  __v16hu v = (__v16hu)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07) &&
+         v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned short, __e09) && v[10] == 
__builtin_bit_cast(unsigned short, __e10) && v[11] == 
__builtin_bit_cast(unsigned short, __e11) &&
+         v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == 
__builtin_bit_cast(unsigned short, __e13) && v[14] == 
__builtin_bit_cast(unsigned short, __e14) && v[15] == 
__builtin_bit_cast(unsigned short, __e15);
+}
+#endif
+
+constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long 
long b, unsigned long long c, unsigned long long d) {
+  __v4du v = (__v4du)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v4di(__m256i _v, long long a, long long b, long long c, 
long long d) {
+  __v4di v = (__v4di)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
+}
+
+constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int 
f, int g, int h) {
+  __v8si v = (__v8si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v8su(__m256i _v, unsigned a, unsigned b, unsigned c, 
unsigned d, unsigned e, unsigned f, unsigned g, unsigned h) {
+  __v8su v = (__v8su)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, 
short e, short f, short g, short h, short i, short j, short k, short l, short 
m, short n, short o, short p) {
+  __v16hi v = (__v16hi)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v16hu(__m256i _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d, unsigned short e, unsigned short f, 
unsigned short g, unsigned short h, unsigned short i, unsigned short j, 
unsigned short k, unsigned short l, unsigned short m, unsigned short n, 
unsigned short o, unsigned short p) {
+  __v16hu v = (__v16hu)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v32qi(__m256i _v, signed char __b00, signed char __b01, 
signed char __b02, signed char __b03, signed char __b04, signed char __b05, 
signed char __b06, signed char __b07,
+                                       signed char __b08, signed char __b09, 
signed char __b10, signed char __b11, signed char __b12, signed char __b13, 
signed char __b14, signed char __b15,
+                                       signed char __b16, signed char __b17, 
signed char __b18, signed char __b19, signed char __b20, signed char __b21, 
signed char __b22, signed char __b23,
+                                       signed char __b24, signed char __b25, 
signed char __b26, signed char __b27, signed char __b28, signed char __b29, 
signed char __b30, signed char __b31) {
+  __v32qs v = (__v32qs)_v;
+  return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 
&& v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] ==  __b07 &&
+         v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 
&& v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] ==  __b15 &&
+         v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 
&& v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] ==  __b23 &&
+         v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 
&& v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] ==  __b31;
+}
+
+constexpr bool match_v32qu(__m256i _v, unsigned char __b00, unsigned char 
__b01, unsigned char __b02, unsigned char __b03, unsigned char __b04, unsigned 
char __b05, unsigned char __b06, unsigned char __b07,
+                                       unsigned char __b08, unsigned char 
__b09, unsigned char __b10, unsigned char __b11, unsigned char __b12, unsigned 
char __b13, unsigned char __b14, unsigned char __b15,
+                                       unsigned char __b16, unsigned char 
__b17, unsigned char __b18, unsigned char __b19, unsigned char __b20, unsigned 
char __b21, unsigned char __b22, unsigned char __b23,
+                                       unsigned char __b24, unsigned char 
__b25, unsigned char __b26, unsigned char __b27, unsigned char __b28, unsigned 
char __b29, unsigned char __b30, unsigned char __b31) {
+  __v32qu v = (__v32qu)_v;
+  return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 
&& v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] ==  __b07 &&
+         v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 
&& v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] ==  __b15 &&
+         v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 
&& v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] ==  __b23 &&
+         v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 
&& v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] ==  __b31;
+}
+
+constexpr bool match_m512(__m512 _v, float __e00, float __e01, float __e02, 
float __e03, float __e04, float __e05, float __e06, float __e07, float __e08, 
float __e09, float __e10, float __e11, float __e12, float __e13, float __e14, 
float __e15) {
+  __v16su v = (__v16su)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, 
__e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, 
__e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07) &&
+         v[ 8] == __builtin_bit_cast(unsigned, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned, __e09) && v[10] == __builtin_bit_cast(unsigned, 
__e10) && v[11] == __builtin_bit_cast(unsigned, __e11) &&
+         v[12] == __builtin_bit_cast(unsigned, __e12) && v[13] == 
__builtin_bit_cast(unsigned, __e13) && v[14] == __builtin_bit_cast(unsigned, 
__e14) && v[15] == __builtin_bit_cast(unsigned, __e15);
+}
+
+constexpr bool match_m512d(__m512d _v, double __e00, double __e01, double 
__e02, double __e03, double __e04, double __e05, double __e06, double __e07) {
+  __v8du v = (__v8du)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned long long, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned long long, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned long long, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned long long, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned long long, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned long long, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned long long, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned long long, __e07);
+}
+
+#ifdef __SSE2__
+constexpr bool match_m512h(__m512h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07,
+                                       _Float16 __e08, _Float16 __e09, 
_Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, 
_Float16 __e15,
+                                       _Float16 __e16, _Float16 __e17, 
_Float16 __e18, _Float16 __e19, _Float16 __e20, _Float16 __e21, _Float16 __e22, 
_Float16 __e23,
+                                       _Float16 __e24, _Float16 __e25, 
_Float16 __e26, _Float16 __e27, _Float16 __e28, _Float16 __e29, _Float16 __e30, 
_Float16 __e31) {
+  __v32hu v = (__v32hu)_v;
+  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
+         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07) &&
+         v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned short, __e09) && v[10] == 
__builtin_bit_cast(unsigned short, __e10) && v[11] == 
__builtin_bit_cast(unsigned short, __e11) &&
+         v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == 
__builtin_bit_cast(unsigned short, __e13) && v[14] == 
__builtin_bit_cast(unsigned short, __e14) && v[15] == 
__builtin_bit_cast(unsigned short, __e15) &&
+         v[16] == __builtin_bit_cast(unsigned short, __e16) && v[17] == 
__builtin_bit_cast(unsigned short, __e17) && v[18] == 
__builtin_bit_cast(unsigned short, __e18) && v[19] == 
__builtin_bit_cast(unsigned short, __e19) &&
+         v[20] == __builtin_bit_cast(unsigned short, __e20) && v[21] == 
__builtin_bit_cast(unsigned short, __e21) && v[22] == 
__builtin_bit_cast(unsigned short, __e22) && v[23] == 
__builtin_bit_cast(unsigned short, __e23) &&
+         v[24] == __builtin_bit_cast(unsigned short, __e24) && v[25] == 
__builtin_bit_cast(unsigned short, __e25) && v[26] == 
__builtin_bit_cast(unsigned short, __e26) && v[27] == 
__builtin_bit_cast(unsigned short, __e27) &&
+         v[28] == __builtin_bit_cast(unsigned short, __e28) && v[29] == 
__builtin_bit_cast(unsigned short, __e29) && v[30] == 
__builtin_bit_cast(unsigned short, __e30) && v[31] == 
__builtin_bit_cast(unsigned short, __e31);
+}
+#endif
+
+constexpr bool match_m512i(__m512i _v, unsigned long long a, unsigned long 
long b, unsigned long long c, unsigned long long d, unsigned long long e, 
unsigned long long f, unsigned long long g, unsigned long long h) {
+  __v8du v = (__v8du)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v8di(__m512i _v, long long a, long long b, long long c, 
long long d, long long e, long long f, long long g, long long h) {
+  __v8di v = (__v8di)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
+}
+
+constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int 
f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p) {
+  __v16si v = (__v16si)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v16su(__m512i _v, unsigned int a, unsigned int b, 
unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, 
unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, 
unsigned int m, unsigned int n, unsigned int o, unsigned int p) {
+  __v16su v = (__v16su)_v;
+  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
+}
+
+constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, 
short __e03, short __e04, short __e05, short __e06, short __e07,
+                                       short __e08, short __e09, short __e10, 
short __e11, short __e12, short __e13, short __e14, short __e15,
+                                       short __e16, short __e17, short __e18, 
short __e19, short __e20, short __e21, short __e22, short __e23,
+                                       short __e24, short __e25, short __e26, 
short __e27, short __e28, short __e29, short __e30, short __e31) {
+  __v32hi v = (__v32hi)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] ==  __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] ==  __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] ==  __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] ==  __e31;
+}
+
+constexpr bool match_v32hu(__m512i _v, unsigned short __e00, unsigned short 
__e01, unsigned short __e02, unsigned short __e03, unsigned short __e04, 
unsigned short __e05, unsigned short __e06, unsigned short __e07,
+                                       unsigned short __e08, unsigned short 
__e09, unsigned short __e10, unsigned short __e11, unsigned short __e12, 
unsigned short __e13, unsigned short __e14, unsigned short __e15,
+                                       unsigned short __e16, unsigned short 
__e17, unsigned short __e18, unsigned short __e19, unsigned short __e20, 
unsigned short __e21, unsigned short __e22, unsigned short __e23,
+                                       unsigned short __e24, unsigned short 
__e25, unsigned short __e26, unsigned short __e27, unsigned short __e28, 
unsigned short __e29, unsigned short __e30, unsigned short __e31) {
+  __v32hu v = (__v32hu)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] ==  __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] ==  __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] ==  __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] ==  __e31;
+}
+
+constexpr bool match_v64qi(__m512i _v, signed char __e00, signed char __e01, 
signed char __e02, signed char __e03, signed char __e04, signed char __e05, 
signed char __e06, signed char __e07,
+                                       signed char __e08, signed char __e09, 
signed char __e10, signed char __e11, signed char __e12, signed char __e13, 
signed char __e14, signed char __e15,
+                                       signed char __e16, signed char __e17, 
signed char __e18, signed char __e19, signed char __e20, signed char __e21, 
signed char __e22, signed char __e23,
+                                       signed char __e24, signed char __e25, 
signed char __e26, signed char __e27, signed char __e28, signed char __e29, 
signed char __e30, signed char __e31,
+                                       signed char __e32, signed char __e33, 
signed char __e34, signed char __e35, signed char __e36, signed char __e37, 
signed char __e38, signed char __e39,
+                                       signed char __e40, signed char __e41, 
signed char __e42, signed char __e43, signed char __e44, signed char __e45, 
signed char __e46, signed char __e47,
+                                       signed char __e48, signed char __e49, 
signed char __e50, signed char __e51, signed char __e52, signed char __e53, 
signed char __e54, signed char __e55,
+                                       signed char __e56, signed char __e57, 
signed char __e58, signed char __e59, signed char __e60, signed char __e61, 
signed char __e62, signed char __e63) {
+  __v64qs v = (__v64qs)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
+         v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 
&& v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
+         v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 
&& v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
+         v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 
&& v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
+         v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 
&& v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
+}
+
+constexpr bool match_v64qu(__m512i _v, unsigned char __e00, unsigned char __e01, unsigned char __e02, unsigned char __e03, unsigned char __e04, unsigned char __e05, unsigned char __e06, unsigned char __e07,
+                                       unsigned char __e08, unsigned char __e09, unsigned char __e10, unsigned char __e11, unsigned char __e12, unsigned char __e13, unsigned char __e14, unsigned char __e15,
+                                       unsigned char __e16, unsigned char __e17, unsigned char __e18, unsigned char __e19, unsigned char __e20, unsigned char __e21, unsigned char __e22, unsigned char __e23,
+                                       unsigned char __e24, unsigned char __e25, unsigned char __e26, unsigned char __e27, unsigned char __e28, unsigned char __e29, unsigned char __e30, unsigned char __e31,
+                                       unsigned char __e32, unsigned char __e33, unsigned char __e34, unsigned char __e35, unsigned char __e36, unsigned char __e37, unsigned char __e38, unsigned char __e39,
+                                       unsigned char __e40, unsigned char __e41, unsigned char __e42, unsigned char __e43, unsigned char __e44, unsigned char __e45, unsigned char __e46, unsigned char __e47,
+                                       unsigned char __e48, unsigned char __e49, unsigned char __e50, unsigned char __e51, unsigned char __e52, unsigned char __e53, unsigned char __e54, unsigned char __e55,
+                                       unsigned char __e56, unsigned char __e57, unsigned char __e58, unsigned char __e59, unsigned char __e60, unsigned char __e61, unsigned char __e62, unsigned char __e63) {
+  __v64qu v = (__v64qu)_v;
+  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 && v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
+         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 && v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
+         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 && v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
+         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 && v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
+         v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 && v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
+         v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 && v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
+         v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 && v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
+         v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 && v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
+}
+
+#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
+
+#else
+
+#define TEST_CONSTEXPR(...)
+
+#endif
diff --git a/clang/test/CIR/CodeGen/X86/xop-builtin.c b/clang/test/CIR/CodeGen/X86/xop-builtin.c
new file mode 100644
index 0000000000000..378e8c71fa378
--- /dev/null
+++ b/clang/test/CIR/CodeGen/X86/xop-builtin.c
@@ -0,0 +1,82 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
+
+
+#include <x86intrin.h>
+#include "builtin_test_helpers.h"
+
+// This test mimics clang/test/CodeGen/X86/xop-builtins.c, which eventually
+// CIR shall be able to support fully.
+
+__m128i test_mm_rot_epi8(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi8
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> %{{.*}})
+  return _mm_rot_epi8(a, b);
+}
+TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 12, 65, -25, 48, -33, 4, 0));
+
+__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi16
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> %{{.*}})
+  return _mm_rot_epi16(a, b);
+}
+TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, -33, 1024, 0));
+
+__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi32
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}})
+  return _mm_rot_epi32(a, b);
+}
+TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, (__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));
+
+__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
+  // CHECK-LABEL: test_mm_rot_epi64
+  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}})
+  return _mm_rot_epi64(a, b);
+}
+TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, (__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));
+
+__m128i test_mm_roti_epi8(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi8
+  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> %{{.*}}, <16 x i8> splat (i8 1))
+  return _mm_roti_epi8(a, 1);
+}
+TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 56, -57, 72, -73, 88, -89, 104, -105, 120));
+
+__m128i test_mm_roti_epi16(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi16
+  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> %{{.*}}, <8 x i16> splat (i16 50))
+  return _mm_roti_epi16(a, 50);
+}
+TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, -7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17));
+
+__m128i test_mm_roti_epi32(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi32
+  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> splat (i32 226))
+  return _mm_roti_epi32(a, -30);
+}
+TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 5), 32, -33, 96, -97));
+
+__m128i test_mm_roti_epi64(__m128i a) {
+  // CHECK-LABEL: test_mm_roti_epi64
+  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> splat (i64 100))
+  return _mm_roti_epi64(a, 100);
+}
+TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 51904512, -28311553));
+
+

>From 54a6e7e292edf8e9d85f4eee431b67ffc3414b55 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Sun, 23 Nov 2025 15:47:29 +0100
Subject: [PATCH 3/8] remove local file, fix formatting issues

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index f5039a8a016a0..95636e585271d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -22,12 +22,12 @@ using namespace clang;
 using namespace clang::CIRGen;
 
 template <typename... Operands>
-static mlir::Value
-emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *expr,
-                    const std::string &str, const mlir::Type &resTy,
-                    Operands &&...op) {
+static mlir::Value emitIntrinsicCallOp(CIRGenFunction &cgf, const CallExpr *e,
+                                       const std::string &str,
+                                       const mlir::Type &resTy,
+                                       Operands &&...op) {
   CIRGenBuilderTy &builder = cgf.getBuilder();
-  mlir::Location location = cgf.getLoc(expr->getExprLoc());
+  mlir::Location location = cgf.getLoc(e->getExprLoc());
   return cir::LLVMIntrinsicCallOp::create(builder, location,
                                           builder.getStringAttr(str), resTy,
                                           std::forward<Operands>(op)...)
@@ -130,15 +130,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   // evaluation.
   assert(!cir::MissingFeatures::msvcBuiltins());
 
-  // Find out if any arguments are required to be integer constant
-  // expressions.
+  // Find out if any arguments are required to be integer constant expressions.
   assert(!cir::MissingFeatures::handleBuiltinICEArguments());
 
   // The operands of the builtin call
   llvm::SmallVector<mlir::Value> ops;
 
-  // `ICEArguments` is a bitmap indicating whether the argument at the i-th
-  // bit is required to be a constant integer expression.
+  // `ICEArguments` is a bitmap indicating whether the argument at the i-th bit
+  // is required to be a constant integer expression.
   unsigned iceArguments = 0;
   ASTContext::GetBuiltinTypeError error;
   getContext().GetBuiltinType(builtinID, error, &iceArguments);

>From 84b8a6076c743cc02e642ef37686f4d7eaf4bdf4 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Sun, 23 Nov 2025 15:55:17 +0100
Subject: [PATCH 4/8] remove local file

---
 shell.nix | 14 --------------
 1 file changed, 14 deletions(-)
 delete mode 100644 shell.nix

diff --git a/shell.nix b/shell.nix
deleted file mode 100644
index c30f6dc7b6928..0000000000000
--- a/shell.nix
+++ /dev/null
@@ -1,14 +0,0 @@
-let
-  nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-24.05";
-  pkgs = import nixpkgs { config = {}; overlays = []; };
-in
-
-
-pkgs.mkShellNoCC {
-  packages = with pkgs; [
-    cmake
-    ninja
-    llvmPackages_latest.llvm
-  ];
-stdenv = pkgs.clangStdenv;
-}

>From b5926a90c9ba50fc968e8397eccf5777bc4adb06 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Mon, 24 Nov 2025 21:03:14 +0100
Subject: [PATCH 5/8] backup

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |   4 +-
 .../CIR/CodeGen/X86/builtin_test_helpers.h    | 304 ------------------
 clang/test/CIR/CodeGen/X86/xop-builtin.c      | 108 ++-----
 3 files changed, 36 insertions(+), 380 deletions(-)
 delete mode 100644 clang/test/CIR/CodeGen/X86/builtin_test_helpers.h

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 95636e585271d..a59280ea24f02 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -687,14 +687,14 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
   case X86::BI__builtin_ia32_prolq128:
   case X86::BI__builtin_ia32_prolq256:
   case X86::BI__builtin_ia32_prolq512:
-    return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], false);
+    return emitX86FunnelShift(*this, expr, ops[0], ops[0], ops[1], false);
   case X86::BI__builtin_ia32_prord128:
   case X86::BI__builtin_ia32_prord256:
   case X86::BI__builtin_ia32_prord512:
   case X86::BI__builtin_ia32_prorq128:
   case X86::BI__builtin_ia32_prorq256:
   case X86::BI__builtin_ia32_prorq512:
-    return emitX86FunnelShift(*this, expr, ops[0], ops[1], ops[1], true);
+    return emitX86FunnelShift(*this, expr, ops[0], ops[0], ops[1], true);
   case X86::BI__builtin_ia32_selectb_128:
   case X86::BI__builtin_ia32_selectb_256:
   case X86::BI__builtin_ia32_selectb_512:
diff --git a/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h b/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h
deleted file mode 100644
index fcaf360626a2d..0000000000000
--- a/clang/test/CIR/CodeGen/X86/builtin_test_helpers.h
+++ /dev/null
@@ -1,304 +0,0 @@
-/* Helper methods for builtin intrinsic tests */
-
-#include <immintrin.h>
-
-#if defined(__cplusplus) && (__cplusplus >= 201103L)
-
-constexpr bool match_m64(__m64 _v, unsigned long long a) {
-  __v1du v = (__v1du)_v;
-  return v[0] == a;
-}
-
-constexpr bool match_v1di(__m64 v, long long a) {
-  return v[0] == a;
-}
-
-constexpr bool match_v1du(__m64 _v, unsigned long long a) {
-  __v1du v = (__v1du)_v;
-  return v[0] == a;
-}
-
-constexpr bool match_v2si(__m64 _v, int a, int b) {
-  __v2si v = (__v2si)_v;
-  return v[0] == a && v[1] == b;
-}
-
-constexpr bool match_v2su(__m64 _v, unsigned a, unsigned b) {
-  __v2su v = (__v2su)_v;
-  return v[0] == a && v[1] == b;
-}
-
-constexpr bool match_v4hi(__m64 _v, short a, short b, short c, short d) {
-  __v4hi v = (__v4hi)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v4hu(__m64 _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d) {
-  __v4hu v = (__v4hu)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v8qi(__m64 _v, signed char a, signed char b, signed char 
c, signed char d, signed char e, signed char f, signed char g, signed char h) {
-  __v8qs v = (__v8qs)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v8qu(__m64 _v, unsigned char a, unsigned char b, unsigned 
char c, unsigned char d, unsigned char e, unsigned char f, unsigned char g, 
unsigned char h) {
-  __v8qu v = (__v8qu)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_m128(__m128 _v, float a, float b, float c, float d) {
-  __v4su v = (__v4su)_v;
-  return v[0] == __builtin_bit_cast(unsigned, a) && v[1] == 
__builtin_bit_cast(unsigned, b) && v[2] == __builtin_bit_cast(unsigned, c) && 
v[3] == __builtin_bit_cast(unsigned, d);
-}
-
-constexpr bool match_m128d(__m128d _v, double a, double b) {
-  __v2du v = (__v2du)_v;
-  return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == 
__builtin_bit_cast(unsigned long long, b);
-}
-
-#ifdef __SSE2__
-constexpr bool match_m128h(__m128h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07) {
-  __v8hu v = (__v8hu)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07);
-}
-#endif
-
-constexpr bool match_m128i(__m128i _v, unsigned long long a, unsigned long 
long b) {
-  __v2du v = (__v2du)_v;
-  return v[0] == a && v[1] == b;
-}
-
-constexpr bool match_v2di(__m128i v, long long a, long long b) {
-  return v[0] == a && v[1] == b;
-}
-
-constexpr bool match_v2du(__m128i _v, unsigned long long a, unsigned long long 
b) {
-  __v2du v = (__v2du)_v;
-  return v[0] == a && v[1] == b;
-}
-
-constexpr bool match_v4si(__m128i _v, int a, int b, int c, int d) {
-  __v4si v = (__v4si)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v4su(__m128i _v, unsigned a, unsigned b, unsigned c, 
unsigned d) {
-  __v4su v = (__v4su)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v8hi(__m128i _v, short a, short b, short c, short d, 
short e, short f, short g, short h) {
-  __v8hi v = (__v8hi)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v8hu(__m128i _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d, unsigned short e, unsigned short f, 
unsigned short g, unsigned short h) {
-  __v8hu v = (__v8hu)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v16qi(__m128i _v, signed char a, signed char b, signed 
char c, signed char d, signed char e, signed char f, signed char g, signed char 
h, signed char i, signed char j, signed char k, signed char l, signed char m, 
signed char n, signed char o, signed char p) {
-  __v16qs v = (__v16qs)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_v16qu(__m128i _v, unsigned char a, unsigned char b, 
unsigned char c, unsigned char d, unsigned char e, unsigned char f, unsigned 
char g, unsigned char h, unsigned char i, unsigned char j, unsigned char k, 
unsigned char l, unsigned char m, unsigned char n, unsigned char o, unsigned 
char p) {
-  __v16qu v = (__v16qu)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_m256(__m256 _v, float __e00, float __e01, float __e02, 
float __e03, float __e04, float __e05, float __e06, float __e07) {
-  __v8su v = (__v8su)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, 
__e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, 
__e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07);
-}
-
-constexpr bool match_m256d(__m256d _v, double a, double b, double c, double d) 
{
-  __v4du v = (__v4du)_v;
-  return v[0] == __builtin_bit_cast(unsigned long long, a) && v[1] == 
__builtin_bit_cast(unsigned long long, b) && v[2] == 
__builtin_bit_cast(unsigned long long, c) && v[3] == 
__builtin_bit_cast(unsigned long long, d);
-}
-
-#ifdef __SSE2__
-constexpr bool match_m256h(__m256h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07,
-                                       _Float16 __e08, _Float16 __e09, 
_Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, 
_Float16 __e15) {
-  __v16hu v = (__v16hu)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07) &&
-         v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned short, __e09) && v[10] == 
__builtin_bit_cast(unsigned short, __e10) && v[11] == 
__builtin_bit_cast(unsigned short, __e11) &&
-         v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == 
__builtin_bit_cast(unsigned short, __e13) && v[14] == 
__builtin_bit_cast(unsigned short, __e14) && v[15] == 
__builtin_bit_cast(unsigned short, __e15);
-}
-#endif
-
-constexpr bool match_m256i(__m256i _v, unsigned long long a, unsigned long 
long b, unsigned long long c, unsigned long long d) {
-  __v4du v = (__v4du)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v4di(__m256i _v, long long a, long long b, long long c, 
long long d) {
-  __v4di v = (__v4di)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d;
-}
-
-constexpr bool match_v8si(__m256i _v, int a, int b, int c, int d, int e, int 
f, int g, int h) {
-  __v8si v = (__v8si)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v8su(__m256i _v, unsigned a, unsigned b, unsigned c, 
unsigned d, unsigned e, unsigned f, unsigned g, unsigned h) {
-  __v8su v = (__v8su)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v16hi(__m256i _v, short a, short b, short c, short d, 
short e, short f, short g, short h, short i, short j, short k, short l, short 
m, short n, short o, short p) {
-  __v16hi v = (__v16hi)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_v16hu(__m256i _v, unsigned short a, unsigned short b, 
unsigned short c, unsigned short d, unsigned short e, unsigned short f, 
unsigned short g, unsigned short h, unsigned short i, unsigned short j, 
unsigned short k, unsigned short l, unsigned short m, unsigned short n, 
unsigned short o, unsigned short p) {
-  __v16hu v = (__v16hu)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_v32qi(__m256i _v, signed char __b00, signed char __b01, 
signed char __b02, signed char __b03, signed char __b04, signed char __b05, 
signed char __b06, signed char __b07,
-                                       signed char __b08, signed char __b09, 
signed char __b10, signed char __b11, signed char __b12, signed char __b13, 
signed char __b14, signed char __b15,
-                                       signed char __b16, signed char __b17, 
signed char __b18, signed char __b19, signed char __b20, signed char __b21, 
signed char __b22, signed char __b23,
-                                       signed char __b24, signed char __b25, 
signed char __b26, signed char __b27, signed char __b28, signed char __b29, 
signed char __b30, signed char __b31) {
-  __v32qs v = (__v32qs)_v;
-  return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 
&& v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] ==  __b07 &&
-         v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 
&& v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] ==  __b15 &&
-         v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 
&& v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] ==  __b23 &&
-         v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 
&& v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] ==  __b31;
-}
-
-constexpr bool match_v32qu(__m256i _v, unsigned char __b00, unsigned char 
__b01, unsigned char __b02, unsigned char __b03, unsigned char __b04, unsigned 
char __b05, unsigned char __b06, unsigned char __b07,
-                                       unsigned char __b08, unsigned char 
__b09, unsigned char __b10, unsigned char __b11, unsigned char __b12, unsigned 
char __b13, unsigned char __b14, unsigned char __b15,
-                                       unsigned char __b16, unsigned char 
__b17, unsigned char __b18, unsigned char __b19, unsigned char __b20, unsigned 
char __b21, unsigned char __b22, unsigned char __b23,
-                                       unsigned char __b24, unsigned char 
__b25, unsigned char __b26, unsigned char __b27, unsigned char __b28, unsigned 
char __b29, unsigned char __b30, unsigned char __b31) {
-  __v32qu v = (__v32qu)_v;
-  return v[ 0] == __b00 && v[ 1] == __b01 && v[ 2] == __b02 && v[ 3] == __b03 
&& v[ 4] == __b04 && v[ 5] == __b05 && v[ 6] == __b06 && v[ 7] ==  __b07 &&
-         v[ 8] == __b08 && v[ 9] == __b09 && v[10] == __b10 && v[11] == __b11 
&& v[12] == __b12 && v[13] == __b13 && v[14] == __b14 && v[15] ==  __b15 &&
-         v[16] == __b16 && v[17] == __b17 && v[18] == __b18 && v[19] == __b19 
&& v[20] == __b20 && v[21] == __b21 && v[22] == __b22 && v[23] ==  __b23 &&
-         v[24] == __b24 && v[25] == __b25 && v[26] == __b26 && v[27] == __b27 
&& v[28] == __b28 && v[29] == __b29 && v[30] == __b30 && v[31] ==  __b31;
-}
-
-constexpr bool match_m512(__m512 _v, float __e00, float __e01, float __e02, 
float __e03, float __e04, float __e05, float __e06, float __e07, float __e08, 
float __e09, float __e10, float __e11, float __e12, float __e13, float __e14, 
float __e15) {
-  __v16su v = (__v16su)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned, __e01) && v[ 2] == __builtin_bit_cast(unsigned, 
__e02) && v[ 3] == __builtin_bit_cast(unsigned, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned, __e05) && v[ 6] == __builtin_bit_cast(unsigned, 
__e06) && v[ 7] == __builtin_bit_cast(unsigned, __e07) &&
-         v[ 8] == __builtin_bit_cast(unsigned, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned, __e09) && v[10] == __builtin_bit_cast(unsigned, 
__e10) && v[11] == __builtin_bit_cast(unsigned, __e11) &&
-         v[12] == __builtin_bit_cast(unsigned, __e12) && v[13] == 
__builtin_bit_cast(unsigned, __e13) && v[14] == __builtin_bit_cast(unsigned, 
__e14) && v[15] == __builtin_bit_cast(unsigned, __e15);
-}
-
-constexpr bool match_m512d(__m512d _v, double __e00, double __e01, double 
__e02, double __e03, double __e04, double __e05, double __e06, double __e07) {
-  __v8du v = (__v8du)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned long long, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned long long, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned long long, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned long long, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned long long, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned long long, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned long long, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned long long, __e07);
-}
-
-#ifdef __SSE2__
-constexpr bool match_m512h(__m512h _v, _Float16 __e00, _Float16 __e01, 
_Float16 __e02, _Float16 __e03, _Float16 __e04, _Float16 __e05, _Float16 __e06, 
_Float16 __e07,
-                                       _Float16 __e08, _Float16 __e09, 
_Float16 __e10, _Float16 __e11, _Float16 __e12, _Float16 __e13, _Float16 __e14, 
_Float16 __e15,
-                                       _Float16 __e16, _Float16 __e17, 
_Float16 __e18, _Float16 __e19, _Float16 __e20, _Float16 __e21, _Float16 __e22, 
_Float16 __e23,
-                                       _Float16 __e24, _Float16 __e25, 
_Float16 __e26, _Float16 __e27, _Float16 __e28, _Float16 __e29, _Float16 __e30, 
_Float16 __e31) {
-  __v32hu v = (__v32hu)_v;
-  return v[ 0] == __builtin_bit_cast(unsigned short, __e00) && v[ 1] == 
__builtin_bit_cast(unsigned short, __e01) && v[ 2] == 
__builtin_bit_cast(unsigned short, __e02) && v[ 3] == 
__builtin_bit_cast(unsigned short, __e03) &&
-         v[ 4] == __builtin_bit_cast(unsigned short, __e04) && v[ 5] == 
__builtin_bit_cast(unsigned short, __e05) && v[ 6] == 
__builtin_bit_cast(unsigned short, __e06) && v[ 7] == 
__builtin_bit_cast(unsigned short, __e07) &&
-         v[ 8] == __builtin_bit_cast(unsigned short, __e08) && v[ 9] == 
__builtin_bit_cast(unsigned short, __e09) && v[10] == 
__builtin_bit_cast(unsigned short, __e10) && v[11] == 
__builtin_bit_cast(unsigned short, __e11) &&
-         v[12] == __builtin_bit_cast(unsigned short, __e12) && v[13] == 
__builtin_bit_cast(unsigned short, __e13) && v[14] == 
__builtin_bit_cast(unsigned short, __e14) && v[15] == 
__builtin_bit_cast(unsigned short, __e15) &&
-         v[16] == __builtin_bit_cast(unsigned short, __e16) && v[17] == 
__builtin_bit_cast(unsigned short, __e17) && v[18] == 
__builtin_bit_cast(unsigned short, __e18) && v[19] == 
__builtin_bit_cast(unsigned short, __e19) &&
-         v[20] == __builtin_bit_cast(unsigned short, __e20) && v[21] == 
__builtin_bit_cast(unsigned short, __e21) && v[22] == 
__builtin_bit_cast(unsigned short, __e22) && v[23] == 
__builtin_bit_cast(unsigned short, __e23) &&
-         v[24] == __builtin_bit_cast(unsigned short, __e24) && v[25] == 
__builtin_bit_cast(unsigned short, __e25) && v[26] == 
__builtin_bit_cast(unsigned short, __e26) && v[27] == 
__builtin_bit_cast(unsigned short, __e27) &&
-         v[28] == __builtin_bit_cast(unsigned short, __e28) && v[29] == 
__builtin_bit_cast(unsigned short, __e29) && v[30] == 
__builtin_bit_cast(unsigned short, __e30) && v[31] == 
__builtin_bit_cast(unsigned short, __e31);
-}
-#endif
-
-constexpr bool match_m512i(__m512i _v, unsigned long long a, unsigned long 
long b, unsigned long long c, unsigned long long d, unsigned long long e, 
unsigned long long f, unsigned long long g, unsigned long long h) {
-  __v8du v = (__v8du)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v8di(__m512i _v, long long a, long long b, long long c, 
long long d, long long e, long long f, long long g, long long h) {
-  __v8di v = (__v8di)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h;
-}
-
-constexpr bool match_v16si(__m512i _v, int a, int b, int c, int d, int e, int 
f, int g, int h, int i, int j, int k, int l, int m, int n, int o, int p) {
-  __v16si v = (__v16si)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_v16su(__m512i _v, unsigned int a, unsigned int b, 
unsigned int c, unsigned int d, unsigned int e, unsigned int f, unsigned int g, 
unsigned int h, unsigned int i, unsigned int j, unsigned int k, unsigned int l, 
unsigned int m, unsigned int n, unsigned int o, unsigned int p) {
-  __v16su v = (__v16su)_v;
-  return v[0] == a && v[1] == b && v[2] == c && v[3] == d && v[4] == e && v[5] 
== f && v[6] == g && v[7] == h && v[8] == i && v[9] == j && v[10] == k && v[11] 
== l && v[12] == m && v[13] == n && v[14] == o && v[15] == p;
-}
-
-constexpr bool match_v32hi(__m512i _v, short __e00, short __e01, short __e02, 
short __e03, short __e04, short __e05, short __e06, short __e07,
-                                       short __e08, short __e09, short __e10, 
short __e11, short __e12, short __e13, short __e14, short __e15,
-                                       short __e16, short __e17, short __e18, 
short __e19, short __e20, short __e21, short __e22, short __e23,
-                                       short __e24, short __e25, short __e26, 
short __e27, short __e28, short __e29, short __e30, short __e31) {
-  __v32hi v = (__v32hi)_v;
-  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] ==  __e07 &&
-         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] ==  __e15 &&
-         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] ==  __e23 &&
-         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] ==  __e31;
-}
-
-constexpr bool match_v32hu(__m512i _v, unsigned short __e00, unsigned short 
__e01, unsigned short __e02, unsigned short __e03, unsigned short __e04, 
unsigned short __e05, unsigned short __e06, unsigned short __e07,
-                                       unsigned short __e08, unsigned short 
__e09, unsigned short __e10, unsigned short __e11, unsigned short __e12, 
unsigned short __e13, unsigned short __e14, unsigned short __e15,
-                                       unsigned short __e16, unsigned short 
__e17, unsigned short __e18, unsigned short __e19, unsigned short __e20, 
unsigned short __e21, unsigned short __e22, unsigned short __e23,
-                                       unsigned short __e24, unsigned short 
__e25, unsigned short __e26, unsigned short __e27, unsigned short __e28, 
unsigned short __e29, unsigned short __e30, unsigned short __e31) {
-  __v32hu v = (__v32hu)_v;
-  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] ==  __e07 &&
-         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] ==  __e15 &&
-         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] ==  __e23 &&
-         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] ==  __e31;
-}
-
-constexpr bool match_v64qi(__m512i _v, signed char __e00, signed char __e01, 
signed char __e02, signed char __e03, signed char __e04, signed char __e05, 
signed char __e06, signed char __e07,
-                                       signed char __e08, signed char __e09, 
signed char __e10, signed char __e11, signed char __e12, signed char __e13, 
signed char __e14, signed char __e15,
-                                       signed char __e16, signed char __e17, 
signed char __e18, signed char __e19, signed char __e20, signed char __e21, 
signed char __e22, signed char __e23,
-                                       signed char __e24, signed char __e25, 
signed char __e26, signed char __e27, signed char __e28, signed char __e29, 
signed char __e30, signed char __e31,
-                                       signed char __e32, signed char __e33, 
signed char __e34, signed char __e35, signed char __e36, signed char __e37, 
signed char __e38, signed char __e39,
-                                       signed char __e40, signed char __e41, 
signed char __e42, signed char __e43, signed char __e44, signed char __e45, 
signed char __e46, signed char __e47,
-                                       signed char __e48, signed char __e49, 
signed char __e50, signed char __e51, signed char __e52, signed char __e53, 
signed char __e54, signed char __e55,
-                                       signed char __e56, signed char __e57, 
signed char __e58, signed char __e59, signed char __e60, signed char __e61, 
signed char __e62, signed char __e63) {
-  __v64qs v = (__v64qs)_v;
-  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
-         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
-         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
-         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
-         v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 
&& v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
-         v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 
&& v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
-         v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 
&& v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
-         v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 
&& v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
-}
-
-constexpr bool match_v64qu(__m512i _v, unsigned char __e00, unsigned char 
__e01, unsigned char __e02, unsigned char __e03, unsigned char __e04, unsigned 
char __e05, unsigned char __e06, unsigned char __e07,
-                                       unsigned char __e08, unsigned char 
__e09, unsigned char __e10, unsigned char __e11, unsigned char __e12, unsigned 
char __e13, unsigned char __e14, unsigned char __e15,
-                                       unsigned char __e16, unsigned char 
__e17, unsigned char __e18, unsigned char __e19, unsigned char __e20, unsigned 
char __e21, unsigned char __e22, unsigned char __e23,
-                                       unsigned char __e24, unsigned char 
__e25, unsigned char __e26, unsigned char __e27, unsigned char __e28, unsigned 
char __e29, unsigned char __e30, unsigned char __e31,
-                                       unsigned char __e32, unsigned char 
__e33, unsigned char __e34, unsigned char __e35, unsigned char __e36, unsigned 
char __e37, unsigned char __e38, unsigned char __e39,
-                                       unsigned char __e40, unsigned char 
__e41, unsigned char __e42, unsigned char __e43, unsigned char __e44, unsigned 
char __e45, unsigned char __e46, unsigned char __e47,
-                                       unsigned char __e48, unsigned char 
__e49, unsigned char __e50, unsigned char __e51, unsigned char __e52, unsigned 
char __e53, unsigned char __e54, unsigned char __e55,
-                                       unsigned char __e56, unsigned char 
__e57, unsigned char __e58, unsigned char __e59, unsigned char __e60, unsigned 
char __e61, unsigned char __e62, unsigned char __e63) {
-  __v64qu v = (__v64qu)_v;
-  return v[ 0] == __e00 && v[ 1] == __e01 && v[ 2] == __e02 && v[ 3] == __e03 
&& v[ 4] == __e04 && v[ 5] == __e05 && v[ 6] == __e06 && v[ 7] == __e07 &&
-         v[ 8] == __e08 && v[ 9] == __e09 && v[10] == __e10 && v[11] == __e11 
&& v[12] == __e12 && v[13] == __e13 && v[14] == __e14 && v[15] == __e15 &&
-         v[16] == __e16 && v[17] == __e17 && v[18] == __e18 && v[19] == __e19 
&& v[20] == __e20 && v[21] == __e21 && v[22] == __e22 && v[23] == __e23 &&
-         v[24] == __e24 && v[25] == __e25 && v[26] == __e26 && v[27] == __e27 
&& v[28] == __e28 && v[29] == __e29 && v[30] == __e30 && v[31] == __e31 &&
-         v[32] == __e32 && v[33] == __e33 && v[34] == __e34 && v[35] == __e35 
&& v[36] == __e36 && v[37] == __e37 && v[38] == __e38 && v[39] == __e39 &&
-         v[40] == __e40 && v[41] == __e41 && v[42] == __e42 && v[43] == __e43 
&& v[44] == __e44 && v[45] == __e45 && v[46] == __e46 && v[47] == __e47 &&
-         v[48] == __e48 && v[49] == __e49 && v[50] == __e50 && v[51] == __e51 
&& v[52] == __e52 && v[53] == __e53 && v[54] == __e54 && v[55] == __e55 &&
-         v[56] == __e56 && v[57] == __e57 && v[58] == __e58 && v[59] == __e59 
&& v[60] == __e60 && v[61] == __e61 && v[62] == __e62 && v[63] == __e63;
-}
-
-#define TEST_CONSTEXPR(...) static_assert(__VA_ARGS__)
-
-#else
-
-#define TEST_CONSTEXPR(...)
-
-#endif
diff --git a/clang/test/CIR/CodeGen/X86/xop-builtin.c 
b/clang/test/CIR/CodeGen/X86/xop-builtin.c
index 378e8c71fa378..a90ccd8b60461 100644
--- a/clang/test/CIR/CodeGen/X86/xop-builtin.c
+++ b/clang/test/CIR/CodeGen/X86/xop-builtin.c
@@ -1,82 +1,42 @@
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
| FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | 
FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - 
-Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
| FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror | 
FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - 
-Wall -Werror | FileCheck %s
-
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - 
-Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
-fexperimental-new-constant-interpreter | FileCheck %s
-// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=i386-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o - 
-Wall -Werror -fexperimental-new-constant-interpreter | FileCheck %s
-
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-cir -o 
%t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -fclangir 
-emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-cir -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-cir -o 
%t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fclangir -emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -fclangir 
-emit-llvm -o %t.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
| FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -emit-llvm -o - -Wall -Werror 
| FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +xop -fno-signed-char -emit-llvm -o 
- -Wall -Werror | FileCheck %s -check-prefix=OGCG
 
 #include <x86intrin.h>
-#include "builtin_test_helpers.h"
 
 // This test mimics clang/test/CodeGen/X86/xop-builtins.c, which eventually
 // CIR shall be able to support fully.
 
-__m128i test_mm_rot_epi8(__m128i a, __m128i b) {
-  // CHECK-LABEL: test_mm_rot_epi8
-  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> 
%{{.*}}, <16 x i8> %{{.*}})
-  return _mm_rot_epi8(a, b);
-}
-TEST_CONSTEXPR(match_v16qi(_mm_rot_epi8((__m128i)(__v16qs){15, -14, -13, -12, 
11, 10, 9, 8, 7, 6, 5, -4, 3, -2, 1, 0}, (__m128i)(__v16qs){0, 1, -2, 3, -4, 5, 
-6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 15, -27, -4, -89, -80, 65, 36, 4, 7, 
12, 65, -25, 48, -33, 4, 0));
-
-__m128i test_mm_rot_epi16(__m128i a, __m128i b) {
-  // CHECK-LABEL: test_mm_rot_epi16
-  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}}, <8 x i16> %{{.*}})
-  return _mm_rot_epi16(a, b);
-}
-TEST_CONSTEXPR(match_v8hi(_mm_rot_epi16((__m128i)(__v8hi){7, 6, 5, -4, 3, -2, 
1, 0}, (__m128i)(__v8hi){0, 1, -2, 3, -4, 5, -6, 7}), 7, 12, 16385, -25, 12288, 
-33, 1024, 0));
-
-__m128i test_mm_rot_epi32(__m128i a, __m128i b) {
-  // CHECK-LABEL: test_mm_rot_epi32
-  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}}, <4 x i32> %{{.*}})
-  return _mm_rot_epi32(a, b);
-}
-TEST_CONSTEXPR(match_v4si(_mm_rot_epi32((__m128i)(__v4si){3, -2, 1, 0}, 
(__m128i)(__v4si){0, 1, -2, 3}), 3, -3, 1073741824, 0));
-
-__m128i test_mm_rot_epi64(__m128i a, __m128i b) {
-  // CHECK-LABEL: test_mm_rot_epi64
-  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> 
%{{.*}}, <2 x i64> %{{.*}})
-  return _mm_rot_epi64(a, b);
-}
-TEST_CONSTEXPR(match_v2di(_mm_rot_epi64((__m128i)(__v2di){99, -55}, 
(__m128i)(__v2di){1, -2}), 198, 9223372036854775794LL));
-
 __m128i test_mm_roti_epi8(__m128i a) {
-  // CHECK-LABEL: test_mm_roti_epi8
-  // CHECK: call <16 x i8> @llvm.fshl.v16i8(<16 x i8> %{{.*}}, <16 x i8> 
%{{.*}}, <16 x i8> splat (i8 1))
+  // CIR-LABEL: test_mm_roti_epi8
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}8i, !cir.vector<16 x 
!{{[us]}}8i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<16 x 
!{{[su]}}8i>, !cir.vector<16 x !{{[su]}}8i>, !cir.vector<16 x !{{[su]}}8i>) -> 
!cir.vector<16 x !{{[su]}}8i> 
+  // LLVM-LABEL: test_mm_roti_epi8
+  // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
+  // LLVM: {{%.*}} = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> 
%[[CASTED_VAR]], <16 x i8> %[[CASTED_VAR]], <16 x i8> splat (i8 1))
+  // OGCG-LABEL: test_mm_roti_epi8
+  // OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
+  // OGCG: {{%.*}} = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> 
%[[CASTED_VAR]], <16 x i8> %[[CASTED_VAR]], <16 x i8> splat (i8 1))
   return _mm_roti_epi8(a, 1);
-}
-TEST_CONSTEXPR(match_v16qi(_mm_roti_epi8(((__m128i)(__v16qs){0, 1, -2, 3, -4, 
5, -6, 7, -8, 9, -10, 11, -12, 13, -14, 15}), 3), 0, 8, -9, 24, -25, 40, -41, 
56, -57, 72, -73, 88, -89, 104, -105, 120));
-
-__m128i test_mm_roti_epi16(__m128i a) {
-  // CHECK-LABEL: test_mm_roti_epi16
-  // CHECK: call <8 x i16> @llvm.fshl.v8i16(<8 x i16> %{{.*}}, <8 x i16> 
%{{.*}}, <8 x i16> splat (i16 50))
-  return _mm_roti_epi16(a, 50);
-}
-TEST_CONSTEXPR(match_v8hi(_mm_roti_epi16(((__m128i)(__v8hi){2, -3, 4, -5, 6, 
-7, 8, -9}), 1), 4, -5, 8, -9, 12, -13, 16, -17));
-
-__m128i test_mm_roti_epi32(__m128i a) {
-  // CHECK-LABEL: test_mm_roti_epi32
-  // CHECK: call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %{{.*}}, <4 x i32> 
%{{.*}}, <4 x i32> splat (i32 226))
-  return _mm_roti_epi32(a, -30);
-}
-TEST_CONSTEXPR(match_v4si(_mm_roti_epi32(((__m128i)(__v4si){1, -2, 3, -4}), 
5), 32, -33, 96, -97));
-
-__m128i test_mm_roti_epi64(__m128i a) {
-  // CHECK-LABEL: test_mm_roti_epi64
-  // CHECK: call {{.*}}<2 x i64> @llvm.fshl.v2i64(<2 x i64> %{{.*}}, <2 x i64> 
%{{.*}}, <2 x i64> splat (i64 100))
-  return _mm_roti_epi64(a, 100);
-}
-TEST_CONSTEXPR(match_v2di(_mm_roti_epi64(((__m128i)(__v2di){99, -55}), 19), 
51904512, -28311553));
-
-
+ }

>From 9685e6302b6303725adc9830c4e32aac3fa6aef7 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Tue, 25 Nov 2025 21:19:58 +0100
Subject: [PATCH 6/8] add cir tests, cast signed amts to unsigned

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    | 14 +++++--
 .../X86/{xop-builtin.c => xop-builtins.c}     | 37 +++++++++++++++++++
 2 files changed, 48 insertions(+), 3 deletions(-)
 rename clang/test/CIR/CodeGen/X86/{xop-builtin.c => xop-builtins.c} (59%)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index a59280ea24f02..20ca0f6af9ffd 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -16,6 +16,7 @@
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
 
 using namespace clang;
@@ -100,10 +101,17 @@ static mlir::Value emitX86FunnelShift(CIRGenFunction 
&cgf, const CallExpr *e,
   // Funnel shifts amounts are treated as modulo and types are all power-of-2
   // so we only care about the lowest log2 bits anyway.
   if (amt.getType() != ty) {
-    amt = cgf.getBuilder().createIntCast(
-        amt, mlir::cast<cir::VectorType>(ty).getElementType());
+    auto vecTy = mlir::cast<cir::VectorType>(ty);
+
+    auto numElems = vecTy.getSize();
+    cir::IntType vecElemType = 
mlir::cast<cir::IntType>(vecTy.getElementType());
+    auto signlessType =
+        cir::IntType::get(&cgf.getMLIRContext(), vecElemType.getWidth(), 
false);
+    amt = cgf.getBuilder().createIntCast(amt, signlessType);
+
     amt = cir::VecSplatOp::create(cgf.getBuilder(), 
cgf.getLoc(e->getExprLoc()),
-                                  ty, amt);
+                                  cir::VectorType::get(signlessType, numElems),
+                                  amt);
   }
 
   const std::string intrinsicName = isRight ? "fshr" : "fshl";
diff --git a/clang/test/CIR/CodeGen/X86/xop-builtin.c 
b/clang/test/CIR/CodeGen/X86/xop-builtins.c
similarity index 59%
rename from clang/test/CIR/CodeGen/X86/xop-builtin.c
rename to clang/test/CIR/CodeGen/X86/xop-builtins.c
index a90ccd8b60461..c8ae5eb0fd82d 100644
--- a/clang/test/CIR/CodeGen/X86/xop-builtin.c
+++ b/clang/test/CIR/CodeGen/X86/xop-builtins.c
@@ -39,4 +39,41 @@ __m128i test_mm_roti_epi8(__m128i a) {
   // OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <16 x i8>
   // OGCG: {{%.*}} = call <16 x i8> @llvm.fshl.v16i8(<16 x i8> 
%[[CASTED_VAR]], <16 x i8> %[[CASTED_VAR]], <16 x i8> splat (i8 1))
   return _mm_roti_epi8(a, 1);
+}
+
+__m128i test_mm_roti_epi16(__m128i a) {
+  // CIR-LABEL: test_mm_roti_epi16
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u16i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x 
!{{[us]}}16i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x 
!{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>) -> 
!cir.vector<8 x !{{[su]}}16i> 
+  // LLVM-LABEL: test_mm_roti_epi16
+  // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
+  // LLVM: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> 
%[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
+  // OGCG-LABEL: test_mm_roti_epi16
+  // OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
+  // OGCG: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> 
%[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
+  return _mm_roti_epi16(a, 50);
+ }
+
+//NOTE: This only works as I expect for CIR but not for LLVMIR
+__m128i test_mm_roti_epi32(__m128i a) {
+  // CIR-LABEL: test_mm_roti_epi32
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u32i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x 
!{{[us]}}32i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x 
!{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>) -> 
!cir.vector<4 x !{{[su]}}32i> 
+  return _mm_roti_epi32(a, -30);
+ }
+
+__m128i test_mm_roti_epi64(__m128i a) {
+  // CIR-LABEL: test_mm_roti_epi64
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u64i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{.}}64i, !cir.vector<2 x 
!{{[us]}}64i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x 
!{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> 
!cir.vector<2 x !{{[su]}}64i> 
+  // LLVM-LABEL: test_mm_roti_epi64
+  // LLVM: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
+  // LLVM: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x 
i64> %[[VAR]], <2 x i64> splat (i64 100))
+  // OGCG-LABEL: test_mm_roti_epi64
+  // OGCG: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
+  // OGCG: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x 
i64> %[[VAR]], <2 x i64> splat (i64 100))
+  return _mm_roti_epi64(a, 100);
  }

>From 4c41ac69165047a2e7590df3ce345129b0b0ad70 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Tue, 25 Nov 2025 21:24:05 +0100
Subject: [PATCH 7/8] rebase, use auto for variable holding an explicit cast

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 20ca0f6af9ffd..b52dc7b1f4b91 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -104,7 +104,7 @@ static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, 
const CallExpr *e,
     auto vecTy = mlir::cast<cir::VectorType>(ty);
 
     auto numElems = vecTy.getSize();
-    cir::IntType vecElemType = 
mlir::cast<cir::IntType>(vecTy.getElementType());
+    auto vecElemType = mlir::cast<cir::IntType>(vecTy.getElementType());
     auto signlessType =
         cir::IntType::get(&cgf.getMLIRContext(), vecElemType.getWidth(), 
false);
     amt = cgf.getBuilder().createIntCast(amt, signlessType);
@@ -680,11 +680,6 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
         builder.createVecShuffle(getLoc(expr->getExprLoc()), in, zero, 
indices);
     return builder.createBitcast(sv, ops[0].getType());
   }
-  case X86::BI__builtin_ia32_kshiftridi:
-    cgm.errorNYI(expr->getSourceRange(),
-                 std::string("unimplemented X86 builtin call: ") +
-                     getContext().BuiltinInfo.getName(builtinID));
-    return {};
   case X86::BI__builtin_ia32_vprotbi:
   case X86::BI__builtin_ia32_vprotwi:
   case X86::BI__builtin_ia32_vprotdi:

>From e556764cc428b36a37deb561c5e8a6c5c978e956 Mon Sep 17 00:00:00 2001
From: Omar Ibrahim <[email protected]>
Date: Tue, 25 Nov 2025 23:46:44 +0100
Subject: [PATCH 8/8] cast to unsigned properly

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 32 ++++++++++++++--------
 clang/test/CIR/CodeGen/X86/xop-builtins.c  | 24 ++++++++++------
 2 files changed, 35 insertions(+), 21 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b52dc7b1f4b91..c1009779aba96 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -95,27 +95,35 @@ static mlir::Value getMaskVecValue(CIRGenFunction &cgf, 
const CallExpr *expr,
 static mlir::Value emitX86FunnelShift(CIRGenFunction &cgf, const CallExpr *e,
                                       mlir::Value &op0, mlir::Value &op1,
                                       mlir::Value &amt, bool isRight) {
-  auto ty = op0.getType();
+  auto &builder = cgf.getBuilder();
+  auto op0Ty = op0.getType();
 
   // Amount may be scalar immediate, in which case create a splat vector.
   // Funnel shifts amounts are treated as modulo and types are all power-of-2
   // so we only care about the lowest log2 bits anyway.
-  if (amt.getType() != ty) {
-    auto vecTy = mlir::cast<cir::VectorType>(ty);
-
+  if (amt.getType() != op0Ty) {
+    auto vecTy = mlir::cast<cir::VectorType>(op0Ty);
     auto numElems = vecTy.getSize();
-    auto vecElemType = mlir::cast<cir::IntType>(vecTy.getElementType());
-    auto signlessType =
-        cir::IntType::get(&cgf.getMLIRContext(), vecElemType.getWidth(), 
false);
-    amt = cgf.getBuilder().createIntCast(amt, signlessType);
 
-    amt = cir::VecSplatOp::create(cgf.getBuilder(), 
cgf.getLoc(e->getExprLoc()),
-                                  cir::VectorType::get(signlessType, numElems),
-                                  amt);
+    auto amtTy = mlir::cast<cir::IntType>(amt.getType());
+    auto vecElemTy = mlir::cast<cir::IntType>(vecTy.getElementType());
+
+    // If `amt` is signed, first cast it to the unsigned type of the same width.
+    if (amtTy.isSigned()) {
+      auto unsignedAmtTy = builder.getUIntNTy(amtTy.getWidth());
+      amt = builder.createIntCast(amt,
+                                  
builder.getUIntNTy(unsignedAmtTy.getWidth()));
+    }
+    // Cast the unsigned `amt` to an unsigned integer of the element width.
+    auto unsingedVecElemType = builder.getUIntNTy(vecElemTy.getWidth());
+    amt = builder.createIntCast(amt, unsingedVecElemType);
+    amt = cir::VecSplatOp::create(
+        builder, cgf.getLoc(e->getExprLoc()),
+        cir::VectorType::get(unsingedVecElemType, numElems), amt);
   }
 
   const std::string intrinsicName = isRight ? "fshr" : "fshl";
-  return emitIntrinsicCallOp(cgf, e, intrinsicName, ty,
+  return emitIntrinsicCallOp(cgf, e, intrinsicName, op0Ty,
                              mlir::ValueRange{op0, op1, amt});
 }
 
diff --git a/clang/test/CIR/CodeGen/X86/xop-builtins.c 
b/clang/test/CIR/CodeGen/X86/xop-builtins.c
index c8ae5eb0fd82d..cf69331f10dca 100644
--- a/clang/test/CIR/CodeGen/X86/xop-builtins.c
+++ b/clang/test/CIR/CodeGen/X86/xop-builtins.c
@@ -43,9 +43,9 @@ __m128i test_mm_roti_epi8(__m128i a) {
 
 __m128i test_mm_roti_epi16(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi16
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u16i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x 
!{{[us]}}16i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x 
!{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>) -> 
!cir.vector<8 x !{{[su]}}16i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u16i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}16i, !cir.vector<8 x 
!u16i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<8 x 
!{{[su]}}16i>, !cir.vector<8 x !{{[su]}}16i>, !cir.vector<8 x !u16i>) -> 
!cir.vector<8 x !{{[su]}}16i> 
   // LLVM-LABEL: test_mm_roti_epi16
   // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <8 x i16>
   // LLVM: {{%.*}} = call <8 x i16> @llvm.fshl.v8i16(<8 x i16> 
%[[CASTED_VAR]], <8 x i16> %[[CASTED_VAR]], <8 x i16> splat (i16 50))
@@ -58,17 +58,23 @@ __m128i test_mm_roti_epi16(__m128i a) {
 //NOTE: This only works as I expect for CIR but not for LLVMIR
 __m128i test_mm_roti_epi32(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi32
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u32i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x 
!{{[us]}}32i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x 
!{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>) -> 
!cir.vector<4 x !{{[su]}}32i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u32i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{[us]}}32i, !cir.vector<4 x 
!u32i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<4 x 
!{{[su]}}32i>, !cir.vector<4 x !{{[su]}}32i>, !cir.vector<4 x !u32i>) -> 
!cir.vector<4 x !{{[su]}}32i> 
+  // LLVM-LABEL: test_mm_roti_epi32
+  // LLVM: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
+  // LLVM: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> 
%[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
+  // OGCG-LABEL: test_mm_roti_epi32
+  // OGCG: %[[CASTED_VAR:.*]] = bitcast <2 x i64> {{%.*}} to <4 x i32>
+  // OGCG: {{%.*}} = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> 
%[[CASTED_VAR]], <4 x i32> %[[CASTED_VAR]], <4 x i32> splat (i32 226))
   return _mm_roti_epi32(a, -30);
  }
 
 __m128i test_mm_roti_epi64(__m128i a) {
   // CIR-LABEL: test_mm_roti_epi64
-  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !{{[us]}}8i -> !u64i
-  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !{{.}}64i, !cir.vector<2 x 
!{{[us]}}64i> 
-  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x 
!{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> 
!cir.vector<2 x !{{[su]}}64i> 
+  // CIR: {{%.*}} = cir.cast integral {{%.*}} : !u8i -> !u64i
+  // CIR: {{%.*}} = cir.vec.splat {{%.*}} : !u64i, !cir.vector<2 x !u64i> 
+  // CIR: {{%.*}} = cir.call_llvm_intrinsic "fshl" {{.*}} : (!cir.vector<2 x 
!{{[su]}}64i>, !cir.vector<2 x !{{[su]}}64i>, !cir.vector<2 x !u64i>) -> 
!cir.vector<2 x !s64i> 
   // LLVM-LABEL: test_mm_roti_epi64
   // LLVM: %[[VAR:.*]] = load <2 x i64>, ptr {{%.*}}, align 16
   // LLVM: {{%.*}} = call <2 x i64> @llvm.fshl.v2i64(<2 x i64> %[[VAR]], <2 x 
i64> %[[VAR]], <2 x i64> splat (i64 100))

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [llvm] [CIR] Support x86 builtin rotate (PR #169566)

Reply via email to