https://github.com/MarwanTarik updated 
https://github.com/llvm/llvm-project/pull/171694

>From 82529b8bfd35c9e8059b49e2f17b3c837232cf09 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Wed, 10 Dec 2025 22:21:55 +0200
Subject: [PATCH 1/9] Upstream CIR Codegen for convert to mask X86 builtins

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp   | 114 +++++++++++++++++++
 clang/test/CodeGen/X86/avx512vlbw-builtins.c |  12 ++
 2 files changed, 126 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index fb17e31bf36d6..bba7249666aaf 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -231,6 +231,113 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy 
&builder, mlir::Location loc,
   return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
+static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf,
+                                              mlir::Value cmp, unsigned 
numElts,
+                                              mlir::Value maskIn,
+                                              mlir::Location loc) {
+  if (maskIn) {
+    llvm_unreachable("NYI");
+  }
+  if (numElts < 8) {
+    int64_t indices[8];
+    for (unsigned i = 0; i != numElts; ++i)
+      indices[i] = i;
+    for (unsigned i = numElts; i != 8; ++i)
+      indices[i] = i % numElts + numElts;
+
+    // This should shuffle between cmp (first vector) and null (second vector)
+    mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc);
+    cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices);
+  }
+  return cgf.getBuilder().createBitcast(
+      cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U)));
+}
+
+static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
+                                        bool isSigned,
+                                        ArrayRef<mlir::Value> ops,
+                                        mlir::Location loc) {
+  assert((ops.size() == 2 || ops.size() == 4) &&
+         "Unexpected number of arguments");
+  unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize();
+  mlir::Value cmp;
+
+  if (cc == 3) {
+    llvm_unreachable("NYI");
+  } else if (cc == 7) {
+    llvm_unreachable("NYI");
+  } else {
+    cir::CmpOpKind pred;
+    switch (cc) {
+    default:
+      llvm_unreachable("Unknown condition code");
+    case 0:
+      pred = cir::CmpOpKind::eq;
+      break;
+    case 1:
+      pred = cir::CmpOpKind::lt;
+      break;
+    case 2:
+      pred = cir::CmpOpKind::le;
+      break;
+    case 4:
+      pred = cir::CmpOpKind::ne;
+      break;
+    case 5:
+      pred = cir::CmpOpKind::ge;
+      break;
+    case 6:
+      pred = cir::CmpOpKind::gt;
+      break;
+    }
+
+    auto resultTy = cgf.getBuilder().getType<cir::VectorType>(
+        cgf.getBuilder().getUIntNTy(1), numElts);
+    cmp = cir::VecCmpOp::create(cgf.getBuilder(), loc, resultTy, pred, ops[0],
+                                ops[1]);
+  }
+
+  mlir::Value maskIn;
+  if (ops.size() == 4)
+    maskIn = ops[3];
+
+  return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc);
+}
+
+static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in,
+                                        mlir::Location loc) {
+  cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc);
+  return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc);
+}
+
+// Convert the mask from an integer type to a vector of i1.
+static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
+                                   unsigned numElts, mlir::Location loc) {
+  cir::VectorType maskTy =
+      cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
+                           cast<cir::IntType>(mask.getType()).getWidth());
+
+  mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
+
+  // If we have less than 8 elements, then the starting mask was an i8 and
+  // we need to extract down to the right number of elements.
+  if (numElts < 8) {
+    llvm::SmallVector<int64_t, 4> indices;
+    for (unsigned i = 0; i != numElts; ++i)
+      indices.push_back(i);
+    maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, 
indices);
+  }
+
+  return maskVec;
+}
+
+static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
+                                   mlir::Type dstTy, mlir::Location loc) {
+  unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
+  mlir::Value mask = getMaskVecValue(cgf, op, numberOfElements, loc);
+
+  return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, 
dstTy);
+}
 
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
                                  mlir::Value vec, mlir::Value value,
@@ -558,6 +665,7 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_storesh128_mask:
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask:
+
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -570,6 +678,8 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
+    return emitX86SExtMask(*this, ops[0], convertType(expr->getType()),
+                           getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -582,18 +692,22 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_cvtq2mask128:
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
+    return emitX86ConvertToMask(*this, ops[0], getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+    llvm_unreachable("vcvtw2ph256_round_mask NYI");
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+    llvm_unreachable("vcvtuw2ph256_round_mask NYI");
+
   case X86::BI__builtin_ia32_vfmaddsh3_mask:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:
diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c 
b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index f6f27d9c3da3d..a088efa6784db 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3226,6 +3226,18 @@ __m256i test_mm256_movm_epi8(__mmask32 __A) {
   return _mm256_movm_epi8(__A); 
 }
 
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> 
!cir.vector<!cir.int<s, 1> x 64>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
64> -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A); 
+}
+
+
 __m128i test_mm_movm_epi16(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi16
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

>From 392f69e188b558d2fee56cd3d50acb1f791599fe Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 07:03:08 +0200
Subject: [PATCH 2/9] Refactor x86 masked compare functions to use
 CIRGenBuilderTy and improve error handling

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 99 +++++++++++++---------
 1 file changed, 60 insertions(+), 39 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index c284286f43cc2..5ecc79e8baf00 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -14,12 +14,18 @@
 #include "CIRGenBuilder.h"
 #include "CIRGenFunction.h"
 #include "CIRGenModule.h"
+#include "mlir/IR/Attributes.h"
+#include "mlir/IR/BuiltinAttributes.h"
 #include "mlir/IR/Location.h"
+#include "mlir/IR/Types.h"
 #include "mlir/IR/ValueRange.h"
 #include "clang/Basic/Builtins.h"
 #include "clang/Basic/TargetBuiltins.h"
+#include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
 #include "clang/CIR/MissingFeatures.h"
+#include "llvm/ADT/Sequence.h"
+#include <string>
 #include "llvm/Support/ErrorHandling.h"
 
 using namespace clang;
@@ -278,30 +284,36 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy 
&builder, mlir::Location loc,
   return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
-static mlir::Value emitX86MaskedCompareResult(CIRGenFunction &cgf,
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86MaskedCompareResult(CIRGenFunction 
&cgf, CIRGenBuilderTy &builder,
                                               mlir::Value cmp, unsigned 
numElts,
                                               mlir::Value maskIn,
                                               mlir::Location loc) {
   if (maskIn) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
   }
   if (numElts < 8) {
-    int64_t indices[8];
+    llvm::SmallVector<mlir::Attribute> indices;
+    mlir::Type i64Ty = builder.getSInt64Ty();
+
     for (unsigned i = 0; i != numElts; ++i)
-      indices[i] = i;
+      indices.push_back(cir::IntAttr::get(i64Ty, i));
     for (unsigned i = numElts; i != 8; ++i)
-      indices[i] = i % numElts + numElts;
+      indices.push_back(cir::IntAttr::get(i64Ty, i % numElts + numElts));
 
     // This should shuffle between cmp (first vector) and null (second vector)
-    mlir::Value nullVec = cgf.getBuilder().getNullValue(cmp.getType(), loc);
-    cmp = cgf.getBuilder().createVecShuffle(loc, cmp, nullVec, indices);
+    mlir::Value nullVec = builder.getNullValue(cmp.getType(), loc);
+    cmp = builder.createVecShuffle(loc, cmp, nullVec, indices);
   }
-  return cgf.getBuilder().createBitcast(
-      cmp, cgf.getBuilder().getUIntNTy(std::max(numElts, 8U)));
+  return builder.createBitcast(
+      cmp, builder.getUIntNTy(std::max(numElts, 8U)));
 }
 
-static mlir::Value emitX86MaskedCompare(CIRGenFunction &cgf, unsigned cc,
-                                        bool isSigned,
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86MaskedCompare(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
+                                        unsigned cc, bool isSigned,
                                         ArrayRef<mlir::Value> ops,
                                         mlir::Location loc) {
   assert((ops.size() == 2 || ops.size() == 4) &&
@@ -310,9 +322,9 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction 
&cgf, unsigned cc,
   mlir::Value cmp;
 
   if (cc == 3) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
   } else if (cc == 7) {
-    llvm_unreachable("NYI");
+    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare cc == 7");
   } else {
     cir::CmpOpKind pred;
     switch (cc) {
@@ -338,9 +350,8 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction 
&cgf, unsigned cc,
       break;
     }
 
-    auto resultTy = cgf.getBuilder().getType<cir::VectorType>(
-        cgf.getBuilder().getUIntNTy(1), numElts);
-    cmp = cir::VecCmpOp::create(cgf.getBuilder(), loc, resultTy, pred, ops[0],
+    auto resultTy = cir::VectorType::get(builder.getSIntNTy(1), numElts);
+    cmp = cir::VecCmpOp::create(builder, loc, resultTy, pred, ops[0],
                                 ops[1]);
   }
 
@@ -348,42 +359,51 @@ static mlir::Value emitX86MaskedCompare(CIRGenFunction 
&cgf, unsigned cc,
   if (ops.size() == 4)
     maskIn = ops[3];
 
-  return emitX86MaskedCompareResult(cgf, cmp, numElts, maskIn, loc);
+  return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
 }
 
-static mlir::Value emitX86ConvertToMask(CIRGenFunction &cgf, mlir::Value in,
-                                        mlir::Location loc) {
-  cir::ConstantOp zero = cgf.getBuilder().getNullValue(in.getType(), loc);
-  return emitX86MaskedCompare(cgf, 1, true, {in, zero}, loc);
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
+                                        mlir::Value in, mlir::Location loc) {
+  cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
+  return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
 }
 
 // Convert the mask from an integer type to a vector of i1.
-static mlir::Value getMaskVecValue(CIRGenFunction &cgf, mlir::Value mask,
+static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, CIRGenFunction 
&cgf,
+                                   mlir::Value mask,
                                    unsigned numElts, mlir::Location loc) {
   cir::VectorType maskTy =
-      cir::VectorType::get(cgf.getBuilder().getSIntNTy(1),
+      cir::VectorType::get(builder.getSIntNTy(1),
                            cast<cir::IntType>(mask.getType()).getWidth());
 
-  mlir::Value maskVec = cgf.getBuilder().createBitcast(mask, maskTy);
+  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
 
   // If we have less than 8 elements, then the starting mask was an i8 and
   // we need to extract down to the right number of elements.
   if (numElts < 8) {
-    llvm::SmallVector<int64_t, 4> indices;
-    for (unsigned i = 0; i != numElts; ++i)
-      indices.push_back(i);
-    maskVec = cgf.getBuilder().createVecShuffle(loc, maskVec, maskVec, 
indices);
+    llvm::SmallVector<mlir::Attribute> indices;
+    mlir::Type i64Ty = builder.getSInt64Ty();
+
+    for (auto i : llvm::seq<unsigned>(0, numElts))
+      indices.push_back(cir::IntAttr::get(i64Ty, i));
+
+    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
   }
 
   return maskVec;
 }
 
-static mlir::Value emitX86SExtMask(CIRGenFunction &cgf, mlir::Value op,
-                                   mlir::Type dstTy, mlir::Location loc) {
+// TODO: The cgf parameter should be removed when all the NYI cases are
+// implemented.
+static std::optional<mlir::Value> emitX86SExtMask(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
+                                  mlir::Value op,
+                                  mlir::Type dstTy, mlir::Location loc) {
   unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
-  mlir::Value mask = getMaskVecValue(cgf, op, numberOfElements, loc);
+  mlir::Value mask = getMaskVecValue(builder, cgf, op, numberOfElements, loc);
 
-  return cgf.getBuilder().createCast(loc, cir::CastKind::integral, mask, 
dstTy);
+  return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
 }
 
 static mlir::Value emitVecInsert(CIRGenBuilderTy &builder, mlir::Location loc,
@@ -781,7 +801,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_storesh128_mask:
   case X86::BI__builtin_ia32_storess128_mask:
   case X86::BI__builtin_ia32_storesd128_mask:
-
+  cgm.errorNYI(expr->getSourceRange(),
+              std::string("unimplemented x86 builtin call: ") + 
+              getContext().BuiltinInfo.getName(builtinID));
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -794,8 +816,9 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
-    return emitX86SExtMask(*this, ops[0], convertType(expr->getType()),
-                           getLoc(expr->getExprLoc()));
+    return emitX86SExtMask(*this, this->getBuilder(), 
+                          ops[0], convertType(expr->getType()),
+                          getLoc(expr->getExprLoc())).value();
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -808,22 +831,20 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtq2mask128:
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
-    return emitX86ConvertToMask(*this, ops[0], getLoc(expr->getExprLoc()));
+    return emitX86ConvertToMask(*this, this->getBuilder(),
+                               ops[0], getLoc(expr->getExprLoc())).value();
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
-    llvm_unreachable("vcvtw2ph256_round_mask NYI");
   case X86::BI__builtin_ia32_cvtudq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
   case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
   case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
   case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
-    llvm_unreachable("vcvtuw2ph256_round_mask NYI");
-
   case X86::BI__builtin_ia32_vfmaddsh3_mask:
   case X86::BI__builtin_ia32_vfmaddss3_mask:
   case X86::BI__builtin_ia32_vfmaddsd3_mask:

>From 43250a6bde76f3b927c0766b8bc26ea30b56f8f5 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 09:33:04 +0200
Subject: [PATCH 3/9] [CIR] [X86] Fix x86 builtin expression handling by
 returning mlir::Value in error cases

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 5ecc79e8baf00..d066c4c18473d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -804,6 +804,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   cgm.errorNYI(expr->getSourceRange(),
               std::string("unimplemented x86 builtin call: ") + 
               getContext().BuiltinInfo.getName(builtinID));
+  return mlir::Value{};
   case X86::BI__builtin_ia32_cvtmask2b128:
   case X86::BI__builtin_ia32_cvtmask2b256:
   case X86::BI__builtin_ia32_cvtmask2b512:
@@ -818,7 +819,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q512:
     return emitX86SExtMask(*this, this->getBuilder(), 
                           ops[0], convertType(expr->getType()),
-                          getLoc(expr->getExprLoc())).value();
+                          getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:
   case X86::BI__builtin_ia32_cvtb2mask256:
   case X86::BI__builtin_ia32_cvtb2mask512:
@@ -832,7 +833,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtq2mask256:
   case X86::BI__builtin_ia32_cvtq2mask512:
     return emitX86ConvertToMask(*this, this->getBuilder(),
-                               ops[0], getLoc(expr->getExprLoc())).value();
+                               ops[0], getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtdq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2ps512_mask:
   case X86::BI__builtin_ia32_cvtqq2pd512_mask:

>From d02fcae9772c8b958979c5e3f73cd5a492748071 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Fri, 12 Dec 2025 09:33:23 +0200
Subject: [PATCH 4/9] [CodeGen][X86] Add test for _mm512_movm_epi32 intrinsic
 with CIR and LLVM checks

---
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c 
b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 92d8e1aa0879a..7fc34ca06fe58 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -951,6 +951,17 @@ __m256i test_mm256_movm_epi32(__mmask8 __A) {
   return _mm256_movm_epi32(__A); 
 }
 
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> 
!cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
16> -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A); 
+}
+
 __m128i test_mm_movm_epi64(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi64
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

>From 3518174f1be12dab784cdd4a67cc61728f479e67 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Mon, 15 Dec 2025 20:24:22 +0200
Subject: [PATCH 5/9] Add tests for AVX512 mask and move intrinsics in CIR

---
 .../CodeGenBuiltins/X86/avx512bw-builtins.c   |  39 +++++++
 .../CodeGenBuiltins/X86/avx512dq-builtins.c   |  35 ++++++
 .../CodeGenBuiltins/X86/avx512vlbw-builtins.c | 104 +++++++++++++++++
 .../CodeGenBuiltins/X86/avx512vldq-builtins.c | 110 ++++++++++++++++++
 4 files changed, 288 insertions(+)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 439c89e7953ab..7b46aea0faf21 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -768,3 +768,42 @@ unsigned char test_ktestz_mask64_u8(__mmask64 A, __mmask64 
B) {
   return _ktestz_mask64_u8(A, B);
 }
 
+
+
+__m512i test_mm512_movm_epi16(__mmask32 __A) {
+  // CIR-LABEL: _mm512_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> 
!cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
32> -> !cir.vector<!s16i x 32>
+  // LLVM-LABEL: @test_mm512_movm_epi16
+  // LLVM:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+  return _mm512_movm_epi16(__A); 
+}
+
+__mmask64 test_mm512_movepi8_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : 
!cir.vector<{{!s8i|!u8i}} x 64>, !cir.vector<!cir.int<u, 1> x 64>
+
+  // LLVM-LABEL: @test_mm512_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i64 0, ptr %{{.*}}, align 8
+  
+  // OGCG-LABEL: @test_mm512_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <64 x i8> %{{.*}}, zeroinitializer
+  return _mm512_movepi8_mask(__A); 
+}
+
+__mmask32 test_mm512_movepi16_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 
32>, !cir.vector<!cir.int<u, 1> x 32>
+
+  // LLVM-LABEL: @test_mm512_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <32 x i16> %{{.*}}, zeroinitializer
+  return _mm512_movepi16_mask(__A); 
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 3475e186e0c8f..66349ba4939fa 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -13,6 +13,17 @@
 
 #include <immintrin.h>
 
+__m512i test_mm512_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm512_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
8> -> !cir.vector<!s64i x 8>
+  // LLVM-LABEL: @test_mm512_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+  return _mm512_movm_epi64(__A); 
+}
+
+
 __mmask8 test_kadd_mask8(__mmask8 A, __mmask8 B) {
  // CIR-LABEL: _kadd_mask8
  // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
@@ -323,3 +334,27 @@ unsigned char test_ktestz_mask16_u8(__mmask16 A, __mmask16 
B) {
   // OGCG: trunc i32 %[[RES]] to i8
   return _ktestz_mask16_u8(A, B);
 }
+
+__mmask16 test_mm512_movepi32_mask(__m512i __A) {
+  // CIR-LABEL: _mm512_movepi32_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s32i x 
16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm512_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i32> %{{.*}}, zeroinitializer
+  return _mm512_movepi32_mask(__A); 
+}
+
+__mmask8 test_mm512_movepi64_mask(__m512i __A) {
+  // CIR-LABEL: @_mm512_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 
8>, !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm512_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm512_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i64> %{{.*}}, zeroinitializer
+  return _mm512_movepi64_mask(__A); 
+}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
new file mode 100644
index 0000000000000..ed67a859230fb
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
@@ -0,0 +1,104 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fno-signed-char -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512bw -target-feature 
+avx512vl -fno-signed-char -fclangir -emit-llvm -o %t.ll -Wall -Werror 
-Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM-UNSIGNED-CHAR --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx10.1-512 -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl 
-emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vl 
-fno-signed-char -emit-llvm -o - -Wall -Werror -Wsign-conversion | FileCheck %s 
--check-prefixes=OGCG
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx10.1-512 -emit-llvm -o - -Wall 
-Werror -Wsign-conversion | FileCheck %s --check-prefixes=OGCG
+
+
+#include <immintrin.h>
+
+__m128i test_mm_movm_epi8(__mmask16 __A) {
+  // CIR-LABEL: _mm_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> 
!cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
16> -> !cir.vector<{{!s8i|!u8i}} x 16>
+
+  // LLVM-LABEL: @test_mm_movm_epi8
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+  return _mm_movm_epi8(__A); 
+}
+
+__m256i test_mm256_movm_epi8(__mmask32 __A) {
+  // CIR-LABEL: _mm256_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> 
!cir.vector<!cir.int<s, 1> x 32>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
32> -> !cir.vector<{{!s8i|!u8i}} x 32>
+
+  // LLVM-LABEL: @test_mm256_movm_epi8
+  // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+  return _mm256_movm_epi8(__A); 
+}
+
+__m512i test_mm512_movm_epi8(__mmask64 __A) {
+  // CIR-LABEL: _mm512_movm_epi8
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> 
!cir.vector<!cir.int<s, 1> x 64>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
64> -> !cir.vector<{{!s8i|!u8i}} x 64>
+
+  // LLVM-LABEL: @test_mm512_movm_epi8
+  // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+  return _mm512_movm_epi8(__A); 
+}
+
+__m128i test_mm_movm_epi16(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
8> -> !cir.vector<!s16i x 8>
+
+  // LLVM-LABEL: @test_mm_movm_epi16
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+  return _mm_movm_epi16(__A); 
+}
+
+__m256i test_mm256_movm_epi16(__mmask16 __A) {
+  // CIR-LABEL: _mm256_movm_epi16
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> 
!cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
16> -> !cir.vector<!s16i x 16>
+
+  // LLVM-LABEL: @test_mm256_movm_epi16
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+  return _mm256_movm_epi16(__A); 
+}
+
+__mmask16 test_mm_movepi8_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi8_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : 
!cir.vector<{{!s8i|!u8i}} x 16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm_movepi8_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+
+  // In the unsigned case below, the canonicalizer proves the comparison is
+  // always false (no i8 unsigned value can be < 0) and folds it away.
+  // LLVM-UNSIGNED-CHAR: store i16 0, ptr %{{.*}}, align 2
+
+  // OGCG-LABEL: @test_mm_movepi8_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i8> %{{.*}}, zeroinitializer
+  return _mm_movepi8_mask(__A); 
+}
+
+__mmask16 test_mm256_movepi16_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi16_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s16i x 
16>, !cir.vector<!cir.int<u, 1> x 16>
+
+  // LLVM-LABEL: @test_mm256_movepi16_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi16_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <16 x i16> %{{.*}}, zeroinitializer
+  return _mm256_movepi16_mask(__A); 
+}
\ No newline at end of file
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
new file mode 100644
index 0000000000000..ef9e9d7f9fde7
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror 
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -target-feature 
+avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-apple-darwin -target-feature +avx512dq -target-feature +avx512vl 
-emit-llvm -o - -Wall -Werror | FileCheck %s --check-prefixes=OGCG
+
+#include <immintrin.h>
+
+
+__m128i test_mm_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 
1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
4> -> !cir.vector<!s32i x 4>
+
+  // LLVM-LABEL: @test_mm_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+  return _mm_movm_epi32(__A); 
+}
+
+__m256i test_mm256_movm_epi32(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
8> -> !cir.vector<!s32i x 8>
+
+  // LLVM-LABEL: @test_mm256_movm_epi32
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+  return _mm256_movm_epi32(__A); 
+}
+
+__m512i test_mm512_movm_epi32(__mmask16 __A) {
+  // CIR-LABEL: _mm512_movm_epi32
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> 
!cir.vector<!cir.int<s, 1> x 16>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
16> -> !cir.vector<!s32i x 16>
+
+  // LLVM-LABEL: @test_mm512_movm_epi32
+  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+  return _mm512_movm_epi32(__A); 
+}
+
+__m128i test_mm_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 
1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<!cir.int<s, 
1> x 2>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
2> -> !cir.vector<!s64i x 2>
+
+  // LLVM-LABEL: @test_mm_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> <i32 0, i32 1>
+  // LLVM: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+  return _mm_movm_epi64(__A); 
+}
+
+__m256i test_mm256_movm_epi64(__mmask8 __A) {
+  // CIR-LABEL: _mm256_movm_epi64
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<s, 
1> x 8>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i] : !cir.vector<!cir.int<s, 1> x 4>
+  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
4> -> !cir.vector<!s64i x 4>
+
+  // LLVM-LABEL: @test_mm256_movm_epi64
+  // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+  return _mm256_movm_epi64(__A); 
+}
+
+__mmask8 test_mm256_movepi32_mask(__m256i __A) {
+  // LLVM-LABEL: @test_mm256_movepi32_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+
+  // OGCG-LABEL: @test_mm256_movepi32_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <8 x i32> %{{.*}}, zeroinitializer
+  return _mm256_movepi32_mask(__A); 
+}
+
+__mmask8 test_mm_movepi64_mask(__m128i __A) {
+  // CIR-LABEL: _mm_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 
2>, !cir.vector<!cir.int<u, 1> x 2>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 
1> x 2>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<2> : !s32i, #cir.int<3> : !s32i, #cir.int<2> : 
!s32i, #cir.int<3> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !cir.vector<!cir.int<u, 1> x 8> 
-> !u8i
+
+  // LLVM-LABEL: @test_mm_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+
+  // OGCG-LABEL: @test_mm_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <2 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <2 x i1> [[CMP]], <2 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, 
i32 3>
+  return _mm_movepi64_mask(__A); 
+}
+
+__mmask8 test_mm256_movepi64_mask(__m256i __A) {
+  // CIR-LABEL: _mm256_movepi64_mask
+  // CIR: %{{.*}} = cir.vec.cmp(lt, %{{.*}}, %{{.*}}) : !cir.vector<!s64i x 
4>, !cir.vector<!cir.int<u, 1> x 4>
+  // CIR: %{{.*}} = cir.vec.shuffle(%{{.*}}, %{{.*}} : !cir.vector<!cir.int<u, 
1> x 4>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i, #cir.int<2> : !s32i, 
#cir.int<3> : !s32i, #cir.int<4> : !s32i, #cir.int<5> : !s32i, #cir.int<6> : 
!s32i, #cir.int<7> : !s32i] : !cir.vector<!cir.int<u, 1> x 8>
+
+  // LLVM-LABEL: @test_mm256_movepi64_mask
+  // LLVM: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // LLVM: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+
+  // OGCG-LABEL: @test_mm256_movepi64_mask
+  // OGCG: [[CMP:%.*]] = icmp slt <4 x i64> %{{.*}}, zeroinitializer
+  // OGCG: [[SHUF:%.*]] = shufflevector <4 x i1> [[CMP]], <4 x i1> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, 
i32 7>
+  return _mm256_movepi64_mask(__A); 
+}
+

>From 9ec96625ca84ea7951725fc29e33ccf3b12c0b89 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Mon, 15 Dec 2025 20:24:41 +0200
Subject: [PATCH 6/9] [CIR] [X86] Improve error handling in
 emitX86MaskedCompareResult and emitX86MaskedCompare functions

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index d066c4c18473d..6522a3d842f4d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -292,6 +292,7 @@ static std::optional<mlir::Value> 
emitX86MaskedCompareResult(CIRGenFunction &cgf
                                               mlir::Location loc) {
   if (maskIn) {
     cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult");
+    return {};
   }
   if (numElts < 8) {
     llvm::SmallVector<mlir::Attribute> indices;
@@ -323,8 +324,10 @@ static std::optional<mlir::Value> 
emitX86MaskedCompare(CIRGenFunction &cgf, CIRG
 
   if (cc == 3) {
     cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 3");
+    return {};
   } else if (cc == 7) {
    cgf.cgm.errorNYI(loc, "emitX86MaskedCompare: cc == 7");
+    return {};
   } else {
     cir::CmpOpKind pred;
     switch (cc) {

>From 7d3726e8f73e9f30cb67e24906b308bce8097032 Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Tue, 16 Dec 2025 07:42:20 +0200
Subject: [PATCH 7/9] Remove unused 'cgf' parameter from emitX86SExtMask
 function

---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 42 ++++------------------
 1 file changed, 7 insertions(+), 35 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 78d23de373e5e..cc581f1d1e02a 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -284,8 +284,8 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy 
&builder, mlir::Location loc,
   return emitIntrinsicCallOp(builder, loc, intrinsicName, resTy,
                              mlir::ValueRange{lhsVec, rhsVec});
 }
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
+
+// TODO: The cgf parameter should be removed when all the NYI cases are 
implemented.
 static std::optional<mlir::Value> emitX86MaskedCompareResult(CIRGenFunction 
&cgf, CIRGenBuilderTy &builder,
                                               mlir::Value cmp, unsigned 
numElts,
                                               mlir::Value maskIn,
@@ -311,8 +311,7 @@ static std::optional<mlir::Value> 
emitX86MaskedCompareResult(CIRGenFunction &cgf
       cmp, builder.getUIntNTy(std::max(numElts, 8U)));
 }
 
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
+// TODO: The cgf parameter should be removed when all the NYI cases are 
implemented.
 static std::optional<mlir::Value> emitX86MaskedCompare(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
                                         unsigned cc, bool isSigned,
                                         ArrayRef<mlir::Value> ops,
@@ -365,46 +364,19 @@ static std::optional<mlir::Value> 
emitX86MaskedCompare(CIRGenFunction &cgf, CIRG
   return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc);
 }
 
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
+// TODO: The cgf parameter should be removed when all the NYI cases are 
implemented.
 static std::optional<mlir::Value> emitX86ConvertToMask(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
                                         mlir::Value in, mlir::Location loc) {
   cir::ConstantOp zero = builder.getNullValue(in.getType(), loc);
   return emitX86MaskedCompare(cgf, builder, 1, true, {in, zero}, loc);
 }
 
-// Convert the mask from an integer type to a vector of i1.
-static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, CIRGenFunction 
&cgf,
-                                   mlir::Value mask,
-                                   unsigned numElts, mlir::Location loc) {
-  cir::VectorType maskTy =
-      cir::VectorType::get(builder.getSIntNTy(1),
-                           cast<cir::IntType>(mask.getType()).getWidth());
-
-  mlir::Value maskVec = builder.createBitcast(mask, maskTy);
-
-  // If we have less than 8 elements, then the starting mask was an i8 and
-  // we need to extract down to the right number of elements.
-  if (numElts < 8) {
-    llvm::SmallVector<mlir::Attribute> indices;
-    mlir::Type i64Ty = builder.getSInt64Ty();
-
-    for (auto i : llvm::seq<unsigned>(0, numElts))
-      indices.push_back(cir::IntAttr::get(i64Ty, i));
-
-    maskVec = builder.createVecShuffle(loc, maskVec, maskVec, indices);
-  }
-
-  return maskVec;
-}
 
-// TODO: The cgf parameter should be removed when all the NYI cases are
-// implemented.
-static std::optional<mlir::Value> emitX86SExtMask(CIRGenFunction &cgf, 
CIRGenBuilderTy &builder,
+static std::optional<mlir::Value> emitX86SExtMask(CIRGenBuilderTy &builder,
                                   mlir::Value op,
                                   mlir::Type dstTy, mlir::Location loc) {
   unsigned numberOfElements = cast<cir::VectorType>(dstTy).getSize();
-  mlir::Value mask = getMaskVecValue(builder, cgf, op, numberOfElements, loc);
+  mlir::Value mask = getMaskVecValue(builder, loc, op, numberOfElements);
 
   return builder.createCast(loc, cir::CastKind::integral, mask, dstTy);
 }
@@ -820,7 +792,7 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   case X86::BI__builtin_ia32_cvtmask2q128:
   case X86::BI__builtin_ia32_cvtmask2q256:
   case X86::BI__builtin_ia32_cvtmask2q512:
-    return emitX86SExtMask(*this, this->getBuilder(), 
+    return emitX86SExtMask(this->getBuilder(), 
                           ops[0], convertType(expr->getType()),
                           getLoc(expr->getExprLoc()));
   case X86::BI__builtin_ia32_cvtb2mask128:

>From 3af8d9ae4c7a02a3e81462e737249ccd4209f68c Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Tue, 16 Dec 2025 07:43:47 +0200
Subject: [PATCH 8/9] Add OGCG labels for movm_epi functions in AVX512 builtins
 tests

---
 .../CodeGenBuiltins/X86/avx512bw-builtins.c   |  5 ++++
 .../CodeGenBuiltins/X86/avx512dq-builtins.c   |  5 ++++
 .../CodeGenBuiltins/X86/avx512vlbw-builtins.c | 20 ++++++++++++++++
 .../CodeGenBuiltins/X86/avx512vldq-builtins.c | 23 +++++++++++++++++++
 4 files changed, 53 insertions(+)

diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 7b46aea0faf21..24ac203fa9464 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -774,9 +774,14 @@ __m512i test_mm512_movm_epi16(__mmask32 __A) {
   // CIR-LABEL: _mm512_movm_epi16
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u32i -> 
!cir.vector<!cir.int<s, 1> x 32>
   // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
32> -> !cir.vector<!s16i x 32>
+  
   // LLVM-LABEL: @test_mm512_movm_epi16
   // LLVM:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
   // LLVM:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
+
+  // OGCG-LABEL: @test_mm512_movm_epi16
+  // OGCG:  %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG:  %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i16>
   return _mm512_movm_epi16(__A); 
 }
 
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
index 66349ba4939fa..974d6d6668a95 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -17,9 +17,14 @@ __m512i test_mm512_movm_epi64(__mmask8 __A) {
   // CIR-LABEL: _mm512_movm_epi64
   // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u8i -> !cir.vector<!cir.int<s, 
1> x 8>
   // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
8> -> !cir.vector<!s64i x 8>
+
   // LLVM-LABEL: @test_mm512_movm_epi64
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
+
+  // OGCG-LABEL: @test_mm512_movm_epi64
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i64>
   return _mm512_movm_epi64(__A); 
 }
 
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
index ed67a859230fb..6d67ca7e193d8 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlbw-builtins.c
@@ -28,6 +28,10 @@ __m128i test_mm_movm_epi8(__mmask16 __A) {
   // LLVM-LABEL: @test_mm_movm_epi8
   // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
+
+  // OGCG-LABEL: @test_mm_movm_epi8
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i8>
   return _mm_movm_epi8(__A); 
 }
 
@@ -39,6 +43,10 @@ __m256i test_mm256_movm_epi8(__mmask32 __A) {
   // LLVM-LABEL: @test_mm256_movm_epi8
   // LLVM: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
   // LLVM: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
+
+  // OGCG-LABEL: @test_mm256_movm_epi8
+  // OGCG: %{{.*}} = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %{{.*}} = sext <32 x i1> %{{.*}} to <32 x i8>
   return _mm256_movm_epi8(__A); 
 }
 
@@ -50,6 +58,10 @@ __m512i test_mm512_movm_epi8(__mmask64 __A) {
   // LLVM-LABEL: @test_mm512_movm_epi8
   // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
   // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
+
+  // OGCG-LABEL: @test_mm512_movm_epi8
+  // OGCG: %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
   return _mm512_movm_epi8(__A); 
 }
 
@@ -61,6 +73,10 @@ __m128i test_mm_movm_epi16(__mmask8 __A) {
   // LLVM-LABEL: @test_mm_movm_epi16
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
+
+  // OGCG-LABEL: @test_mm_movm_epi16
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i16>
   return _mm_movm_epi16(__A); 
 }
 
@@ -72,6 +88,10 @@ __m256i test_mm256_movm_epi16(__mmask16 __A) {
   // LLVM-LABEL: @test_mm256_movm_epi16
   // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
+
+  // OGCG-LABEL: @test_mm256_movm_epi16
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i16>
   return _mm256_movm_epi16(__A); 
 }
 
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
index ef9e9d7f9fde7..2333fbb9ae7e7 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vldq-builtins.c
@@ -17,6 +17,11 @@ __m128i test_mm_movm_epi32(__mmask8 __A) {
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
+
+  // OGCG-LABEL: @test_mm_movm_epi32
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i32>
   return _mm_movm_epi32(__A); 
 }
 
@@ -28,6 +33,10 @@ __m256i test_mm256_movm_epi32(__mmask8 __A) {
   // LLVM-LABEL: @test_mm256_movm_epi32
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
+
+  // OGCG-LABEL: @test_mm256_movm_epi32
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = sext <8 x i1> %{{.*}} to <8 x i32>
   return _mm256_movm_epi32(__A); 
 }
 
@@ -39,6 +48,10 @@ __m512i test_mm512_movm_epi32(__mmask16 __A) {
   // LLVM-LABEL: @test_mm512_movm_epi32
   // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
   // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
+
+  // OGCG-LABEL: @test_mm512_movm_epi32
+  // OGCG: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
   return _mm512_movm_epi32(__A); 
 }
 
@@ -52,6 +65,11 @@ __m128i test_mm_movm_epi64(__mmask8 __A) {
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> <i32 0, i32 1>
   // LLVM: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
+
+  // OGCG-LABEL: @test_mm_movm_epi64
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <2 x 
i32> <i32 0, i32 1>
+  // OGCG: %{{.*}} = sext <2 x i1> %{{.*}} to <2 x i64>
   return _mm_movm_epi64(__A); 
 }
 
@@ -65,6 +83,11 @@ __m256i test_mm256_movm_epi64(__mmask8 __A) {
   // LLVM: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
   // LLVM: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
   // LLVM: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
+
+  // OGCG-LABEL: @test_mm256_movm_epi64
+  // OGCG: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %{{.*}} = shufflevector <8 x i1> %{{.*}}, <8 x i1> %{{.*}}, <4 x 
i32> <i32 0, i32 1, i32 2, i32 3>
+  // OGCG: %{{.*}} = sext <4 x i1> %{{.*}} to <4 x i64>
   return _mm256_movm_epi64(__A); 
 }
 

>From 778da4e6bf27016ca6fd8ab2f0b840f2f710a33b Mon Sep 17 00:00:00 2001
From: MarwanTarik <[email protected]>
Date: Tue, 16 Dec 2025 07:44:11 +0200
Subject: [PATCH 9/9] Remove test functions for _mm512_movm_epi8 and
 _mm512_movm_epi32 in AVX512 builtins tests

---
 clang/test/CodeGen/X86/avx512vlbw-builtins.c | 12 ------------
 clang/test/CodeGen/X86/avx512vldq-builtins.c | 11 -----------
 2 files changed, 23 deletions(-)

diff --git a/clang/test/CodeGen/X86/avx512vlbw-builtins.c 
b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
index a088efa6784db..f6f27d9c3da3d 100644
--- a/clang/test/CodeGen/X86/avx512vlbw-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vlbw-builtins.c
@@ -3226,18 +3226,6 @@ __m256i test_mm256_movm_epi8(__mmask32 __A) {
   return _mm256_movm_epi8(__A); 
 }
 
-__m512i test_mm512_movm_epi8(__mmask64 __A) {
-  // CIR-LABEL: _mm512_movm_epi8
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u64i -> 
!cir.vector<!cir.int<s, 1> x 64>
-  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
64> -> !cir.vector<{{!s8i|!u8i}} x 64>
-
-  // LLVM-LABEL: @test_mm512_movm_epi8
-  // LLVM:  %{{.*}} = bitcast i64 %{{.*}} to <64 x i1>
-  // LLVM:  %{{.*}} = sext <64 x i1> %{{.*}} to <64 x i8>
-  return _mm512_movm_epi8(__A); 
-}
-
-
 __m128i test_mm_movm_epi16(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi16
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>
diff --git a/clang/test/CodeGen/X86/avx512vldq-builtins.c 
b/clang/test/CodeGen/X86/avx512vldq-builtins.c
index 7fc34ca06fe58..92d8e1aa0879a 100644
--- a/clang/test/CodeGen/X86/avx512vldq-builtins.c
+++ b/clang/test/CodeGen/X86/avx512vldq-builtins.c
@@ -951,17 +951,6 @@ __m256i test_mm256_movm_epi32(__mmask8 __A) {
   return _mm256_movm_epi32(__A); 
 }
 
-__m512i test_mm512_movm_epi32(__mmask16 __A) {
-  // CIR-LABEL: _mm512_movm_epi32
-  // CIR: %{{.*}} = cir.cast bitcast %{{.*}} : !u16i -> 
!cir.vector<!cir.int<s, 1> x 16>
-  // CIR: %{{.*}} = cir.cast integral %{{.*}} : !cir.vector<!cir.int<s, 1> x 
16> -> !cir.vector<!s32i x 16>
-
-  // LLVM-LABEL: @test_mm512_movm_epi32
-  // LLVM: %{{.*}} = bitcast i16 %{{.*}} to <16 x i1>
-  // LLVM: %{{.*}} = sext <16 x i1> %{{.*}} to <16 x i32>
-  return _mm512_movm_epi32(__A); 
-}
-
 __m128i test_mm_movm_epi64(__mmask8 __A) {
   // CHECK-LABEL: test_mm_movm_epi64
   // CHECK: %{{.*}} = bitcast i8 %{{.*}} to <8 x i1>

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to