[clang] [CIR][X86] Implement lowering for AVX512 mask builtins (PR #169774)

via cfe-commits Fri, 28 Nov 2025 22:24:49 -0800

https://github.com/GeneraluseAI created 
https://github.com/llvm/llvm-project/pull/169774


part of [#167765](https://github.com/llvm/llvm-project/issues/167765)
related to [#169185](https://github.com/llvm/llvm-project/pull/169185)
This patch adds CIR codegen support for AVX512 mask operations on X86, 
including kadd, kand, kandn, kor, kxor, knot, and kmov in all supported mask 
widths. Each builtin now lowers to the expected vector<i1> form and bitcast 
representations in CIR, matching the semantics of the corresponding LLVM 
intrinsics.



>From e6b31132ad9b113d375423a92843ce1b4a42e1c1 Mon Sep 17 00:00:00 2001
From: generaluseai <[email protected]>
Date: Thu, 27 Nov 2025 15:58:55 +0800
Subject: [PATCH] [CIR][X86] Implement lowering for AVX512 mask builtins (kadd,
 kand, kandn, kor, kxor, knot, kmov)

This patch adds CIR codegen support for AVX512 mask operations on X86,
including kadd, kand, kandn, kor, kxor, knot, and kmov in all supported
mask widths. Each builtin now lowers to the expected vector<i1> form
and bitcast representations in CIR, matching the semantics of the
corresponding LLVM intrinsics.

The patch also adds comprehensive CIR/LLVM/OGCG tests for AVX512F,
AVX512DQ, and AVX512BW to validate the lowering behavior.
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  72 +++-
 .../CodeGenBuiltins/X86/avx512bw-builtins.c   | 350 ++++++++++++++++++
 .../CodeGenBuiltins/X86/avx512dq-builtins.c   | 210 +++++++++++
 .../CodeGenBuiltins/X86/avx512f-builtins.c    | 151 ++++++++
 4 files changed, 781 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index a0ee57f82a04f..0ebfd51b82b56 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -85,6 +85,37 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, 
mlir::Location loc,
   return maskVec;
 }
 
+static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
+                                       mlir::Location loc,
+                                       const std::string &intrinsicName,
+                                       SmallVectorImpl<mlir::Value> &ops) {
+
+  auto intTy = cast<cir::IntType>(ops[0].getType());
+  unsigned numElts = intTy.getWidth();
+  mlir::Value lhsVec = getMaskVecValue(cgf, loc, ops[0], numElts);
+  mlir::Value rhsVec = getMaskVecValue(cgf, loc, ops[1], numElts);
+  mlir::Type vecTy = lhsVec.getType();
+  mlir::Value resVec = emitIntrinsicCallOp(cgf, loc, intrinsicName, vecTy,
+                                           mlir::ValueRange{lhsVec, rhsVec});
+  return builder.createBitcast(resVec, ops[0].getType());
+}
+
+static mlir::Value emitX86MaskLogic(CIRGenFunction &cgf, mlir::Location loc,
+                                    cir::BinOpKind binOpKind,
+                                    SmallVectorImpl<mlir::Value> &ops,
+                                    bool invertLHS = false) {
+  CIRGenBuilderTy &builder = cgf.getBuilder();
+  unsigned numElts = cast<cir::IntType>(ops[0].getType()).getWidth();
+  mlir::Value lhs = getMaskVecValue(cgf, loc, ops[0], numElts);
+  mlir::Value rhs = getMaskVecValue(cgf, loc, ops[1], numElts);
+
+  if (invertLHS)
+    lhs = builder.createNot(lhs);
+  return builder.createBitcast(
+      builder.createBinop(loc), lhs, binOpKind, rhs),
+      ops[0].getType());
+}
+
 mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID,
                                                const CallExpr *expr) {
   if (builtinID == Builtin::BI__builtin_cpu_is) {
@@ -743,38 +774,75 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_ktestzsi:
   case X86::BI__builtin_ia32_ktestcdi:
   case X86::BI__builtin_ia32_ktestzdi:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_kaddqi:
+    return emitX86MaskAddLogic(*this, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.b", ops);
   case X86::BI__builtin_ia32_kaddhi:
+    return emitX86MaskAddLogic(*this, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.w", ops);
   case X86::BI__builtin_ia32_kaddsi:
+    return emitX86MaskAddLogic(*this, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.d", ops);
   case X86::BI__builtin_ia32_kadddi:
+    return emitX86MaskAddLogic(*this, getLoc(expr->getExprLoc()),
+                               "x86.avx512.kadd.q", ops);
   case X86::BI__builtin_ia32_kandqi:
   case X86::BI__builtin_ia32_kandhi:
   case X86::BI__builtin_ia32_kandsi:
   case X86::BI__builtin_ia32_kanddi:
+    return emitX86MaskLogic(*this, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::And, ops);
   case X86::BI__builtin_ia32_kandnqi:
   case X86::BI__builtin_ia32_kandnhi:
   case X86::BI__builtin_ia32_kandnsi:
   case X86::BI__builtin_ia32_kandndi:
+    return emitX86MaskLogic(*this, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::And, ops, true);
   case X86::BI__builtin_ia32_korqi:
   case X86::BI__builtin_ia32_korhi:
   case X86::BI__builtin_ia32_korsi:
   case X86::BI__builtin_ia32_kordi:
+    return emitX86MaskLogic(*this, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Or, ops);
   case X86::BI__builtin_ia32_kxnorqi:
   case X86::BI__builtin_ia32_kxnorhi:
   case X86::BI__builtin_ia32_kxnorsi:
   case X86::BI__builtin_ia32_kxnordi:
+    return emitX86MaskLogic(*this, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Xor, ops, true);
   case X86::BI__builtin_ia32_kxorqi:
   case X86::BI__builtin_ia32_kxorhi:
   case X86::BI__builtin_ia32_kxorsi:
   case X86::BI__builtin_ia32_kxordi:
+    return emitX86MaskLogic(*this, getLoc(expr->getExprLoc()),
+                            cir::BinOpKind::Xor, ops);
   case X86::BI__builtin_ia32_knotqi:
   case X86::BI__builtin_ia32_knothi:
   case X86::BI__builtin_ia32_knotsi:
-  case X86::BI__builtin_ia32_knotdi:
+  case X86::BI__builtin_ia32_knotdi: {
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value resVec =
+        getMaskVecValue(*this, getLoc(expr->getExprLoc()), ops[0], numElts);
+    return builder.createBitcast(builder.createNot(resVec), ops[0].getType());
+  }
   case X86::BI__builtin_ia32_kmovb:
   case X86::BI__builtin_ia32_kmovw:
   case X86::BI__builtin_ia32_kmovd:
-  case X86::BI__builtin_ia32_kmovq:
+  case X86::BI__builtin_ia32_kmovq: {
+    // Bitcast to vXi1 type and then back to integer. This gets the mask
+    // register type into the IR, but might be optimized out depending on
+    // what's around it.
+    cir::IntType intTy = cast<cir::IntType>(ops[0].getType());
+    unsigned numElts = intTy.getWidth();
+    mlir::Value resVec =
+        getMaskVecValue(*this, getLoc(expr->getExprLoc()), ops[0], numElts);
+    return builder.createBitcast(resVec, ops[0].getType());
+  }
   case X86::BI__builtin_ia32_kunpckdi:
   case X86::BI__builtin_ia32_kunpcksi:
   case X86::BI__builtin_ia32_kunpckhi:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
index 3522e2c7e50bf..4863ba0bd8848 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512bw-builtins.c
@@ -115,3 +115,353 @@ __mmask32 test_kshiftri_mask32_out_of_range(__mmask32 A) {
 
   return _kshiftri_mask32(A, 33);
 }
+
+
+__mmask32 test_kadd_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kadd_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.d"
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kadd_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[RES:%.*]] = call <32 x i1> @llvm.x86.avx512.kadd.d(<32 x i1> 
[[L]], <32 x i1> [[R]])
+  // LLVM: bitcast <32 x i1> [[RES]] to i32
+
+  // OGCG-LABEL: _kadd_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: call <32 x i1> @llvm.x86.avx512.kadd.d
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _kadd_mask32(A, B);
+}
+
+__mmask64 test_kadd_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kadd_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.q"
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kadd_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[RES:%.*]] = call <64 x i1> @llvm.x86.avx512.kadd.q(<64 x i1> 
[[L]], <64 x i1> [[R]])
+  // LLVM: bitcast <64 x i1> [[RES]] to i64
+
+  // OGCG-LABEL: _kadd_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: call <64 x i1> @llvm.x86.avx512.kadd.q
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _kadd_mask64(A, B);
+}
+
+__mmask32 test_kand_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kand_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kand_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[RES:%.*]] = and <32 x i1> [[L]], [[R]]
+  // LLVM: bitcast <32 x i1> [[RES]] to i32
+
+  // OGCG-LABEL: _kand_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: and <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _kand_mask32(A, B);
+}
+
+__mmask64 test_kand_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kand_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kand_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[RES:%.*]] = and <64 x i1> [[L]], [[R]]
+  // LLVM: bitcast <64 x i1> [[RES]] to i64
+
+  // OGCG-LABEL: _kand_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: and <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _kand_mask64(A, B);
+}
+
+__mmask32 test_kandn_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kandn_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kandn_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: xor <32 x i1> [[L]], splat (i1 true)
+  // LLVM: and <32 x i1>
+  // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+  // OGCG-LABEL: _kandn_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: xor <32 x i1>
+  // OGCG: and <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _kandn_mask32(A, B);
+}
+
+__mmask64 test_kandn_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kandn_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kandn_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: xor <64 x i1> [[L]], splat (i1 true)
+  // LLVM: and <64 x i1>
+  // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+  // OGCG-LABEL: _kandn_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: xor <64 x i1>
+  // OGCG: and <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _kandn_mask64(A, B);
+}
+
+__mmask32 test_kor_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kor_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kor_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: or <32 x i1> [[L]], [[R]]
+  // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+  // OGCG-LABEL: _kor_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: or <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _kor_mask32(A, B);
+}
+
+__mmask64 test_kor_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kor_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kor_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: or <64 x i1> [[L]], [[R]]
+  // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+  // OGCG-LABEL: _kor_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: or <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _kor_mask64(A, B);
+}
+
+__mmask32 test_kxor_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kxor_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kxor_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: xor <32 x i1> [[L]], [[R]]
+  // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+  // OGCG-LABEL: _kxor_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: xor <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _kxor_mask32(A, B);
+}
+
+__mmask64 test_kxor_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kxor_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kxor_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: xor <64 x i1> [[L]], [[R]]
+  // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+  // OGCG-LABEL: _kxor_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: xor <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _kxor_mask64(A, B);
+}
+
+__mmask32 test_kxnor_mask32(__mmask32 A, __mmask32 B) {
+  // CIR-LABEL: _kxnor_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _kxnor_mask32
+  // LLVM: [[L:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[R:%.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: [[NOT:%.*]] = xor <32 x i1> [[L]], splat (i1 true)
+  // LLVM: [[RES:%.*]] = xor <32 x i1> [[NOT]], [[R]]
+  // LLVM: bitcast <32 x i1> [[RES]] to i32
+
+  // OGCG-LABEL: _kxnor_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: xor <32 x i1>
+  // OGCG: xor <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+
+  return _kxnor_mask32(A, B);
+}
+
+__mmask64 test_kxnor_mask64(__mmask64 A, __mmask64 B) {
+  // CIR-LABEL: _kxnor_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _kxnor_mask64
+  // LLVM: [[L:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[R:%.*]] = bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: [[NOT:%.*]] = xor <64 x i1> [[L]], splat (i1 true)
+  // LLVM: [[RES:%.*]] = xor <64 x i1> [[NOT]], [[R]]
+  // LLVM: bitcast <64 x i1> [[RES]] to i64
+
+  // OGCG-LABEL: _kxnor_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: xor <64 x i1>
+  // OGCG: xor <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+
+  return _kxnor_mask64(A, B);
+}
+
+
+__mmask32 test_knot_mask32(__mmask32 A) {
+  // CIR-LABEL: _knot_mask32
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: _knot_mask32
+  // LLVM: bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: xor <32 x i1>
+  // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+  // OGCG-LABEL: _knot_mask32
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: xor <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+  return _knot_mask32(A);
+}
+
+__mmask64 test_knot_mask64(__mmask64 A) {
+  // CIR-LABEL: _knot_mask64
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: _knot_mask64
+  // LLVM: bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: xor <64 x i1>
+  // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+  // OGCG-LABEL: _knot_mask64
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: xor <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+  return _knot_mask64(A);
+}
+
+// Multiple user-level mask helpers inline to this same kmov builtin.
+// CIR does not implement any special lowering for those helpers.
+//
+// Therefore, testing the builtin (__builtin_ia32_kmov*) directly is
+// sufficient to cover the CIR lowering behavior. Testing each helper
+// individually would add no new CIR paths.
+
+__mmask32 test_kmov_d(__mmask32 A) {
+  // CIR-LABEL: test_kmov_d
+  // CIR: cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<32 x !cir.int<u, 1>> -> !u32i
+
+  // LLVM-LABEL: test_kmov_d
+  // LLVM: bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: bitcast <32 x i1> {{.*}} to i32
+
+  // OGCG-LABEL: test_kmov_d
+  // OGCG: bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: bitcast <32 x i1> {{.*}} to i32
+
+  return __builtin_ia32_kmovd(A);
+}
+
+// Multiple user-level mask helpers inline to this same kmov builtin.
+// CIR does not implement any special lowering for those helpers.
+//
+// Therefore, testing the builtin (__builtin_ia32_kmov*) directly is
+// sufficient to cover the CIR lowering behavior. Testing each helper
+// individually would add no new CIR paths.
+
+__mmask64 test_kmov_q(__mmask64 A) {
+  // CIR-LABEL: test_kmov_q
+  // CIR: cir.cast bitcast {{.*}} : !u64i -> !cir.vector<64 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<64 x !cir.int<u, 1>> -> !u64i
+
+  // LLVM-LABEL: test_kmov_q
+  // LLVM: bitcast i64 %{{.*}} to <64 x i1>
+  // LLVM: bitcast <64 x i1> {{.*}} to i64
+
+  // OGCG-LABEL: test_kmov_q
+  // OGCG: bitcast i64 %{{.*}} to <64 x i1>
+  // OGCG: bitcast <64 x i1> {{.*}} to i64
+
+  return __builtin_ia32_kmovq(A);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
new file mode 100644
index 0000000000000..5d81f666271be
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512dq-builtins.c
@@ -0,0 +1,210 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-cir -o 
%t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fclangir -emit-llvm -o 
%t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fno-signed-char 
-fclangir -emit-cir -o %t.cir -Wall -Werror
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -fno-signed-char 
-fclangir -emit-llvm -o %t.ll -Wall -Werror
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -emit-llvm -o - -Wall 
-Werror | FileCheck %s -check-prefix=OGCG
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512dq -emit-llvm -o - -Wall 
-Werror | FileCheck %s -check-prefix=OGCG
+
+#include <immintrin.h>
+
+__mmask8 test_kadd_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kadd_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.b"
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kadd_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[RES:%.*]] = call <8 x i1> @llvm.x86.avx512.kadd.b(<8 x i1> [[L]], 
<8 x i1> [[R]])
+ // LLVM: bitcast <8 x i1> [[RES]] to i8
+
+ // OGCG-LABEL: _kadd_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: call <8 x i1> @llvm.x86.avx512.kadd.b
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _kadd_mask8(A, B);
+}
+
+__mmask16 test_kadd_mask16(__mmask16 A, __mmask16 B) {
+ // CIR-LABEL: _kadd_mask16
+ // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+ // CIR: cir.call_llvm_intrinsic "x86.avx512.kadd.w"
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+ // LLVM-LABEL: _kadd_mask16
+ // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+ // LLVM: [[RES:%.*]] = call <16 x i1> @llvm.x86.avx512.kadd.w(<16 x i1> 
[[L]], <16 x i1> [[R]])
+ // LLVM: bitcast <16 x i1> [[RES]] to i16
+
+ // OGCG-LABEL: _kadd_mask16
+ // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+ // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+ // OGCG: call <16 x i1> @llvm.x86.avx512.kadd.w
+ // OGCG: bitcast <16 x i1> {{.*}} to i16
+ return _kadd_mask16(A, B);
+}
+
+__mmask8 test_kand_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kand_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kand_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[RES:%.*]] = and <8 x i1> [[L]], [[R]]
+ // LLVM: bitcast <8 x i1> [[RES]] to i8
+
+ // OGCG-LABEL: _kand_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: and <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _kand_mask8(A, B);
+}
+
+
+__mmask8 test_kandn_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kandn_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kandn_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: xor <8 x i1> [[L]], splat (i1 true)
+ // LLVM: and <8 x i1>
+ // LLVM: bitcast <8 x i1> {{.*}} to i8
+
+ // OGCG-LABEL: _kandn_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: xor <8 x i1>
+ // OGCG: and <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+
+ return _kandn_mask8(A, B);
+}
+
+__mmask8 test_kor_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kor_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kor_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: or <8 x i1> [[L]], [[R]]
+ // LLVM: bitcast <8 x i1> {{.*}} to i8
+
+ // OGCG-LABEL: _kor_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: or <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _kor_mask8(A, B);
+}
+
+__mmask8 test_kxor_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kxor_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kxor_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: xor <8 x i1> [[L]], [[R]]
+ // LLVM: bitcast <8 x i1> {{.*}} to i8
+
+ // OGCG-LABEL: _kxor_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: xor <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _kxor_mask8(A, B);
+}
+
+__mmask8 test_kxnor_mask8(__mmask8 A, __mmask8 B) {
+ // CIR-LABEL: _kxnor_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _kxnor_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[R:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: [[NOT:%.*]] = xor <8 x i1> [[L]], splat (i1 true)
+ // LLVM: [[RES:%.*]] = xor <8 x i1> [[NOT]], [[R]]
+ // LLVM: bitcast <8 x i1> [[RES]] to i8
+
+ // OGCG-LABEL: _kxnor_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: xor <8 x i1>
+ // OGCG: xor <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _kxnor_mask8(A, B);
+}
+
+
+__mmask8 test_knot_mask8(__mmask8 A) {
+ // CIR-LABEL: _knot_mask8
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.unary(not, {{.*}}) : !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: _knot_mask8
+ // LLVM: [[L:%.*]] = bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: xor <8 x i1> [[L]], {{.*}}
+ // LLVM: bitcast <8 x i1> {{.*}} to i8
+
+ // OGCG-LABEL: _knot_mask8
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: xor <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return _knot_mask8(A);
+}
+
+// Multiple user-level mask helpers inline to this same kmov builtin.
+// CIR does not implement any special lowering for those helpers.
+//
+// Therefore, testing the builtin (__builtin_ia32_kmov*) directly is
+// sufficient to cover the CIR lowering behavior. Testing each helper
+// individually would add no new CIR paths.
+
+__mmask8 test_kmov_b(__mmask8 A) {
+ // CIR-LABEL: test_kmov_b
+ // CIR: cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<u, 1>>
+ // CIR: cir.cast bitcast {{.*}} : !cir.vector<8 x !cir.int<u, 1>> -> !u8i
+
+ // LLVM-LABEL: test_kmov_b
+ // LLVM: bitcast i8 %{{.*}} to <8 x i1>
+ // LLVM: bitcast <8 x i1> {{.*}} to i8
+
+ // OGCG-LABEL: test_kmov_b
+ // OGCG: bitcast i8 %{{.*}} to <8 x i1>
+ // OGCG: bitcast <8 x i1> {{.*}} to i8
+ return __builtin_ia32_kmovb(A);
+}
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
index dc54a87856a7c..31d6bc3d22408 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512f-builtins.c
@@ -77,3 +77,154 @@ __m512i test_mm512_undefined_epi32(void) {
   // OGCG: ret <8 x i64> zeroinitializer
   return _mm512_undefined_epi32();
 }
+
+__mmask16 test_mm512_kand(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _mm512_kand
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_kand
+  // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[RES:%.*]] = and <16 x i1> [[L]], [[R]]
+  // LLVM: bitcast <16 x i1> [[RES]] to i16
+
+  // OGCG-LABEL: _mm512_kand
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: and <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_kand(A, B);
+}
+
+__mmask16 test_mm512_kandn(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _mm512_kandn
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.binop(and, {{.*}}, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_kandn
+  // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: xor <16 x i1> [[L]], splat (i1 true)
+  // LLVM: and <16 x i1>
+  // LLVM: bitcast <16 x i1> {{.*}} to i16
+
+  // OGCG-LABEL: _mm512_kandn
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: xor <16 x i1>
+  // OGCG: and <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_kandn(A, B);
+}
+
+__mmask16 test_mm512_kor(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _mm512_kor
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.binop(or, {{.*}}, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_kor
+  // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: or <16 x i1> [[L]], [[R]]
+  // LLVM: bitcast <16 x i1> {{.*}} to i16
+
+  // OGCG-LABEL: _mm512_kor
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: or <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_kor(A, B);
+}
+
+__mmask16 test_mm512_kxnor(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _mm512_kxnor
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_kxnor
+  // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[NOT:%.*]] = xor <16 x i1> [[L]], splat (i1 true)
+  // LLVM: [[RES:%.*]] = xor <16 x i1> [[NOT]], [[R]]
+  // LLVM: bitcast <16 x i1> [[RES]] to i16
+
+  // OGCG-LABEL: _mm512_kxnor
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: xor <16 x i1>
+  // OGCG: xor <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_kxnor(A, B);
+}
+
+__mmask16 test_mm512_kxor(__mmask16 A, __mmask16 B) {
+  // CIR-LABEL: _mm512_kxor
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.binop(xor, {{.*}}, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_kxor
+  // LLVM: [[L:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: [[R:%.*]] = bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: xor <16 x i1> [[L]], [[R]]
+  // LLVM: bitcast <16 x i1> {{.*}} to i16
+
+  // OGCG-LABEL: _mm512_kxor
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: xor <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_kxor(A, B);
+}
+
+__mmask16 test_mm512_knot(__mmask16 A) {
+  // CIR-LABEL: _mm512_knot
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.unary(not, {{.*}}) : !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: _mm512_knot
+  // LLVM: bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: xor <16 x i1>
+  // LLVM: bitcast <16 x i1> {{.*}} to i16
+
+  // OGCG-LABEL: _mm512_knot
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: xor <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return _mm512_knot(A);
+}
+
+// Multiple user-level mask helpers inline to this same kmov builtin.
+// CIR does not implement any special lowering for those helpers.
+//
+// Therefore, testing the builtin (__builtin_ia32_kmov*) directly is
+// sufficient to cover the CIR lowering behavior. Testing each helper
+// individually would add no new CIR paths.
+
+__mmask16 test_kmov_w(__mmask16 A) {
+  // CIR-LABEL: test_kmov_w
+  // CIR: cir.cast bitcast {{.*}} : !u16i -> !cir.vector<16 x !cir.int<u, 1>>
+  // CIR: cir.cast bitcast {{.*}} : !cir.vector<16 x !cir.int<u, 1>> -> !u16i
+
+  // LLVM-LABEL: test_kmov_w
+  // LLVM: bitcast i16 %{{.*}} to <16 x i1>
+  // LLVM: bitcast <16 x i1> {{.*}} to i16
+
+  // OGCG-LABEL: test_kmov_w
+  // OGCG: bitcast i16 %{{.*}} to <16 x i1>
+  // OGCG: bitcast <16 x i1> {{.*}} to i16
+  return __builtin_ia32_kmovw(A);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][X86] Implement lowering for AVX512 mask builtins (PR #169774)

Reply via email to