[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)

via cfe-commits Thu, 01 Jan 2026 16:46:36 -0800

https://github.com/DannyDaoBoYang updated 
https://github.com/llvm/llvm-project/pull/173802


>From df2937fa546f52af32f0db7d5d94ff5611b59f71 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <[email protected]>
Date: Sun, 28 Dec 2025 17:00:49 -0500
Subject: [PATCH 1/2] Add support for pmovqd512_mask and pmovwb512_mask

Add CIR support for pmovqd512_mask and pmovwb512_mask. Also fix a minor type
mismatch in CIRGenCleanup.cpp that caused a compile error on Windows.
---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      | 16 ++++++++++
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  5 +++-
 clang/lib/CIR/CodeGen/CIRGenCleanup.cpp       |  2 +-
 .../X86/pmovqd-mask-builtins.c                | 29 +++++++++++++++++++
 4 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h 
b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index cc28941aaa079..481d06091f012 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   // Cast/Conversion Operators
   
//===--------------------------------------------------------------------===//
 
+  /// Create a value truncation to a narrower type.
+  /// Returns the source if types already match. CIR casts do not
+  /// encode NUW/NSW; wrap semantics should be handled by callers.
+  /// Supports both scalar integers and vectors of integers.
+  mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
+                          mlir::Type newTy) {
+    auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
+    if (newTy == srcIntTy)
+      return src;
+    return createCast(loc, cir::CastKind::integral, src, newTy);
+  }
+
+  mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+    return createTrunc(src.getLoc(), src, newTy);
+  }
+
   mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
                          mlir::Value src, mlir::Type newTy) {
     if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 1c87e945de846..b553327f676f5 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
                                     mask);
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
-  case X86::BI__builtin_ia32_pmovwb512_mask:
+  case X86::BI__builtin_ia32_pmovwb512_mask: {
+    mlir::Value Res = builder.createTrunc(ops[0], 
cast<cir::VectorType>(ops[1].getType()));
+    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, 
ops[1]);
+  }
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
   case X86::BI__builtin_ia32_blendps:
diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp 
b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
index 6c6cb402d1190..8d9ea7c6c22eb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp
@@ -97,7 +97,7 @@ EHScopeStack::getInnermostActiveNormalCleanup() const {
 char *EHScopeStack::allocate(size_t size) {
   size = llvm::alignTo(size, ScopeStackAlignment);
   if (!startOfBuffer) {
-    unsigned capacity = llvm::PowerOf2Ceil(std::max(size, 1024ul));
+    unsigned capacity = llvm::PowerOf2Ceil(std::max<size_t>(size, 1024ul));
     startOfBuffer = std::make_unique<char[]>(capacity);
     startOfData = endOfBuffer = startOfBuffer.get() + capacity;
   } else if (static_cast<size_t>(startOfData - startOfBuffer.get()) < size) {
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
new file mode 100644
index 0000000000000..b43d2dc8f050d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -0,0 +1,29 @@
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+
+#include <immintrin.h>
+
+__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) {
+  // CIR-LABEL: test_pmovqd_mask
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  return __builtin_ia32_pmovqd512_mask(a, b, mask);
+}
+
+__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
+  // CIR-LABEL: test_pmovqd_maskz
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  __m256i zero = _mm256_setzero_si256();
+  return __builtin_ia32_pmovqd512_mask(a, zero, mask);
+}
+
+__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
+  // CIR-LABEL: test_pmovwb_mask
+  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> 
-> !cir.vector<32 x !s8i>
+  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 
x !cir.int<s, 1>>
+  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 
x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+  return __builtin_ia32_pmovwb512_mask(a, b, mask);
+}
\ No newline at end of file

>From b989427cede8fbbde25fddd0f47a334b3d88a6a0 Mon Sep 17 00:00:00 2001
From: DannyDaoBoYang <[email protected]>
Date: Thu, 1 Jan 2026 19:46:15 -0500
Subject: [PATCH 2/2] add LLVM and OGCG in test, Combine Trunc function calls,
 format

---
 .../CIR/Dialect/Builder/CIRBaseBuilder.h      |  8 +--
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  6 ++-
 .../X86/pmovqd-mask-builtins.c                | 51 +++++++++++++++----
 3 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h 
b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index 481d06091f012..b66b7171e0628 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -429,18 +429,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
   /// Returns the source if types already match. CIR casts do not
   /// encode NUW/NSW; wrap semantics should be handled by callers.
   /// Supports both scalar integers and vectors of integers.
-  mlir::Value createTrunc(mlir::Location loc, mlir::Value src,
-                          mlir::Type newTy) {
+  mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
+    mlir::Location loc = src.getLoc();
     auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType());
     if (newTy == srcIntTy)
       return src;
     return createCast(loc, cir::CastKind::integral, src, newTy);
   }
 
-  mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) {
-    return createTrunc(src.getLoc(), src, newTy);
-  }
-
   mlir::Value createCast(mlir::Location loc, cir::CastKind kind,
                          mlir::Value src, mlir::Type newTy) {
     if (newTy == src.getType())
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b553327f676f5..253fca321f742 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -1275,8 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, 
const CallExpr *expr) {
   }
   case X86::BI__builtin_ia32_pmovqd512_mask:
   case X86::BI__builtin_ia32_pmovwb512_mask: {
-    mlir::Value Res = builder.createTrunc(ops[0], 
cast<cir::VectorType>(ops[1].getType()));
-    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, 
ops[1]);
+    mlir::Value Res =
+        builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType()));
+    return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res,
+                         ops[1]);
   }
   case X86::BI__builtin_ia32_pblendw128:
   case X86::BI__builtin_ia32_blendpd:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
index b43d2dc8f050d..797ecf67ea9ec 100644
--- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
+++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c
@@ -1,5 +1,9 @@
-// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror 
-Wsign-conversion
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
 // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s
+// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall 
-Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
 
 #include <immintrin.h>
 
@@ -8,22 +12,49 @@ __m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 
mask) {
   // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
   // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
   // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
+  // LLVM-LABEL: @test_pmovqd_mask
+  // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+  // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer
+  // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x 
i32> %[[B_CAST]]
+  // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  // LLVM: store <4 x i64> %[[RETBC]],
+  // LLVM: %[[RET:.*]] = load <4 x i64>,
+  // LLVM: ret <4 x i64> %[[RET]]
+  // OGCG-LABEL: @test_pmovqd_mask
+  // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32>
+  // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], 
<8 x i32> %[[B_CAST]]
+  // OGCG: %[[RET:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64>
+  // OGCG: ret <4 x i64> %[[RET]]
   return __builtin_ia32_pmovqd512_mask(a, b, mask);
 }
 
-__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) {
-  // CIR-LABEL: test_pmovqd_maskz
-  // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> 
!cir.vector<8 x !s32i>
-  // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<s, 1>>
-  // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x 
!cir.int<s, 1>>, !cir.vector<8 x !s32i>
-  __m256i zero = _mm256_setzero_si256();
-  return __builtin_ia32_pmovqd512_mask(a, zero, mask);
-}
-
 __m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) {
   // CIR-LABEL: test_pmovwb_mask
   // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> 
-> !cir.vector<32 x !s8i>
   // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 
x !cir.int<s, 1>>
   // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 
x !cir.int<s, 1>>, !cir.vector<32 x !s8i>
+  // LLVM-LABEL: @test_pmovwb_mask
+  // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+  // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+  // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+  // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer
+  // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 
x i8> %[[B_CAST]]
+  // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  // LLVM: store <4 x i64> %[[RETBC]],
+  // LLVM: %[[RET:.*]] = load <4 x i64>,
+  // LLVM: ret <4 x i64> %[[RET]]
+  // OGCG-LABEL: @test_pmovwb_mask
+  // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16>
+  // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8>
+  // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8>
+  // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1>
+  // OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], 
<32 x i8> %[[B_CAST]]
+  // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64>
+  // OGCG: ret <4 x i64> %[[RET]]
   return __builtin_ia32_pmovwb512_mask(a, b, mask);
 }
\ No newline at end of file

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR] Add support for X86 pmovqd512_mask and pmovwb512_mask builtins (PR #173802)

Reply via email to