https://github.com/DannyDaoBoYang updated https://github.com/llvm/llvm-project/pull/173802
>From df2937fa546f52af32f0db7d5d94ff5611b59f71 Mon Sep 17 00:00:00 2001 From: DannyDaoBoYang <[email protected]> Date: Sun, 28 Dec 2025 17:00:49 -0500 Subject: [PATCH 1/2] Add support for pmovqd512_mask and pmovwb512_mask Add CIR support for pmovqd512_mask and pmovwb512_mask. Also fix a type mismatch in CIRGenCleanup.cpp that caused a compile error on Windows. --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 16 ++++++++++ clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 5 +++- clang/lib/CIR/CodeGen/CIRGenCleanup.cpp | 2 +- .../X86/pmovqd-mask-builtins.c | 29 +++++++++++++++++++ 4 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index cc28941aaa079..481d06091f012 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -425,6 +425,22 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { // Cast/Conversion Operators //===--------------------------------------------------------------------===// + /// Create a value truncation to a narrower type. + /// Returns the source if types already match. CIR casts do not + /// encode NUW/NSW; wrap semantics should be handled by callers. + /// Supports both scalar integers and vectors of integers. 
+ mlir::Value createTrunc(mlir::Location loc, mlir::Value src, + mlir::Type newTy) { + auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType()); + if (newTy == srcIntTy) + return src; + return createCast(loc, cir::CastKind::integral, src, newTy); + } + + mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { + return createTrunc(src.getLoc(), src, newTy); + } + mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy) { if (newTy == src.getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index 1c87e945de846..b553327f676f5 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1274,7 +1274,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { mask); } case X86::BI__builtin_ia32_pmovqd512_mask: - case X86::BI__builtin_ia32_pmovwb512_mask: + case X86::BI__builtin_ia32_pmovwb512_mask: { + mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); + return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]); + } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: case X86::BI__builtin_ia32_blendps: diff --git a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp index 6c6cb402d1190..8d9ea7c6c22eb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenCleanup.cpp @@ -97,7 +97,7 @@ EHScopeStack::getInnermostActiveNormalCleanup() const { char *EHScopeStack::allocate(size_t size) { size = llvm::alignTo(size, ScopeStackAlignment); if (!startOfBuffer) { - unsigned capacity = llvm::PowerOf2Ceil(std::max(size, 1024ul)); + unsigned capacity = llvm::PowerOf2Ceil(std::max<size_t>(size, 1024ul)); startOfBuffer = std::make_unique<char[]>(capacity); startOfData = endOfBuffer = startOfBuffer.get() + capacity; } else if (static_cast<size_t>(startOfData - 
startOfBuffer.get()) < size) { diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c new file mode 100644 index 0000000000000..b43d2dc8f050d --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c @@ -0,0 +1,29 @@ +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s + +#include <immintrin.h> + +__m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) { + // CIR-LABEL: test_pmovqd_mask + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + return __builtin_ia32_pmovqd512_mask(a, b, mask); +} + +__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) { + // CIR-LABEL: test_pmovqd_maskz + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + __m256i zero = _mm256_setzero_si256(); + return __builtin_ia32_pmovqd512_mask(a, zero, mask); +} + +__m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) { + // CIR-LABEL: test_pmovwb_mask + // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> + // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> + // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + return __builtin_ia32_pmovwb512_mask(a, 
b, mask); +} \ No newline at end of file >From b989427cede8fbbde25fddd0f47a334b3d88a6a0 Mon Sep 17 00:00:00 2001 From: DannyDaoBoYang <[email protected]> Date: Thu, 1 Jan 2026 19:46:15 -0500 Subject: [PATCH 2/2] add LLVM and OGCG in test, Combine Trunc function calls, format --- .../CIR/Dialect/Builder/CIRBaseBuilder.h | 8 +-- clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 6 ++- .../X86/pmovqd-mask-builtins.c | 51 +++++++++++++++---- 3 files changed, 47 insertions(+), 18 deletions(-) diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index 481d06091f012..b66b7171e0628 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -429,18 +429,14 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { /// Returns the source if types already match. CIR casts do not /// encode NUW/NSW; wrap semantics should be handled by callers. /// Supports both scalar integers and vectors of integers. 
- mlir::Value createTrunc(mlir::Location loc, mlir::Value src, - mlir::Type newTy) { + mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { + mlir::Location loc = src.getLoc(); auto srcIntTy = mlir::dyn_cast<cir::VectorType>(src.getType()); if (newTy == srcIntTy) return src; return createCast(loc, cir::CastKind::integral, src, newTy); } - mlir::Value createTrunc(mlir::Value src, mlir::Type newTy) { - return createTrunc(src.getLoc(), src, newTy); - } - mlir::Value createCast(mlir::Location loc, cir::CastKind kind, mlir::Value src, mlir::Type newTy) { if (newTy == src.getType()) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index b553327f676f5..253fca321f742 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -1275,8 +1275,10 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { } case X86::BI__builtin_ia32_pmovqd512_mask: case X86::BI__builtin_ia32_pmovwb512_mask: { - mlir::Value Res = builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); - return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, ops[1]); + mlir::Value Res = + builder.createTrunc(ops[0], cast<cir::VectorType>(ops[1].getType())); + return emitX86Select(builder, getLoc(expr->getExprLoc()), ops[2], Res, + ops[1]); } case X86::BI__builtin_ia32_pblendw128: case X86::BI__builtin_ia32_blendpd: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c index b43d2dc8f050d..797ecf67ea9ec 100644 --- a/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c +++ b/clang/test/CIR/CodeGenBuiltins/X86/pmovqd-mask-builtins.c @@ -1,5 +1,9 @@ -// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux 
-target-feature +avx512f -target-feature +avx512bw -fclangir -emit-cir -o %t.cir -Wall -Werror -Wsign-conversion // RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -fclangir -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=LLVM --input-file=%t.ll %s +// RUN: %clang_cc1 -x c -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512f -target-feature +avx512bw -emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s #include <immintrin.h> @@ -8,22 +12,49 @@ __m256i test_pmovqd_mask(__m512i a, __m256i b, __mmask8 mask) { // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> + // LLVM-LABEL: @test_pmovqd_mask + // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // LLVM: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // LLVM: %[[CMP:.*]] = icmp ne <8 x i1> %[[MASK_VEC]], zeroinitializer + // LLVM: %[[SEL:.*]] = select <8 x i1> %[[CMP]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] + // LLVM: %[[RETBC:.*]] = bitcast <8 x i32> %[[SEL]] to <4 x i64> + // LLVM: store <4 x i64> %[[RETBC]], + // LLVM: %[[RET:.*]] = load <4 x i64>, + // LLVM: ret <4 x i64> %[[RET]] + // OGCG-LABEL: @test_pmovqd_mask + // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[TRUNC:.*]] = trunc <8 x i64> %{{.*}} to <8 x i32> + // OGCG: %[[MASK_VEC:.*]] = bitcast i8 %{{.*}} to <8 x i1> + // OGCG: %[[SEL:.*]] = select <8 x i1> %[[MASK_VEC]], <8 x i32> %[[TRUNC]], <8 x i32> %[[B_CAST]] + // OGCG: %[[RET:.*]] = 
bitcast <8 x i32> %[[SEL]] to <4 x i64> + // OGCG: ret <4 x i64> %[[RET]] return __builtin_ia32_pmovqd512_mask(a, b, mask); } -__m256i test_pmovqd_maskz(__m512i a, __mmask8 mask) { - // CIR-LABEL: test_pmovqd_maskz - // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<8 x !s64i> -> !cir.vector<8 x !s32i> - // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x !cir.int<s, 1>> - // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<8 x !cir.int<s, 1>>, !cir.vector<8 x !s32i> - __m256i zero = _mm256_setzero_si256(); - return __builtin_ia32_pmovqd512_mask(a, zero, mask); -} - __m256i test_pmovwb_mask(__m512i a, __m256i b, __mmask32 mask) { // CIR-LABEL: test_pmovwb_mask // CIR: %[[TRUNC:.*]] = cir.cast integral {{.*}} : !cir.vector<32 x !s16i> -> !cir.vector<32 x !s8i> // CIR: %[[MASK_VEC:.*]] = cir.cast bitcast {{.*}} : !u32i -> !cir.vector<32 x !cir.int<s, 1>> // CIR: cir.vec.ternary(%[[MASK_VEC]], %[[TRUNC]], {{.*}}) : !cir.vector<32 x !cir.int<s, 1>>, !cir.vector<32 x !s8i> + // LLVM-LABEL: @test_pmovwb_mask + // LLVM: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> + // LLVM: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> + // LLVM: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> + // LLVM: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + // LLVM: %[[CMP:.*]] = icmp ne <32 x i1> %[[MASK_VEC]], zeroinitializer + // LLVM: %[[SEL:.*]] = select <32 x i1> %[[CMP]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] + // LLVM: %[[RETBC:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> + // LLVM: store <4 x i64> %[[RETBC]], + // LLVM: %[[RET:.*]] = load <4 x i64>, + // LLVM: ret <4 x i64> %[[RET]] + // OGCG-LABEL: @test_pmovwb_mask + // OGCG: %[[A_CAST:.*]] = bitcast <8 x i64> %{{.*}} to <32 x i16> + // OGCG: %[[B_CAST:.*]] = bitcast <4 x i64> %{{.*}} to <32 x i8> + // OGCG: %[[TRUNC:.*]] = trunc <32 x i16> %[[A_CAST]] to <32 x i8> + // OGCG: %[[MASK_VEC:.*]] = bitcast i32 %{{.*}} to <32 x i1> + 
// OGCG: %[[SEL:.*]] = select <32 x i1> %[[MASK_VEC]], <32 x i8> %[[TRUNC]], <32 x i8> %[[B_CAST]] + // OGCG: %[[RET:.*]] = bitcast <32 x i8> %[[SEL]] to <4 x i64> + // OGCG: ret <4 x i64> %[[RET]] return __builtin_ia32_pmovwb512_mask(a, b, mask); } \ No newline at end of file _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
