[clang] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics (PR #169582)

via cfe-commits Fri, 05 Dec 2025 03:05:16 -0800

https://github.com/cs25resch11005-bhuvan updated 
https://github.com/llvm/llvm-project/pull/169582


>From 621c41026ce446ce3cf3d80d2fe14b4fb1e0628a Mon Sep 17 00:00:00 2001
From: bhuvan1527 <[email protected]>
Date: Wed, 26 Nov 2025 05:11:22 +0530
Subject: [PATCH 1/2] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics

Added the masked compress builtins in CIR.
Note: This is my first PR to LLVM.
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index b242efc00e491..4949d99c021f0 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -84,6 +84,14 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy &builder, 
mlir::Location loc,
   }
   return maskVec;
 }
+static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr 
*expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){
+  auto ResultTy = cast<cir::VectorType>(ops[1].getType());
+  mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2],  
cast<cir::VectorType>(ResultTy).getSize());
+  llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue};
+  
+  return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op);
+
+}
 
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
                                        mlir::Location loc,
@@ -486,7 +494,9 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_compresshi512_mask:
   case X86::BI__builtin_ia32_compressqi128_mask:
   case X86::BI__builtin_ia32_compressqi256_mask:
-  case X86::BI__builtin_ia32_compressqi512_mask:
+  case X86::BI__builtin_ia32_compressqi512_mask:{
+    return emitX86CompressExpand(*this, expr, ops, true, 
"x86_avx512_mask_compress");
+  }
   case X86::BI__builtin_ia32_gather3div2df:
   case X86::BI__builtin_ia32_gather3div2di:
   case X86::BI__builtin_ia32_gather3div4df:

>From d7df56ea6162c6c7dc66fe6cc2a3fe2cdab45207 Mon Sep 17 00:00:00 2001
From: bhuvan1527 <[email protected]>
Date: Thu, 27 Nov 2025 19:59:41 +0530
Subject: [PATCH 2/2] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics

This PR is related to issue #167765.
Added support for the masked compress builtins in CIR CodeGen.
---
 clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp    |  33 +++-
 .../CodeGenBuiltins/X86/avx512vl-builtins.c   |  45 +++++
 .../X86/avx512vlvbmi2-builtins.c              | 171 ++++++++++++++++++
 3 files changed, 239 insertions(+), 10 deletions(-)
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
 create mode 100644 clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp 
b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
index 4949d99c021f0..d915e8be30142 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp
@@ -84,13 +84,16 @@ static mlir::Value getMaskVecValue(CIRGenBuilderTy 
&builder, mlir::Location loc,
   }
   return maskVec;
 }
-static mlir::Value emitX86CompressExpand(CIRGenFunction &cgf, const CallExpr 
*expr,ArrayRef<mlir::Value> ops, bool IsCompress, const std::string &ID){
-  auto ResultTy = cast<cir::VectorType>(ops[1].getType());
-  mlir::Value MaskValue = getMaskVecValue(cgf, expr, ops[2],  
cast<cir::VectorType>(ResultTy).getSize());
-  llvm::SmallVector<mlir::Value, 4> op{ops[0], ops[1], MaskValue};
-  
-  return emitIntrinsicCallOp(cgf,expr, ID, ResultTy, op);
-
+static mlir::Value emitX86CompressExpand(CIRGenBuilderTy &builder,
+                                         mlir::Location loc, mlir::Value 
source,
+                                         mlir::Value mask,
+                                         mlir::Value inputVector,
+                                         const std::string &id) {
+  auto ResultTy = cast<cir::VectorType>(mask.getType());
+  mlir::Value MaskValue = getMaskVecValue(
+      builder, loc, inputVector, cast<cir::VectorType>(ResultTy).getSize());
+  return emitIntrinsicCallOp(builder, loc, id, ResultTy,
+                             mlir::ValueRange{source, mask, MaskValue});
 }
 
 static mlir::Value emitX86MaskAddLogic(CIRGenBuilderTy &builder,
@@ -459,6 +462,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_compressstoreqi128_mask:
   case X86::BI__builtin_ia32_compressstoreqi256_mask:
   case X86::BI__builtin_ia32_compressstoreqi512_mask:
+    cgm.errorNYI(expr->getSourceRange(),
+                 std::string("unimplemented X86 builtin call: ") +
+                     getContext().BuiltinInfo.getName(builtinID));
+    return {};
   case X86::BI__builtin_ia32_expanddf128_mask:
   case X86::BI__builtin_ia32_expanddf256_mask:
   case X86::BI__builtin_ia32_expanddf512_mask:
@@ -476,7 +483,11 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_expandhi512_mask:
   case X86::BI__builtin_ia32_expandqi128_mask:
   case X86::BI__builtin_ia32_expandqi256_mask:
-  case X86::BI__builtin_ia32_expandqi512_mask:
+  case X86::BI__builtin_ia32_expandqi512_mask:{
+  mlir::Location loc = getLoc(expr->getExprLoc());
+  return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
+                               "x86.avx512.mask.expand");
+}
   case X86::BI__builtin_ia32_compressdf128_mask:
   case X86::BI__builtin_ia32_compressdf256_mask:
   case X86::BI__builtin_ia32_compressdf512_mask:
@@ -495,8 +506,10 @@ mlir::Value CIRGenFunction::emitX86BuiltinExpr(unsigned 
builtinID,
   case X86::BI__builtin_ia32_compressqi128_mask:
   case X86::BI__builtin_ia32_compressqi256_mask:
   case X86::BI__builtin_ia32_compressqi512_mask:{
-    return emitX86CompressExpand(*this, expr, ops, true, 
"x86_avx512_mask_compress");
-  }
+  mlir::Location loc = getLoc(expr->getExprLoc());
+  return emitX86CompressExpand(builder, loc, ops[0], ops[1], ops[2],
+                               "x86.avx512.mask.compress");
+}
   case X86::BI__builtin_ia32_gather3div2df:
   case X86::BI__builtin_ia32_gather3div2di:
   case X86::BI__builtin_ia32_gather3div4df:
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
new file mode 100644
index 0000000000000..a7366b657840d
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vl-builtins.c
@@ -0,0 +1,45 @@
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -fclangir -emit-cir -o 
%t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vl -fclangir -emit-llvm -o 
%t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+
+__m128d test_mm_mask_expand_pd(__m128d __W, __mmask8 __U, __m128d __A) {
+  // CIR-LABEL: _mm_mask_expand_pd
+  // CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 
x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x 
!cir.int<u, 1>>
+
+  // LLVM-LABEL: test_mm_mask_expand_pd
+  // LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 
x i32> <i32 0, i32 1>
+
+  // OGCG-LABEL: test_mm_mask_expand_pd
+  // OGCG: %[[BC2:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[SHUF2:.*]] = shufflevector <8 x i1> %[[BC2]], <8 x i1> %[[BC2]], 
<2 x i32> <i32 0, i32 1>
+
+  return _mm_mask_expand_pd(__W,__U,__A);
+}
+
+__m128d test_mm_maskz_expand_pd(__mmask8 __U, __m128d __A) {
+  // CIR-LABEL: _mm_maskz_expand_pd
+  // CIR: %[[MASK:.*]] = cir.cast bitcast {{.*}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // CIR: %[[SHUF:.*]] = cir.vec.shuffle(%[[MASK]], %[[MASK]] : !cir.vector<8 
x !cir.int<u, 1>>) [#cir.int<0> : !s32i, #cir.int<1> : !s32i] : !cir.vector<2 x 
!cir.int<u, 1>>
+
+  // LLVM-LABEL: test_mm_maskz_expand_pd
+  // LLVM: %[[BC:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // LLVM: %[[SHUF:.*]] = shufflevector <8 x i1> %[[BC]], <8 x i1> %[[BC]], <2 
x i32> <i32 0, i32 1>
+
+  // OGCG-LABEL: test_mm_maskz_expand_pd
+  // OGCG: %[[BC2:.*]] = bitcast i8 %{{.*}} to <8 x i1>
+  // OGCG: %[[SHUF2:.*]] = shufflevector <8 x i1> %[[BC2]], <8 x i1> %[[BC2]], 
<2 x i32> <i32 0, i32 1>
+
+  return _mm_maskz_expand_pd(__U,__A);
+}
+
diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c 
b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c
new file mode 100644
index 0000000000000..964971d71eb6c
--- /dev/null
+++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvbmi2-builtins.c
@@ -0,0 +1,171 @@
+
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir 
-o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir 
-emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir -emit-cir 
-o %t.cir -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s 
-triple=x86_64-unknown-linux -target-feature +avx512vlvbmi2 -fclangir 
-emit-llvm -o %t.ll -Wall -Werror -Wsign-conversion
+// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s
+
+#include <immintrin.h>
+
+
+__m128i test_mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_mask_compress_epi16
+  // %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, 
%{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, 
!cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_mask_compress_epi16
+  // %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x 
i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_mask_compress_epi16
+  // %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x 
i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  return _mm_mask_compress_epi16(__S, __U, __D);
+}
+
+__m128i test_mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_maskz_compress_epi16
+  // %[[MASK8:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, 
%{{.+}}, %[[MASK8]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, 
!cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_maskz_compress_epi16
+  // %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x 
i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_maskz_compress_epi16
+  // %[[MASK8:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.compress.v8i16(<8 x 
i16> %{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK8]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  return _mm_maskz_compress_epi16(__U, __D);
+}
+
+__m128i test_mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_mask_compress_epi8
+  // %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, 
%{{.+}}, %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, 
!cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_mask_compress_epi8
+  // %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x 
i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_mask_compress_epi8
+  // %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x 
i8> %{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK16]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  return _mm_mask_compress_epi8(__S, __U, __D);
+}
+
+__m128i test_mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_maskz_compress_epi8
+  // %[[ZERO:.+]] = cir.call @_mm_setzero_si128() : () -> !cir.vector<2 x 
!s64i>
+  // %[[CAST1:.+]] = cir.cast bitcast %[[ZERO]] : !cir.vector<2 x !s64i> -> 
!cir.vector<16 x !s8i>
+  // %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.compress" %{{.+}}, 
%[[CAST1]], %[[MASK16]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, 
!cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
+  // %[[CAST2:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_maskz_compress_epi8
+  // store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
+  // %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
+  // %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x 
i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
+  // %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_maskz_compress_epi8
+  // store <2 x i64> zeroinitializer, ptr %{{.+}}, align 16
+  // %[[CAST1:.+]] = bitcast <2 x i64> %{{.+}} to <16 x i8>
+  // %[[MASK16:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.compress.v16i8(<16 x 
i8> %{{.+}}, <16 x i8> %[[CAST1]], <16 x i1> %[[MASK16]])
+  // %[[CAST2:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  return _mm_maskz_compress_epi8(__U, __D);
+}
+
+__m128i test_mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_mask_expand_epi16
+  // %[[MASK16:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, 
%{{.+}}, %[[MASK16]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, 
!cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_mask_expand_epi16
+  // %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> 
%{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_mask_expand_epi16
+  // %[[MASK16:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> 
%{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK16]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  return _mm_mask_expand_epi16(__S, __U, __D);
+}
+
+__m128i test_mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_maskz_expand_epi16
+  // %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u8i -> !cir.vector<8 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, 
%{{.+}}, %[[MASK]]: (!cir.vector<8 x !s16i>, !cir.vector<8 x !s16i>, 
!cir.vector<8 x !cir.int<u, 1>>) -> !cir.vector<8 x !s16i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<8 x !s16i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_maskz_expand_epi16
+  // %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> 
%{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_maskz_expand_epi16
+  // %[[MASK:.+]] = bitcast i8 %{{.+}} to <8 x i1>
+  // %[[RES:.+]] = call <8 x i16> @llvm.x86.avx512.mask.expand.v8i16(<8 x i16> 
%{{.+}}, <8 x i16> %{{.+}}, <8 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <8 x i16> %[[RES]] to <2 x i64>
+
+  return _mm_maskz_expand_epi16(__U, __D);
+}
+
+__m128i test_mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_mask_expand_epi8
+  // %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, 
%{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, 
!cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_mask_expand_epi8
+  // %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> 
%{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_mask_expand_epi8
+  // %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> 
%{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  return _mm_mask_expand_epi8(__S, __U, __D);
+}
+
+__m128i test_mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) {
+  // CIR-LABEL: test_mm_maskz_expand_epi8
+  // %[[MASK:.+]] = cir.cast bitcast %{{.+}} : !u16i -> !cir.vector<16 x 
!cir.int<u, 1>>
+  // %[[RES:.+]] = cir.call_llvm_intrinsic "x86.avx512.mask.expand" %{{.+}}, 
%{{.+}}, %[[MASK]]: (!cir.vector<16 x !s8i>, !cir.vector<16 x !s8i>, 
!cir.vector<16 x !cir.int<u, 1>>) -> !cir.vector<16 x !s8i>
+  // %[[CAST:.+]] = cir.cast bitcast %[[RES]] : !cir.vector<16 x !s8i> -> 
!cir.vector<2 x !s64i>
+
+  // LLVM-LABEL: test_mm_maskz_expand_epi8
+  // %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> 
%{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  // OGCG-LABEL: test_mm_maskz_expand_epi8
+  // %[[MASK:.+]] = bitcast i16 %{{.+}} to <16 x i1>
+  // %[[RES:.+]] = call <16 x i8> @llvm.x86.avx512.mask.expand.v16i8(<16 x i8> 
%{{.+}}, <16 x i8> %{{.+}}, <16 x i1> %[[MASK]])
+  // %[[CAST:.+]] = bitcast <16 x i8> %[[RES]] to <2 x i64>
+
+  return _mm_maskz_expand_epi8(__U, __D);
+}

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR][CIRGen][Builtin][X86] Masked compress Intrinsics (PR #169582)

Reply via email to