Author: Vishruth Thimmaiah Date: 2026-01-12T17:04:01-08:00 New Revision: c6fc6adb7e32bf6fd77e3e73ad74752881e131fb
URL: https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb DIFF: https://github.com/llvm/llvm-project/commit/c6fc6adb7e32bf6fd77e3e73ad74752881e131fb.diff LOG: [CIR][X86] Add support for `intersect` builtins (#172554) adds support for the `__builtin_ia32_vp2intersect_d`/`__builtin_ia32_vp2intersect_q` x86 builtins. Part of #167765 --------- Signed-off-by: vishruth-thimmaiah <[email protected]> Added: clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c Modified: clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp Removed: ################################################################################ diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp index f88e57aca6a08..cc3af713bc8c2 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltinX86.cpp @@ -264,15 +264,15 @@ static mlir::Value emitX86MaskTest(CIRGenBuilderTy &builder, mlir::Location loc, mlir::ValueRange{lhsVec, rhsVec}); } -// TODO: The cgf parameter should be removed when all the NYI cases are -// implemented. 
-static std::optional<mlir::Value> -emitX86MaskedCompareResult(CIRGenFunction &cgf, CIRGenBuilderTy &builder, - mlir::Value cmp, unsigned numElts, - mlir::Value maskIn, mlir::Location loc) { +static mlir::Value emitX86MaskedCompareResult(CIRGenBuilderTy &builder, + mlir::Value cmp, unsigned numElts, + mlir::Value maskIn, + mlir::Location loc) { if (maskIn) { - cgf.cgm.errorNYI(loc, "emitX86MaskedCompareResult"); - return {}; + auto c = mlir::dyn_cast_or_null<cir::ConstantOp>(maskIn.getDefiningOp()); + if (!c || !c.isAllOnesValue()) + cmp = builder.createAnd(loc, cmp, + getMaskVecValue(builder, loc, maskIn, numElts)); } if (numElts < 8) { llvm::SmallVector<mlir::Attribute> indices; @@ -340,7 +340,7 @@ emitX86MaskedCompare(CIRGenFunction &cgf, CIRGenBuilderTy &builder, unsigned cc, if (ops.size() == 4) maskIn = ops[3]; - return emitX86MaskedCompareResult(cgf, builder, cmp, numElts, maskIn, loc); + return emitX86MaskedCompareResult(builder, cmp, numElts, maskIn, loc); } // TODO: The cgf parameter should be removed when all the NYI cases are @@ -1840,12 +1840,66 @@ CIRGenFunction::emitX86BuiltinExpr(unsigned builtinID, const CallExpr *expr) { case X86::BI__builtin_ia32_fpclasspd128_mask: case X86::BI__builtin_ia32_fpclasspd256_mask: case X86::BI__builtin_ia32_fpclasspd512_mask: + cgm.errorNYI(expr->getSourceRange(), + std::string("unimplemented X86 builtin call: ") + + getContext().BuiltinInfo.getName(builtinID)); + return mlir::Value{}; case X86::BI__builtin_ia32_vp2intersect_q_512: case X86::BI__builtin_ia32_vp2intersect_q_256: case X86::BI__builtin_ia32_vp2intersect_q_128: case X86::BI__builtin_ia32_vp2intersect_d_512: case X86::BI__builtin_ia32_vp2intersect_d_256: - case X86::BI__builtin_ia32_vp2intersect_d_128: + case X86::BI__builtin_ia32_vp2intersect_d_128: { + unsigned numElts = cast<cir::VectorType>(ops[0].getType()).getSize(); + mlir::Location loc = getLoc(expr->getExprLoc()); + StringRef intrinsicName; + + switch (builtinID) { + default: + 
llvm_unreachable("Unexpected builtin"); + case X86::BI__builtin_ia32_vp2intersect_q_512: + intrinsicName = "x86.avx512.vp2intersect.q.512"; + break; + case X86::BI__builtin_ia32_vp2intersect_q_256: + intrinsicName = "x86.avx512.vp2intersect.q.256"; + break; + case X86::BI__builtin_ia32_vp2intersect_q_128: + intrinsicName = "x86.avx512.vp2intersect.q.128"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_512: + intrinsicName = "x86.avx512.vp2intersect.d.512"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_256: + intrinsicName = "x86.avx512.vp2intersect.d.256"; + break; + case X86::BI__builtin_ia32_vp2intersect_d_128: + intrinsicName = "x86.avx512.vp2intersect.d.128"; + break; + } + + auto resVector = cir::VectorType::get(builder.getBoolTy(), numElts); + + cir::RecordType resRecord = + cir::RecordType::get(&getMLIRContext(), {resVector, resVector}, false, + false, cir::RecordType::RecordKind::Struct); + + mlir::Value call = + emitIntrinsicCallOp(builder, getLoc(expr->getExprLoc()), intrinsicName, + resRecord, mlir::ValueRange{ops[0], ops[1]}); + mlir::Value result = + cir::ExtractMemberOp::create(builder, loc, resVector, call, 0); + result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc); + Address addr = Address( + ops[2], clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); + builder.createStore(loc, result, addr); + + result = cir::ExtractMemberOp::create(builder, loc, resVector, call, 1); + result = emitX86MaskedCompareResult(builder, result, numElts, nullptr, loc); + addr = Address(ops[3], + clang::CharUnits::fromQuantity(std::max(1U, numElts / 8))); + builder.createStore(loc, result, addr); + return mlir::Value{}; + } case X86::BI__builtin_ia32_vpmultishiftqb128: case X86::BI__builtin_ia32_vpmultishiftqb256: case X86::BI__builtin_ia32_vpmultishiftqb512: diff --git a/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c new file mode 100644 
index 0000000000000..6882d2e91961e --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vlvp2intersect-builtins.c @@ -0,0 +1,161 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG + +#include <immintrin.h> + +// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}> +// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<4 x !cir.bool>, !cir.vector<4 x !cir.bool>}> +// CIR: !rec_anon_struct2 = !cir.record<struct {!cir.vector<2 x 
!cir.bool>, !cir.vector<2 x !cir.bool>}> +void test_mm256_2intersect_epi32(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm256_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.256" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s32i>, !cir.vector<8 x !s32i>) -> !rec_anon_struct + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm256_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm256_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.d.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 0 + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %{{.*}}, 1 + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm256_2intersect_epi32(a, b, m0, 
m1); +} + +void test_mm256_2intersect_epi64(__m256i a, __m256i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm256_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.256" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s64i>, !cir.vector<4 x !s64i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm256_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: 
%[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm256_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.q.256(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm256_2intersect_epi64(a, b, m0, m1); +} + +void test_mm_2intersect_epi32(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.128" %{{.*}}, %{{.*}} : (!cir.vector<4 x !s32i>, !cir.vector<4 x !s32i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, 
#cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<4 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<4 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<4 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<4> : !s64i, #cir.int<5> : !s64i, #cir.int<6> : !s64i, #cir.int<7> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <4 x i1>, <4 x i1> } @llvm.x86.avx512.vp2intersect.d.128(<4 x i32> %{{.*}}, <4 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <4 x i1>, <4 x i1> } 
%{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <4 x i1> %[[VAL1]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <4 x i1>, <4 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <4 x i1> %[[VAL2]], <4 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm_2intersect_epi32(a, b, m0, m1); +} + +void test_mm_2intersect_epi64(__m128i a, __m128i b, __mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.128" %{{.*}}, %{{.*}} : (!cir.vector<2 x !s64i>, !cir.vector<2 x !s64i>) -> !rec_anon_struct2 + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[ZERO1:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> + // CIR: %[[SHUF1:.*]] = cir.vec.shuffle(%[[VAL1]], %[[ZERO1]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[SHUF1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct2 -> !cir.vector<2 x !cir.bool> + // CIR: %[[ZERO2:.*]] = cir.const #cir.zero : !cir.vector<2 x !cir.bool> + // CIR: %[[SHUF2:.*]] = cir.vec.shuffle(%[[VAL2]], %[[ZERO2]] : !cir.vector<2 x !cir.bool>) [#cir.int<0> : !s64i, #cir.int<1> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, #cir.int<2> : !s64i, #cir.int<3> : !s64i, 
#cir.int<2> : !s64i, #cir.int<3> : !s64i] : !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[SHUF2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 + // LLVM: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + // LLVM: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <2 x i1>, <2 x i1> } @llvm.x86.avx512.vp2intersect.q.128(<2 x i64> %{{.*}}, <2 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 0 + // OGCG: %[[SHUF1:.*]] = shufflevector <2 x i1> %[[VAL1]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[SHUF1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <2 x i1>, <2 x i1> } %{{.*}}, 1 + // OGCG: %[[SHUF2:.*]] = shufflevector <2 x i1> %[[VAL2]], <2 x i1> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3> + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[SHUF2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm_2intersect_epi64(a, b, m0, m1); +} diff --git 
a/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c new file mode 100644 index 0000000000000..384477454c43e --- /dev/null +++ b/clang/test/CIR/CodeGenBuiltins/X86/avx512vp2intersect-builtins.c @@ -0,0 +1,77 @@ +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-cir -o %t.cir -Wall -Werror +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -fclangir -emit-llvm -o %t.ll -Wall -Werror +// RUN: FileCheck --check-prefixes=LLVM --input-file=%t.ll %s + +// RUN: %clang_cc1 -x c -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG +// RUN: %clang_cc1 -x c++ -flax-vector-conversions=none -ffreestanding %s -triple=x86_64-unknown-linux -target-feature +avx512vp2intersect -emit-llvm -o - -Wall -Werror | FileCheck %s -check-prefix=OGCG + +#include <immintrin.h> + + +// CIR: !rec_anon_struct = !cir.record<struct {!cir.vector<16 x !cir.bool>, !cir.vector<16 x !cir.bool>}> +// CIR: !rec_anon_struct1 = !cir.record<struct {!cir.vector<8 x !cir.bool>, !cir.vector<8 x !cir.bool>}> +void test_mm512_2intersect_epi32(__m512i a, __m512i b, __mmask16 *m0, 
__mmask16 *m1) { + // CIR-LABEL: mm512_2intersect_epi32 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.d.512" %{{.*}}, %{{.*}} : (!cir.vector<16 x !s32i>, !cir.vector<16 x !s32i>) -> !rec_anon_struct + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<16 x !cir.bool> -> !u16i + // CIR: cir.store align(2) %[[CAST1]], %{{.*}} : !u16i, !cir.ptr<!u16i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct -> !cir.vector<16 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<16 x !cir.bool> -> !u16i + // CIR: cir.store align(2) %[[CAST2]], %{{.*}} : !u16i, !cir.ptr<!u16i> + + // LLVM-LABEL: test_mm512_2intersect_epi32 + // LLVM: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0 + // LLVM: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16 + // LLVM: store i16 %[[CAST1]], ptr %{{.*}}, align 2 + // LLVM: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1 + // LLVM: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16 + // LLVM: store i16 %[[CAST2]], ptr %{{.*}}, align 2 + + // OGCG-LABEL: test_mm512_2intersect_epi32 + // OGCG: %[[RES:.*]] = call { <16 x i1>, <16 x i1> } @llvm.x86.avx512.vp2intersect.d.512(<16 x i32> %{{.*}}, <16 x i32> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 0 + // OGCG: %[[CAST1:.*]] = bitcast <16 x i1> %[[VAL1]] to i16 + // OGCG: store i16 %[[CAST1]], ptr %{{.*}}, align 2 + // OGCG: %[[VAL2:.*]] = extractvalue { <16 x i1>, <16 x i1> } %[[RES]], 1 + // OGCG: %[[CAST2:.*]] = bitcast <16 x i1> %[[VAL2]] to i16 + // OGCG: store i16 %[[CAST2]], ptr %{{.*}}, align 2 + _mm512_2intersect_epi32(a, b, m0, m1); +} + +void test_mm512_2intersect_epi64(__m512i a, __m512i b, 
__mmask8 *m0, __mmask8 *m1) { + // CIR-LABEL: mm512_2intersect_epi64 + // CIR: %[[RES:.*]] = cir.call_llvm_intrinsic "x86.avx512.vp2intersect.q.512" %{{.*}}, %{{.*}} : (!cir.vector<8 x !s64i>, !cir.vector<8 x !s64i>) -> !rec_anon_struct1 + // CIR: %[[VAL1:.*]] = cir.extract_member %[[RES]][0] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST1:.*]] = cir.cast bitcast %[[VAL1]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST1]], %{{.*}} : !u8i, !cir.ptr<!u8i> + // CIR: %[[VAL2:.*]] = cir.extract_member %[[RES]][1] : !rec_anon_struct1 -> !cir.vector<8 x !cir.bool> + // CIR: %[[CAST2:.*]] = cir.cast bitcast %[[VAL2]] : !cir.vector<8 x !cir.bool> -> !u8i + // CIR: cir.store align(1) %[[CAST2]], %{{.*}} : !u8i, !cir.ptr<!u8i> + + // LLVM-LABEL: test_mm512_2intersect_epi64 + // LLVM: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // LLVM: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0 + // LLVM: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // LLVM: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // LLVM: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1 + // LLVM: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // LLVM: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + + // OGCG-LABEL: test_mm512_2intersect_epi64 + // OGCG: %[[RES:.*]] = call { <8 x i1>, <8 x i1> } @llvm.x86.avx512.vp2intersect.q.512(<8 x i64> %{{.*}}, <8 x i64> %{{.*}}) + // OGCG: %[[VAL1:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 0 + // OGCG: %[[CAST1:.*]] = bitcast <8 x i1> %[[VAL1]] to i8 + // OGCG: store i8 %[[CAST1]], ptr %{{.*}}, align 1 + // OGCG: %[[VAL2:.*]] = extractvalue { <8 x i1>, <8 x i1> } %[[RES]], 1 + // OGCG: %[[CAST2:.*]] = bitcast <8 x i1> %[[VAL2]] to i8 + // OGCG: store i8 %[[CAST2]], ptr %{{.*}}, align 1 + _mm512_2intersect_epi64(a, b, m0, m1); +} _______________________________________________ cfe-commits mailing list
cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
