https://github.com/imkiva created https://github.com/llvm/llvm-project/pull/203774
Adds initial LLVM and Clang support for the Zvvm/IME configuration APIs: - Adds Clang builtins/macros for __riscv_ime_vlen(), __riscv_ime_lambda(), and __riscv_vsetlambda(). - Adds LLVM intrinsics for implementation geometry queries, selected vtype.lambda readback, and nonzero lambda write/readback. This does not add full VSETVLI high-field tracking or matrix operation intrinsics/codegen. >From 5222ad77498576ca9a3eaace4004b37209f8aab6 Mon Sep 17 00:00:00 2001 From: imkiva <[email protected]> Date: Fri, 12 Jun 2026 16:58:14 +0800 Subject: [PATCH 1/2] [RISCV][LLVM][Clang] Add experimental Zvvm config intrinsics --- .../clang/Basic/DiagnosticSemaKinds.td | 2 + clang/include/clang/Basic/riscv_vector.td | 28 + clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 38 ++ clang/lib/Sema/SemaRISCV.cpp | 37 ++ .../rvv-intrinsics-handcrafted/ime-config.c | 75 +++ clang/test/Sema/riscv-ime-vsetlambda.c | 38 ++ llvm/include/llvm/IR/IntrinsicsRISCV.td | 23 + llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 241 +++++++ llvm/lib/Target/RISCV/RISCVInstrInfoZvvm.td | 28 + llvm/lib/Target/RISCV/RISCVSystemOperands.td | 2 +- .../ime-config-intrinsics-invalid-rv32.ll | 12 + .../ime-config-intrinsics-invalid-rv64.ll | 12 + .../RISCV/ime-config-intrinsics-rv32.ll | 590 +++++++++++++++++ .../RISCV/ime-config-intrinsics-rv64.ll | 620 ++++++++++++++++++ 14 files changed, 1745 insertions(+), 1 deletion(-) create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c create mode 100644 clang/test/Sema/riscv-ime-vsetlambda.c create mode 100644 llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll create mode 100644 llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll create mode 100644 llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index a3b575b7ee63a..4e474a8cc1d28 100644 --- 
a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13601,6 +13601,8 @@ def err_riscv_builtin_requires_extension : Error< "builtin requires%select{| at least one of the following extensions}0: %1">; def err_riscv_builtin_invalid_lmul : Error< "LMUL argument must be in the range [0,3] or [5,7]">; +def err_riscv_builtin_invalid_ime_lambda : Error< + "argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]">; def err_riscv_type_requires_extension : Error< "RISC-V type %0 requires the '%1' extension">; def err_riscv_attribute_interrupt_requires_extension : Error< diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index c5ce8b7ae8fc1..f3651a9265e0e 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -2162,3 +2162,31 @@ let UnMaskedPolicyScheme = HasPassthruOperand in { defm vpairo : RVVOutBuiltinSet<"vpairo", "csil", [["vv", "Uv", "UvUvUv"]]>; } } + +//===----------------------------------------------------------------------===// +// Zvvm - Integrated Matrix Extension configuration builtins. 
+//===----------------------------------------------------------------------===// + +let HeaderCode = +[{ +#define __riscv_ime_vlen() __builtin_rvv_ime_vlen() +#define __riscv_ime_lambda() __builtin_rvv_ime_lambda() +#define __riscv_vsetlambda(lambda) __builtin_rvv_vsetlambda((size_t)(lambda)) +}] in +def ime_config_macro: RVVHeader; + +let HasBuiltinAlias = false, HasVL = false, HasMasked = false, + UnMaskedPolicyScheme = NonePolicy, MaskedPolicyScheme = NonePolicy, + Log2LMUL = [0], RequiredFeatures = ["zvvmm"], + ManualCodegen = [{ + return emitRVVIMEBuiltin(this, E, ReturnValue, ResultType, ID, Ops, + PolicyAttrs, IsMasked); + }] in +{ + let IRName = "ime_vlen" in + def ime_vlen : RVVBuiltin<"", "z", "i">; + let IRName = "ime_lambda" in + def ime_lambda : RVVBuiltin<"", "z", "i">; + let IRName = "ime_vsetlambda_nonzero" in + def vsetlambda : RVVBuiltin<"", "zz", "i">; +} diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index 3bf7dd07d54d3..356e9fcfce5c3 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -308,6 +308,44 @@ emitRVVVsetvliBuiltin(CodeGenFunction *CGF, const CallExpr *E, return Builder.CreateCall(F, Ops, ""); } +static LLVM_ATTRIBUTE_NOINLINE Value * +emitRVVIMEBuiltin(CodeGenFunction *CGF, const CallExpr *E, + ReturnValueSlot ReturnValue, llvm::Type *ResultType, + Intrinsic::ID ID, SmallVectorImpl<Value *> &Ops, + int PolicyAttrs, bool IsMasked) { + auto &Builder = CGF->Builder; + auto &CGM = CGF->CGM; + + switch (ID) { + case Intrinsic::riscv_ime_vlen: + case Intrinsic::riscv_ime_lambda: { + assert(Ops.empty() && "unexpected IME geometry operands"); + llvm::Function *F = CGM.getIntrinsic(ID, {ResultType}); + return Builder.CreateCall(F); + } + case Intrinsic::riscv_ime_vsetlambda_nonzero: + break; + default: + llvm_unreachable("unexpected IME builtin"); + } + + assert(Ops.size() == 1 && "unexpected vsetlambda arity"); + Value *Req = Ops[0]; 
+ + auto *C = dyn_cast<llvm::ConstantInt>(Req); + assert(C && "Sema should reject non-constant __riscv_vsetlambda arguments"); + + if (C->isZero()) { + llvm::Function *ReadF = + CGM.getIntrinsic(Intrinsic::riscv_ime_readlambda, {ResultType}); + return Builder.CreateCall(ReadF); + } + + llvm::Function *SetF = CGM.getIntrinsic( + Intrinsic::riscv_ime_vsetlambda_nonzero, {ResultType}); + return Builder.CreateCall(SetF, {Req}); +} + static LLVM_ATTRIBUTE_NOINLINE Value * emitRVVVSEMaskBuiltin(CodeGenFunction *CGF, const CallExpr *E, ReturnValueSlot ReturnValue, llvm::Type *ResultType, diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index 9647a7d913744..e47eb65cc7303 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -26,6 +26,7 @@ #include "clang/Sema/Sema.h" #include "clang/Support/RISCVVIntrinsicUtils.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MathExtras.h" #include "llvm/TargetParser/RISCVISAInfo.h" #include "llvm/TargetParser/RISCVTargetParser.h" #include <optional> @@ -677,11 +678,47 @@ bool SemaRISCV::CheckBuiltinFunctionCall(const TargetInfo &TI, return SemaRef.BuiltinConstantArgRange(TheCall, SEWOffset, 0, 3) || CheckLMUL(TheCall, LMULOffset); }; + + auto CheckIMEVSetLambda = [&]() -> bool { + assert(TheCall->getNumArgs() == 1 && "unexpected vsetlambda arity"); + + Expr *Arg = TheCall->getArg(0); + if (Arg->isTypeDependent() || Arg->isValueDependent()) + return false; + Expr *DiagArg = Arg->IgnoreParenCasts(); + + Expr::EvalResult Eval; + Expr *EvalArg = DiagArg; + if (!EvalArg->EvaluateAsInt(Eval, Context, Expr::SE_NoSideEffects)) { + EvalArg = Arg; + if (!EvalArg->EvaluateAsInt(Eval, Context, Expr::SE_NoSideEffects)) + return Diag(DiagArg->getBeginLoc(), + diag::err_riscv_builtin_invalid_ime_lambda) + << DiagArg->getSourceRange(); + } + + llvm::APSInt Val = Eval.Val.getInt(); + if (Val.isSigned() && Val.isNegative()) + return Diag(DiagArg->getBeginLoc(), + 
diag::err_riscv_builtin_invalid_ime_lambda) + << DiagArg->getSourceRange(); + + uint64_t U = Val.getLimitedValue(65); + if (U != 0 && (U > 64 || !llvm::isPowerOf2_64(U))) + return Diag(DiagArg->getBeginLoc(), + diag::err_riscv_builtin_invalid_ime_lambda) + << DiagArg->getSourceRange(); + + return false; + }; + switch (BuiltinID) { case RISCVVector::BI__builtin_rvv_vsetvli: return CheckVSetVL(1, 2); case RISCVVector::BI__builtin_rvv_vsetvlimax: return CheckVSetVL(0, 1); + case RISCVVector::BI__builtin_rvv_vsetlambda: + return CheckIMEVSetLambda(); case RISCVVector::BI__builtin_rvv_sf_vsettnt: case RISCVVector::BI__builtin_rvv_sf_vsettm: case RISCVVector::BI__builtin_rvv_sf_vsettn: diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c new file mode 100644 index 0000000000000..a0477cf356d98 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c @@ -0,0 +1,75 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6 +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv32 -target-feature +v \ +// RUN: -target-feature +experimental-zvvmm -disable-O0-optnone \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=RV32 %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -target-feature +experimental-zvvmm -disable-O0-optnone \ +// RUN: -emit-llvm -o - %s | FileCheck --check-prefix=RV64 %s + +#include <stddef.h> +#include <riscv_vector.h> + +// RV32-LABEL: define dso_local i32 @test_ime_vlen( +// RV32-SAME: ) #[[ATTR0:[0-9]+]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = call i32 @llvm.riscv.ime.vlen.i32() +// RV32-NEXT: ret i32 [[TMP0]] +// +// RV64-LABEL: define dso_local i64 @test_ime_vlen( +// RV64-SAME: ) #[[ATTR0:[0-9]+]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.ime.vlen.i64() +// RV64-NEXT: ret i64 [[TMP0]] +// 
+size_t test_ime_vlen(void) { + return __riscv_ime_vlen(); +} + +// RV32-LABEL: define dso_local i32 @test_ime_lambda( +// RV32-SAME: ) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = call i32 @llvm.riscv.ime.lambda.i32() +// RV32-NEXT: ret i32 [[TMP0]] +// +// RV64-LABEL: define dso_local i64 @test_ime_lambda( +// RV64-SAME: ) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.ime.lambda.i64() +// RV64-NEXT: ret i64 [[TMP0]] +// +size_t test_ime_lambda(void) { + return __riscv_ime_lambda(); +} + +// RV32-LABEL: define dso_local i32 @test_vsetlambda( +// RV32-SAME: ) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 4) +// RV32-NEXT: ret i32 [[TMP0]] +// +// RV64-LABEL: define dso_local i64 @test_vsetlambda( +// RV64-SAME: ) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 4) +// RV64-NEXT: ret i64 [[TMP0]] +// +size_t test_vsetlambda(void) { + return __riscv_vsetlambda(4); +} + +// RV32-LABEL: define dso_local i32 @test_vsetlambda_zero( +// RV32-SAME: ) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[TMP0:%.*]] = call i32 @llvm.riscv.ime.readlambda.i32() +// RV32-NEXT: ret i32 [[TMP0]] +// +// RV64-LABEL: define dso_local i64 @test_vsetlambda_zero( +// RV64-SAME: ) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.ime.readlambda.i64() +// RV64-NEXT: ret i64 [[TMP0]] +// +size_t test_vsetlambda_zero(void) { + return __riscv_vsetlambda(0); +} diff --git a/clang/test/Sema/riscv-ime-vsetlambda.c b/clang/test/Sema/riscv-ime-vsetlambda.c new file mode 100644 index 0000000000000..0e3b8cbf889cc --- /dev/null +++ b/clang/test/Sema/riscv-ime-vsetlambda.c @@ -0,0 +1,38 @@ +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv32 -target-feature +v \ +// RUN: -target-feature +experimental-zvvmm 
-fsyntax-only -verify %s +// RUN: %clang_cc1 -triple riscv64 -target-feature +v \ +// RUN: -target-feature +experimental-zvvmm -fsyntax-only -verify %s + +#include <stddef.h> +#include <riscv_vector.h> + +void ok(void) { + __riscv_vsetlambda(0); + __riscv_vsetlambda(1); + __riscv_vsetlambda(2); + __riscv_vsetlambda(4); + __riscv_vsetlambda(8); + __riscv_vsetlambda(16); + __riscv_vsetlambda(32); + __riscv_vsetlambda(64); +} + +void bad_value(void) { + __riscv_vsetlambda(3); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(128); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(-1); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} +} + +void bad_runtime(size_t x) { + __riscv_vsetlambda(x); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(x++); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} +} + +void bad_wrap(void) { + __riscv_vsetlambda(0x100000004ULL); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(-4294967292LL); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} +#if __SIZEOF_POINTER__ == 8 + __riscv_vsetlambda(((__int128)1) << 70); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression 
evaluating to 0 or a power of two in the range [1, 64]}} +#endif +} diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index f53f752c25c30..7e54c8aefa4e1 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -2063,6 +2063,29 @@ let TargetPrefix = "riscv" in { defm vfncvt_sat_f_f_q_alt : RISCVConversionRoundingMode; } // TargetPrefix = "riscv" +//===----------------------------------------------------------------------===// +// Zvvm - Integrated Matrix Extension +// +// These intrinsics expose IME configuration queries and vtype.lambda control. +// They use llvm_anyint_ty for consistency with RVV configuration intrinsics, +// but the only supported type is XLen. +let TargetPrefix = "riscv" in { + // Implementation geometry helpers. + def int_riscv_ime_vlen : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; + def int_riscv_ime_lambda : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem]>; + + // Current selected vtype.lambda readback. This is not a memory operation, + // but keep it conservative until LLVM IR has a first-class vtype state model. + def int_riscv_ime_readlambda + : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem, IntrHasSideEffects]>; + + // Write a nonzero requested lambda and return the established lambda. + // The argument contract is: positive power of two in {1,2,4,8,16,32,64}. 
+ def int_riscv_ime_vsetlambda_nonzero + : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], + [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>; +} // TargetPrefix = "riscv" + // Vendor extensions //===----------------------------------------------------------------------===// include "llvm/IR/IntrinsicsRISCVXTHead.td" diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 753901d71baca..7061a17fe6b4f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11567,6 +11567,238 @@ static SDValue lowerGetVectorLength(SDNode *N, SelectionDAG &DAG, return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), Res); } +static unsigned getIMELambdaShift(const RISCVSubtarget &Subtarget) { + return Subtarget.getXLen() - 4; +} + +static uint64_t getIMELambdaFieldMask(const RISCVSubtarget &Subtarget) { + return UINT64_C(7) << getIMELambdaShift(Subtarget); +} + +static uint64_t getIMEClearLambdaMask(const RISCVSubtarget &Subtarget) { + uint64_t Mask = ~getIMELambdaFieldMask(Subtarget); + if (!Subtarget.is64Bit()) + Mask = static_cast<uint32_t>(Mask); + return Mask; +} + +static bool isValidIMELambdaValue(uint64_t Value) { + return Value != 0 && Value <= 64 && isPowerOf2_64(Value); +} + +// The IME implementation lambda is derived from implementation VLEN using the +// representative shape from the spec: +// +// VLEN = 64 * lambda^2 +// +// For a known VLEN in bits this gives: +// +// log2(lambda) = (log2(VLEN) - log2(64)) / 2 +// = (log2(VLEN) - 6) / 2 +// +// Values below VLEN=64 produce lambda=1. The selected vtype.lambda encoding +// has seven non-zero values, so the maximum representable lambda is 64 +// (log2(lambda)=6). 
+static unsigned getKnownIMEImplementationLambda(unsigned VLenBits) { + unsigned Log2VLen = Log2_32(VLenBits); + if (Log2VLen <= 6) + return 1; + + unsigned LambdaLog2 = (Log2VLen - 6) / 2; + if (LambdaLog2 > 6) + LambdaLog2 = 6; + return 1U << LambdaLog2; +} + +// Decode the selected vtype.lambda field. The IME vtype encoding uses zero to +// mean "no selected lambda"; otherwise the encoded value is one plus log2 of +// the selected lambda: +// +// encoded 0 -> lambda 0 +// encoded n -> lambda 1 << (n - 1), for n in [1, 7] +static SDValue decodeSelectedIMELambdaFromVType( + SDValue VType, const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT XLenVT = Subtarget.getXLenVT(); + SDValue Encoded = + DAG.getNode(ISD::SRL, DL, XLenVT, VType, + DAG.getConstant(getIMELambdaShift(Subtarget), DL, XLenVT)); + Encoded = DAG.getNode(ISD::AND, DL, XLenVT, Encoded, + DAG.getConstant(7, DL, XLenVT)); + + SDValue Zero = DAG.getConstant(0, DL, XLenVT); + SDValue IsZero = DAG.getSetCC(DL, XLenVT, Encoded, Zero, ISD::SETEQ); + SDValue ShiftAmt = + DAG.getNode(ISD::SUB, DL, XLenVT, Encoded, + DAG.getConstant(1, DL, XLenVT)); + ShiftAmt = DAG.getSelect(DL, XLenVT, IsZero, Zero, ShiftAmt); + + SDValue Lambda = + DAG.getNode(ISD::SHL, DL, XLenVT, DAG.getConstant(1, DL, XLenVT), + ShiftAmt); + return DAG.getSelect(DL, XLenVT, IsZero, Zero, Lambda); +} + +// Read the architectural vtype CSR. This is selected as: +// +// csrr rd, vtype +// +// and is used only for IME selected-lambda readback and read-modify-write. +static SDValue readIMEVType(SDValue Chain, const SDLoc &DL, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT XLenVT = Subtarget.getXLenVT(); + SDValue SysRegNo = DAG.getTargetConstant(RISCVSysReg::vtype, DL, XLenVT); + return DAG.getNode(RISCVISD::READ_CSR, DL, DAG.getVTList(XLenVT, MVT::Other), + Chain, SysRegNo); +} + +// Lower the implementation VLEN query. The IME C API returns VLEN in bits. 
If +// the subtarget has an exact VLEN, fold the query to a constant; otherwise read +// vlenb and convert bytes to bits: +// +// li rd, VLEN # fixed VLEN +// csrr rd, vlenb # dynamic VLEN +// slli rd, rd, 3 +static SDValue lowerIMEVLen(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + MVT XLenVT = Subtarget.getXLenVT(); + SDLoc DL(Op); + + SDValue VLen; + if (std::optional<unsigned> KnownVLen = Subtarget.getRealVLen()) { + VLen = DAG.getConstant(*KnownVLen, DL, XLenVT); + } else { + SDValue VLenB = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); + VLen = DAG.getNode(ISD::SHL, DL, XLenVT, VLenB, + DAG.getConstant(3, DL, XLenVT)); + } + + return VLen; +} + +// Lower the implementation representative lambda query. This is the +// implementation geometry lambda described by the IME spec, not the currently +// selected vtype.lambda. It must not read vtype. +// +// The spec-derived formula is VLEN = 64 * lambda^2. For dynamic VLEN we read +// vlenb, where vlenb = VLEN / 8, so: +// +// ctz(vlenb) = log2(VLEN) - 3 +// log2(lambda) = (log2(VLEN) - 6) / 2 +// = (ctz(vlenb) - 3) / 2 +// +// Conceptual lowering: +// +// csrr rd, vlenb +// lambda_log2 = clamp((ctz(rd) - 3) / 2, 0, 6) +// rd = 1 << lambda_log2 +static SDValue lowerIMEImplementationLambda(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(Op); + MVT XLenVT = Subtarget.getXLenVT(); + + SDValue Lambda; + if (std::optional<unsigned> KnownVLen = Subtarget.getRealVLen()) { + Lambda = + DAG.getConstant(getKnownIMEImplementationLambda(*KnownVLen), DL, XLenVT); + } else { + SDValue VLenB = DAG.getNode(RISCVISD::READ_VLENB, DL, XLenVT); + SDValue Ctz = DAG.getNode(ISD::CTTZ_ZERO_POISON, DL, XLenVT, VLenB); + + SDValue Three = DAG.getConstant(3, DL, XLenVT); + SDValue IsSmall = DAG.getSetCC(DL, XLenVT, Ctz, Three, ISD::SETULT); + SDValue LambdaLog2 = + DAG.getNode(ISD::SUB, DL, XLenVT, Ctz, Three); + LambdaLog2 = DAG.getSelect(DL, XLenVT, IsSmall, + DAG.getConstant(0, DL, 
XLenVT), LambdaLog2); + LambdaLog2 = + DAG.getNode(ISD::SRL, DL, XLenVT, LambdaLog2, + DAG.getConstant(1, DL, XLenVT)); + + SDValue Six = DAG.getConstant(6, DL, XLenVT); + SDValue IsTooLarge = + DAG.getSetCC(DL, XLenVT, LambdaLog2, Six, ISD::SETUGT); + LambdaLog2 = DAG.getSelect(DL, XLenVT, IsTooLarge, Six, LambdaLog2); + + Lambda = DAG.getNode(ISD::SHL, DL, XLenVT, + DAG.getConstant(1, DL, XLenVT), LambdaLog2); + } + + return Lambda; +} + +// Lower the selected vtype.lambda readback used by __riscv_vsetlambda(0). +// This is a read-only query of architectural vtype state and must not emit +// vsetvl or otherwise modify vl/vtype: +// +// csrr rd, vtype +// rd = decode(vtype.lambda) +static SDValue lowerIMEReadSelectedLambda(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue Chain = Op.getOperand(0); + + SDValue VType = readIMEVType(Chain, DL, DAG, Subtarget); + Chain = VType.getValue(1); + SDValue Lambda = decodeSelectedIMELambdaFromVType(VType, DL, DAG, Subtarget); + return DAG.getMergeValues({Lambda, Chain}, DL); +} + +// Lower the nonzero selected-lambda write/readback primitive used by +// __riscv_vsetlambda(N), for N in {1,2,4,8,16,32,64}. The IME vtype fields +// live in high vtype bits outside the vsetvli/vsetivli immediate fields, so the +// spec requires configuring them with register-form vsetvl using a full vtype +// value in a GPR. 
+// +// The lowering preserves the current vl and all other vtype fields: +// +// old_vtype = csrr vtype +// encoded = log2(N) + 1 +// new_vtype = (old_vtype & ~lambda_mask) | (encoded << lambda_shift) +// vsetvl x0, x0, new_vtype +// updated_vtype = csrr vtype +// return decode(updated_vtype.lambda) +static SDValue lowerIMEVSetLambdaNonZero(SDValue Op, SelectionDAG &DAG, + const RISCVSubtarget &Subtarget) { + SDLoc DL(Op); + SDValue Chain = Op.getOperand(0); + SDValue Requested = Op.getOperand(2); + MVT XLenVT = Subtarget.getXLenVT(); + + auto *C = dyn_cast<ConstantSDNode>(Requested); + if (!C) + report_fatal_error( + "llvm.riscv.ime.vsetlambda.nonzero requires an immediate argument"); + + uint64_t Value = C->getZExtValue(); + if (!isValidIMELambdaValue(Value)) + report_fatal_error( + "invalid argument for llvm.riscv.ime.vsetlambda.nonzero: expected a " + "power of two in {1,2,4,8,16,32,64}"); + + SDValue OldVType = readIMEVType(Chain, DL, DAG, Subtarget); + Chain = OldVType.getValue(1); + + SDValue Encoded = DAG.getConstant(Log2_64(Value) + 1, DL, XLenVT); + SDValue Cleared = + DAG.getNode(ISD::AND, DL, XLenVT, OldVType, + DAG.getConstant(getIMEClearLambdaMask(Subtarget), DL, + XLenVT)); + SDValue EncodedBits = + DAG.getNode(ISD::SHL, DL, XLenVT, Encoded, + DAG.getConstant(getIMELambdaShift(Subtarget), DL, XLenVT)); + SDValue NewVType = DAG.getNode(ISD::OR, DL, XLenVT, Cleared, EncodedBits); + + Chain = DAG.getNode(RISCVISD::IME_VSETVTYPE, DL, MVT::Other, Chain, + NewVType); + + SDValue UpdatedVType = readIMEVType(Chain, DL, DAG, Subtarget); + Chain = UpdatedVType.getValue(1); + SDValue Lambda = + decodeSelectedIMELambdaFromVType(UpdatedVType, DL, DAG, Subtarget); + return DAG.getMergeValues({Lambda, Chain}, DL); +} + static SDValue lowerCttzElts(SDValue Op, SelectionDAG &DAG, const RISCVSubtarget &Subtarget) { SDValue Op0 = Op.getOperand(0); @@ -11735,6 +11967,10 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, } case 
Intrinsic::experimental_get_vector_length: return lowerGetVectorLength(Op.getNode(), DAG, Subtarget); + case Intrinsic::riscv_ime_vlen: + return lowerIMEVLen(Op, DAG, Subtarget); + case Intrinsic::riscv_ime_lambda: + return lowerIMEImplementationLambda(Op, DAG, Subtarget); case Intrinsic::riscv_vmv_x_s: { SDValue Res = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Op.getOperand(1)); return DAG.getNode(ISD::TRUNCATE, DL, Op.getValueType(), Res); @@ -12040,6 +12276,11 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op, case Intrinsic::riscv_sseg8_load_mask: return lowerFixedVectorSegLoadIntrinsics(IntNo, Op, Subtarget, DAG); + case Intrinsic::riscv_ime_readlambda: + return lowerIMEReadSelectedLambda(Op, DAG, Subtarget); + case Intrinsic::riscv_ime_vsetlambda_nonzero: + return lowerIMEVSetLambdaNonZero(Op, DAG, Subtarget); + case Intrinsic::riscv_sf_vc_v_x_se: return getVCIXISDNodeWCHAIN(Op, DAG, RISCVISD::SF_VC_V_X_SE); case Intrinsic::riscv_sf_vc_v_i_se: diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvvm.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvvm.td index 067a9c0e404d4..432384c181f8e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvvm.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvvm.td @@ -50,6 +50,34 @@ def VScaleOp : RegisterOperand<VMV0> { let DecoderMethod = "decodeVMaskReg"; } +//===----------------------------------------------------------------------===// +// IME configuration pseudos +//===----------------------------------------------------------------------===// + +// Low-level backend node for writing a full vtype value with register-form +// vsetvl, preserving vl. 
+def riscv_ime_vsetvtype + : RVSDNode<"IME_VSETVTYPE", + SDTypeProfile<0, 1, [SDTCisInt<0>]>, + [SDNPHasChain]>; + +let Predicates = [HasStdExtZvvmm], Defs = [VL, VTYPE], Uses = [VL, VTYPE], + hasSideEffects = 1, mayLoad = 0, mayStore = 0 in +def PseudoIMEVSETVTYPE + : Pseudo<(outs), (ins GPR:$vtype), + [(riscv_ime_vsetvtype (XLenVT GPR:$vtype))]>, + PseudoInstExpansion<(VSETVL X0, X0, GPR:$vtype)>, + Sched<[WriteVSETVL, ReadVSETVL]>; + +let Predicates = [HasStdExtZvvmm], Uses = [VTYPE], hasSideEffects = 0, + mayLoad = 0, mayStore = 0, isCodeGenOnly = 1 in +def PseudoIMEReadVTYPE + : Pseudo<(outs GPR:$rd), (ins), + [(set GPR:$rd, + (XLenVT (riscv_read_csr (XLenVT SysRegVTYPE.Encoding))))]>, + PseudoInstExpansion<(CSRRS GPR:$rd, SysRegVTYPE.Encoding, X0)>, + Sched<[WriteCSR, ReadCSR]>; + class VTileLoadBase<bits<2> mop, dag ins, string opcodestr, string argstr> : RVInst<(outs VR:$vd), ins, opcodestr, argstr, [], InstFormatR> { diff --git a/llvm/lib/Target/RISCV/RISCVSystemOperands.td b/llvm/lib/Target/RISCV/RISCVSystemOperands.td index 66e3484ff0955..cc0bd0d3d532d 100644 --- a/llvm/lib/Target/RISCV/RISCVSystemOperands.td +++ b/llvm/lib/Target/RISCV/RISCVSystemOperands.td @@ -82,7 +82,7 @@ def : SysReg<"vxsat", 0x009>; def SysRegVXRM : SysReg<"vxrm", 0x00A>; def : SysReg<"vcsr", 0x00F>; def SysRegVL : SysReg<"vl", 0xC20>; -def : SysReg<"vtype", 0xC21>; +def SysRegVTYPE : SysReg<"vtype", 0xC21>; def SysRegVLENB: SysReg<"vlenb", 0xC22>; //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll new file mode 100644 index 0000000000000..769ae27a8a673 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll @@ -0,0 +1,12 @@ +; REQUIRES: riscv-registered-target +; RUN: not --crash llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=BAD-VALUE + 
+; BAD-VALUE: invalid argument for llvm.riscv.ime.vsetlambda.nonzero + +define i32 @vsetlambda_invalid_zero() { + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 0) + ret i32 %lambda +} + +declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 immarg) diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll new file mode 100644 index 0000000000000..47bafef1c295b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll @@ -0,0 +1,12 @@ +; REQUIRES: riscv-registered-target +; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=BAD-VALUE + +; BAD-VALUE: invalid argument for llvm.riscv.ime.vsetlambda.nonzero + +define i64 @vsetlambda_invalid_128() { + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 128) + ret i64 %lambda +} + +declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 immarg) diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll new file mode 100644 index 0000000000000..a81eb70d4b8f0 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll @@ -0,0 +1,590 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; REQUIRES: riscv-registered-target +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %s \ +; RUN: | FileCheck %s --check-prefix=RV32 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvvmm \ +; RUN: -riscv-v-vector-bits-min=512 -riscv-v-vector-bits-max=512 < %s \ +; RUN: | FileCheck %s --check-prefix=RV32-VLEN512 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvvmm \ +; RUN: -riscv-v-vector-bits-min=1024 -riscv-v-vector-bits-max=1024 < %s \ +; RUN: | FileCheck %s --check-prefix=RV32-VLEN1024 + +define i32 @ime_vlen_rv32() { +; RV32-LABEL: ime_vlen_rv32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: 
slli a0, a0, 3 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: ime_vlen_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: li a0, 512 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: ime_vlen_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: li a0, 1024 +; RV32-VLEN1024-NEXT: ret + %vlen = call i32 @llvm.riscv.ime.vlen.i32() + ret i32 %vlen +} + +define i32 @ime_lambda_rv32() { +; RV32-LABEL: ime_lambda_rv32: +; RV32: # %bb.0: +; RV32-NEXT: addi sp, sp, -16 +; RV32-NEXT: .cfi_def_cfa_offset 16 +; RV32-NEXT: sw ra, 12(sp) # 4-byte Folded Spill +; RV32-NEXT: .cfi_offset ra, -4 +; RV32-NEXT: csrr a0, vlenb +; RV32-NEXT: neg a1, a0 +; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: lui a1, 30667 +; RV32-NEXT: addi a1, a1, 1329 +; RV32-NEXT: call __mulsi3 +; RV32-NEXT: lui a1, %hi(.LCPI1_0) +; RV32-NEXT: addi a1, a1, %lo(.LCPI1_0) +; RV32-NEXT: srli a0, a0, 27 +; RV32-NEXT: add a0, a1, a0 +; RV32-NEXT: lbu a0, 0(a0) +; RV32-NEXT: sltiu a1, a0, 3 +; RV32-NEXT: addi a0, a0, -3 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: srli a0, a0, 1 +; RV32-NEXT: li a1, 6 +; RV32-NEXT: bgeu a1, a0, .LBB1_2 +; RV32-NEXT: # %bb.1: +; RV32-NEXT: li a0, 6 +; RV32-NEXT: .LBB1_2: +; RV32-NEXT: li a1, 1 +; RV32-NEXT: sll a0, a1, a0 +; RV32-NEXT: lw ra, 12(sp) # 4-byte Folded Reload +; RV32-NEXT: .cfi_restore ra +; RV32-NEXT: addi sp, sp, 16 +; RV32-NEXT: .cfi_def_cfa_offset 0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: ime_lambda_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: li a0, 2 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: ime_lambda_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: li a0, 4 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.lambda.i32() + ret i32 %lambda +} + +define i32 @readlambda_rv32() { +; RV32-LABEL: readlambda_rv32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, 
a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: readlambda_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: readlambda_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.readlambda.i32() + ret i32 %lambda +} + +define i32 @vsetlambda_1_rv32() { +; RV32-LABEL: vsetlambda_1_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lui a1, 65536 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_1_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 65536 +; 
RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_1_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 65536 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 1) + ret i32 %lambda +} + +define i32 @vsetlambda_2_rv32() { +; RV32-LABEL: vsetlambda_2_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lui a1, 131072 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_2_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui 
a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 131072 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_2_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 131072 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 2) + ret i32 %lambda +} + +define i32 @vsetlambda_4_rv32() { +; RV32-LABEL: vsetlambda_4_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lui a1, 196608 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; 
RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_4_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 196608 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_4_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 196608 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 4) + ret i32 %lambda +} + +define i32 @vsetlambda_8_rv32() { +; RV32-LABEL: vsetlambda_8_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lui a1, 262144 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; 
RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_8_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 262144 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_8_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 262144 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 8) + ret i32 %lambda +} + +define i32 @vsetlambda_16_rv32() { +; RV32-LABEL: vsetlambda_16_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and 
a0, a1, a0 +; RV32-NEXT: lui a1, 327680 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_16_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 327680 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_16_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 327680 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 16) + ret i32 %lambda +} + +define i32 
@vsetlambda_32_rv32() { +; RV32-LABEL: vsetlambda_32_rv32: +; RV32: # %bb.0: +; RV32-NEXT: lui a0, 589824 +; RV32-NEXT: csrr a1, vtype +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: lui a1, 393216 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_32_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: lui a0, 589824 +; RV32-VLEN512-NEXT: csrr a1, vtype +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: lui a1, 393216 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_32_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: lui a0, 589824 +; RV32-VLEN1024-NEXT: csrr a1, vtype +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: lui a1, 393216 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 
+; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 32) + ret i32 %lambda +} + +define i32 @vsetlambda_64_rv32() { +; RV32-LABEL: vsetlambda_64_rv32: +; RV32: # %bb.0: +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: lui a1, 458752 +; RV32-NEXT: or a0, a0, a1 +; RV32-NEXT: vsetvl zero, zero, a0 +; RV32-NEXT: csrr a0, vtype +; RV32-NEXT: srli a0, a0, 28 +; RV32-NEXT: andi a0, a0, 7 +; RV32-NEXT: seqz a1, a0 +; RV32-NEXT: addi a0, a0, -1 +; RV32-NEXT: addi a1, a1, -1 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: li a2, 1 +; RV32-NEXT: sll a0, a2, a0 +; RV32-NEXT: and a0, a1, a0 +; RV32-NEXT: ret +; +; RV32-VLEN512-LABEL: vsetlambda_64_rv32: +; RV32-VLEN512: # %bb.0: +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: lui a1, 458752 +; RV32-VLEN512-NEXT: or a0, a0, a1 +; RV32-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NEXT: srli a0, a0, 28 +; RV32-VLEN512-NEXT: andi a0, a0, 7 +; RV32-VLEN512-NEXT: seqz a1, a0 +; RV32-VLEN512-NEXT: addi a0, a0, -1 +; RV32-VLEN512-NEXT: addi a1, a1, -1 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: li a2, 1 +; RV32-VLEN512-NEXT: sll a0, a2, a0 +; RV32-VLEN512-NEXT: and a0, a1, a0 +; RV32-VLEN512-NEXT: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_64_rv32: +; RV32-VLEN1024: # %bb.0: +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: lui a1, 458752 +; RV32-VLEN1024-NEXT: or a0, a0, a1 +; RV32-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NEXT: srli a0, a0, 28 +; RV32-VLEN1024-NEXT: andi a0, a0, 7 +; RV32-VLEN1024-NEXT: seqz a1, a0 +; RV32-VLEN1024-NEXT: addi a0, a0, -1 +; RV32-VLEN1024-NEXT: addi a1, a1, -1 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: li a2, 1 +; RV32-VLEN1024-NEXT: sll a0, a2, a0 +; RV32-VLEN1024-NEXT: and a0, a1, a0 +; RV32-VLEN1024-NEXT: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 64) + ret i32 
%lambda +} + +declare i32 @llvm.riscv.ime.vlen.i32() +declare i32 @llvm.riscv.ime.lambda.i32() +declare i32 @llvm.riscv.ime.readlambda.i32() +declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32) diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll new file mode 100644 index 0000000000000..52b9cc6abbde4 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll @@ -0,0 +1,620 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6 +; REQUIRES: riscv-registered-target +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %s \ +; RUN: | FileCheck %s --check-prefix=RV64 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvvmm \ +; RUN: -riscv-v-vector-bits-min=512 -riscv-v-vector-bits-max=512 < %s \ +; RUN: | FileCheck %s --check-prefix=RV64-VLEN512 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvvmm \ +; RUN: -riscv-v-vector-bits-min=1024 -riscv-v-vector-bits-max=1024 < %s \ +; RUN: | FileCheck %s --check-prefix=RV64-VLEN1024 + +define i64 @ime_vlen_rv64() { +; RV64-LABEL: ime_vlen_rv64: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: slli a0, a0, 3 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: ime_vlen_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, 512 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: ime_vlen_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, 1024 +; RV64-VLEN1024-NEXT: ret + %vlen = call i64 @llvm.riscv.ime.vlen.i64() + ret i64 %vlen +} + +define i64 @ime_lambda_rv64() { +; RV64-LABEL: ime_lambda_rv64: +; RV64: # %bb.0: +; RV64-NEXT: addi sp, sp, -16 +; RV64-NEXT: .cfi_def_cfa_offset 16 +; RV64-NEXT: sd ra, 8(sp) # 8-byte Folded Spill +; RV64-NEXT: .cfi_offset ra, -8 +; RV64-NEXT: csrr a0, vlenb +; RV64-NEXT: neg a1, a0 +; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: lui a1, %hi(.LCPI1_0) +; RV64-NEXT: ld a1, %lo(.LCPI1_0)(a1) +; RV64-NEXT: call __muldi3 +; RV64-NEXT: lui 
a1, %hi(.LCPI1_1) +; RV64-NEXT: addi a1, a1, %lo(.LCPI1_1) +; RV64-NEXT: srli a0, a0, 58 +; RV64-NEXT: add a0, a1, a0 +; RV64-NEXT: lbu a0, 0(a0) +; RV64-NEXT: sltiu a1, a0, 3 +; RV64-NEXT: addi a0, a0, -3 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: srli a0, a0, 1 +; RV64-NEXT: li a1, 6 +; RV64-NEXT: bgeu a1, a0, .LBB1_2 +; RV64-NEXT: # %bb.1: +; RV64-NEXT: li a0, 6 +; RV64-NEXT: .LBB1_2: +; RV64-NEXT: li a1, 1 +; RV64-NEXT: sll a0, a1, a0 +; RV64-NEXT: ld ra, 8(sp) # 8-byte Folded Reload +; RV64-NEXT: .cfi_restore ra +; RV64-NEXT: addi sp, sp, 16 +; RV64-NEXT: .cfi_def_cfa_offset 0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: ime_lambda_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, 2 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: ime_lambda_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, 4 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.lambda.i64() + ret i64 %lambda +} + +define i64 @readlambda_rv64() { +; RV64-LABEL: readlambda_rv64: +; RV64: # %bb.0: +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: readlambda_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: readlambda_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; 
RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.readlambda.i64() + ret i64 %lambda +} + +define i64 @vsetlambda_1_rv64() { +; RV64-LABEL: vsetlambda_1_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a1, a2, 60 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_1_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a1, a2, 60 +; RV64-VLEN512-NEXT: or a0, a0, a1 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_1_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 1 
+; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a1, a2, 60 +; RV64-VLEN1024-NEXT: or a0, a0, a1 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 1) + ret i64 %lambda +} + +define i64 @vsetlambda_2_rv64() { +; RV64-LABEL: vsetlambda_2_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a1, a2, 61 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_2_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a1, a2, 61 +; RV64-VLEN512-NEXT: or a0, a0, a1 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: 
ret +; +; RV64-VLEN1024-LABEL: vsetlambda_2_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a1, a2, 61 +; RV64-VLEN1024-NEXT: or a0, a0, a1 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 2) + ret i64 %lambda +} + +define i64 @vsetlambda_4_rv64() { +; RV64-LABEL: vsetlambda_4_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 3 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a2, a2, 60 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_4_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 3 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a2, a2, 60 +; RV64-VLEN512-NEXT: or a0, a0, a2 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 
60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_4_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 3 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a2, a2, 60 +; RV64-VLEN1024-NEXT: or a0, a0, a2 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 4) + ret i64 %lambda +} + +define i64 @vsetlambda_8_rv64() { +; RV64-LABEL: vsetlambda_8_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a1, a2, 62 +; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_8_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; 
RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a1, a2, 62 +; RV64-VLEN512-NEXT: or a0, a0, a1 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_8_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a1, a2, 62 +; RV64-VLEN1024-NEXT: or a0, a0, a1 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 8) + ret i64 %lambda +} + +define i64 @vsetlambda_16_rv64() { +; RV64-LABEL: vsetlambda_16_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 5 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a2, a2, 60 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: 
addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_16_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 5 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a2, a2, 60 +; RV64-VLEN512-NEXT: or a0, a0, a2 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_16_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 5 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a2, a2, 60 +; RV64-VLEN1024-NEXT: or a0, a0, a2 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 16) + ret i64 %lambda +} + +define i64 @vsetlambda_32_rv64() { +; RV64-LABEL: vsetlambda_32_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, -7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: 
slli a0, a0, 60 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: li a2, 3 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: slli a2, a2, 61 +; RV64-NEXT: or a0, a0, a2 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_32_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, -7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: li a2, 3 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: slli a2, a2, 61 +; RV64-VLEN512-NEXT: or a0, a0, a2 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_32_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, -7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: li a2, 3 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: slli a2, a2, 61 +; RV64-VLEN1024-NEXT: or a0, a0, a2 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: li a2, 1 
+; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 32) + ret i64 %lambda +} + +define i64 @vsetlambda_64_rv64() { +; RV64-LABEL: vsetlambda_64_rv64: +; RV64: # %bb.0: +; RV64-NEXT: li a0, 7 +; RV64-NEXT: csrr a1, vtype +; RV64-NEXT: slli a0, a0, 60 +; RV64-NEXT: or a0, a1, a0 +; RV64-NEXT: vsetvl zero, zero, a0 +; RV64-NEXT: csrr a0, vtype +; RV64-NEXT: srli a0, a0, 60 +; RV64-NEXT: andi a0, a0, 7 +; RV64-NEXT: seqz a1, a0 +; RV64-NEXT: addi a0, a0, -1 +; RV64-NEXT: addi a1, a1, -1 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: li a2, 1 +; RV64-NEXT: sll a0, a2, a0 +; RV64-NEXT: and a0, a1, a0 +; RV64-NEXT: ret +; +; RV64-VLEN512-LABEL: vsetlambda_64_rv64: +; RV64-VLEN512: # %bb.0: +; RV64-VLEN512-NEXT: li a0, 7 +; RV64-VLEN512-NEXT: csrr a1, vtype +; RV64-VLEN512-NEXT: slli a0, a0, 60 +; RV64-VLEN512-NEXT: or a0, a1, a0 +; RV64-VLEN512-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NEXT: srli a0, a0, 60 +; RV64-VLEN512-NEXT: andi a0, a0, 7 +; RV64-VLEN512-NEXT: seqz a1, a0 +; RV64-VLEN512-NEXT: addi a0, a0, -1 +; RV64-VLEN512-NEXT: addi a1, a1, -1 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: li a2, 1 +; RV64-VLEN512-NEXT: sll a0, a2, a0 +; RV64-VLEN512-NEXT: and a0, a1, a0 +; RV64-VLEN512-NEXT: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_64_rv64: +; RV64-VLEN1024: # %bb.0: +; RV64-VLEN1024-NEXT: li a0, 7 +; RV64-VLEN1024-NEXT: csrr a1, vtype +; RV64-VLEN1024-NEXT: slli a0, a0, 60 +; RV64-VLEN1024-NEXT: or a0, a1, a0 +; RV64-VLEN1024-NEXT: vsetvl zero, zero, a0 +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NEXT: srli a0, a0, 60 +; RV64-VLEN1024-NEXT: andi a0, a0, 7 +; RV64-VLEN1024-NEXT: seqz a1, a0 +; RV64-VLEN1024-NEXT: addi a0, a0, -1 +; RV64-VLEN1024-NEXT: addi a1, a1, -1 +; RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: li a2, 1 +; RV64-VLEN1024-NEXT: sll a0, a2, a0 +; 
RV64-VLEN1024-NEXT: and a0, a1, a0 +; RV64-VLEN1024-NEXT: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 64) + ret i64 %lambda +} + +declare i64 @llvm.riscv.ime.vlen.i64() +declare i64 @llvm.riscv.ime.lambda.i64() +declare i64 @llvm.riscv.ime.readlambda.i64() +declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64) >From 4d499141effe7034705be0cfd7991e266bc8626c Mon Sep 17 00:00:00 2001 From: imkiva <[email protected]> Date: Sun, 14 Jun 2026 23:16:28 +0800 Subject: [PATCH 2/2] [RISCV][LLVM][Clang] Support runtime value for __riscv_vsetlambda --- .../clang/Basic/DiagnosticSemaKinds.td | 2 +- clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 67 +++++++--- clang/lib/Sema/SemaRISCV.cpp | 12 +- .../rvv-intrinsics-handcrafted/ime-config.c | 83 +++++++++++++ clang/test/Sema/riscv-ime-vsetlambda.c | 18 +-- llvm/include/llvm/IR/IntrinsicsRISCV.td | 6 +- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 75 +++++++++--- .../ime-config-intrinsics-invalid-rv32.ll | 28 ++++- .../ime-config-intrinsics-invalid-rv64.ll | 28 ++++- .../RISCV/ime-config-intrinsics-rv32.ll | 115 ++++++++++++++++++ .../RISCV/ime-config-intrinsics-rv64.ll | 115 ++++++++++++++++++ 11 files changed, 490 insertions(+), 59 deletions(-) diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 4e474a8cc1d28..f9e6ba99377cc 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -13602,7 +13602,7 @@ def err_riscv_builtin_requires_extension : Error< def err_riscv_builtin_invalid_lmul : Error< "LMUL argument must be in the range [0,3] or [5,7]">; def err_riscv_builtin_invalid_ime_lambda : Error< - "argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]">; + "constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]">; def err_riscv_type_requires_extension : 
Error< "RISC-V type %0 requires the '%1' extension">; def err_riscv_attribute_interrupt_requires_extension : Error< diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp index 356e9fcfce5c3..1b6e0b52c5dd8 100644 --- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp +++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp @@ -323,27 +323,60 @@ emitRVVIMEBuiltin(CodeGenFunction *CGF, const CallExpr *E, llvm::Function *F = CGM.getIntrinsic(ID, {ResultType}); return Builder.CreateCall(F); } - case Intrinsic::riscv_ime_vsetlambda_nonzero: - break; - default: - llvm_unreachable("unexpected IME builtin"); - } - - assert(Ops.size() == 1 && "unexpected vsetlambda arity"); - Value *Req = Ops[0]; - - auto *C = dyn_cast<llvm::ConstantInt>(Req); - assert(C && "Sema should reject non-constant __riscv_vsetlambda arguments"); + case Intrinsic::riscv_ime_vsetlambda_nonzero: { + assert(Ops.size() == 1 && "unexpected vsetlambda arity"); + Value *Req = Ops[0]; + + if (auto *C = dyn_cast<llvm::ConstantInt>(Req)) { + if (C->isZero()) { + llvm::Function *ReadF = + CGM.getIntrinsic(Intrinsic::riscv_ime_readlambda, {ResultType}); + return Builder.CreateCall(ReadF); + } + + llvm::Function *SetF = CGM.getIntrinsic( + Intrinsic::riscv_ime_vsetlambda_nonzero, {ResultType}); + return Builder.CreateCall(SetF, {Req}); + } - if (C->isZero()) { + // Runtime value. The IME API defines requested_lambda == 0 as a read-only + // selected-lambda query, so emit real control flow instead of an + // unconditional vsetvl guarded only by a selected vtype value. 
+ llvm::Function *Fn = Builder.GetInsertBlock()->getParent(); + llvm::BasicBlock *ReadBB = + CGF->createBasicBlock("ime.vsetlambda.read", Fn); + llvm::BasicBlock *SetBB = + CGF->createBasicBlock("ime.vsetlambda.set", Fn); + llvm::BasicBlock *ContBB = + CGF->createBasicBlock("ime.vsetlambda.cont", Fn); + + Value *IsZero = + Builder.CreateICmpEQ(Req, llvm::ConstantInt::get(ResultType, 0)); + Builder.CreateCondBr(IsZero, ReadBB, SetBB); + + Builder.SetInsertPoint(ReadBB); llvm::Function *ReadF = CGM.getIntrinsic(Intrinsic::riscv_ime_readlambda, {ResultType}); - return Builder.CreateCall(ReadF); + Value *ReadVal = Builder.CreateCall(ReadF); + Builder.CreateBr(ContBB); + ReadBB = Builder.GetInsertBlock(); + + Builder.SetInsertPoint(SetBB); + llvm::Function *SetF = CGM.getIntrinsic( + Intrinsic::riscv_ime_vsetlambda_nonzero, {ResultType}); + Value *SetVal = Builder.CreateCall(SetF, {Req}); + Builder.CreateBr(ContBB); + SetBB = Builder.GetInsertBlock(); + + Builder.SetInsertPoint(ContBB); + llvm::PHINode *Phi = Builder.CreatePHI(ResultType, 2); + Phi->addIncoming(ReadVal, ReadBB); + Phi->addIncoming(SetVal, SetBB); + return Phi; + } + default: + llvm_unreachable("unexpected IME builtin"); } - - llvm::Function *SetF = CGM.getIntrinsic( - Intrinsic::riscv_ime_vsetlambda_nonzero, {ResultType}); - return Builder.CreateCall(SetF, {Req}); } static LLVM_ATTRIBUTE_NOINLINE Value * diff --git a/clang/lib/Sema/SemaRISCV.cpp b/clang/lib/Sema/SemaRISCV.cpp index e47eb65cc7303..095e1cc5e808c 100644 --- a/clang/lib/Sema/SemaRISCV.cpp +++ b/clang/lib/Sema/SemaRISCV.cpp @@ -689,13 +689,11 @@ bool SemaRISCV::CheckBuiltinFunctionCall(const TargetInfo &TI, Expr::EvalResult Eval; Expr *EvalArg = DiagArg; - if (!EvalArg->EvaluateAsInt(Eval, Context, Expr::SE_NoSideEffects)) { - EvalArg = Arg; - if (!EvalArg->EvaluateAsInt(Eval, Context, Expr::SE_NoSideEffects)) - return Diag(DiagArg->getBeginLoc(), - diag::err_riscv_builtin_invalid_ime_lambda) - << DiagArg->getSourceRange(); - } + // Prefer 
evaluating the user source expression before the macro-introduced + // (size_t) cast. This catches constants that would otherwise wrap into a + // valid size_t value on RV32, e.g. 0x100000004ULL -> 4. + if (!EvalArg->EvaluateAsInt(Eval, Context, Expr::SE_NoSideEffects)) + return false; llvm::APSInt Val = Eval.Val.getInt(); if (Val.isSigned() && Val.isNegative()) diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c index a0477cf356d98..d69a91fae7297 100644 --- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/ime-config.c @@ -73,3 +73,86 @@ size_t test_vsetlambda(void) { size_t test_vsetlambda_zero(void) { return __riscv_vsetlambda(0); } + +// RV32-LABEL: define dso_local i32 @test_vsetlambda_runtime( +// RV32-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// RV32-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// RV32-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// RV32-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0 +// RV32-NEXT: br i1 [[TMP1]], label %[[IME_VSETLAMBDA_READ:.*]], label %[[IME_VSETLAMBDA_SET:.*]] +// RV32: [[IME_VSETLAMBDA_READ]]: +// RV32-NEXT: [[TMP2:%.*]] = call i32 @llvm.riscv.ime.readlambda.i32() +// RV32-NEXT: br label %[[IME_VSETLAMBDA_CONT:.*]] +// RV32: [[IME_VSETLAMBDA_SET]]: +// RV32-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 [[TMP0]]) +// RV32-NEXT: br label %[[IME_VSETLAMBDA_CONT]] +// RV32: [[IME_VSETLAMBDA_CONT]]: +// RV32-NEXT: [[TMP4:%.*]] = phi i32 [ [[TMP2]], %[[IME_VSETLAMBDA_READ]] ], [ [[TMP3]], %[[IME_VSETLAMBDA_SET]] ] +// RV32-NEXT: ret i32 [[TMP4]] +// +// RV64-LABEL: define dso_local i64 @test_vsetlambda_runtime( +// RV64-SAME: i64 noundef [[X:%.*]]) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[X_ADDR:%.*]] = alloca i64, align 8 
+// RV64-NEXT: store i64 [[X]], ptr [[X_ADDR]], align 8 +// RV64-NEXT: [[TMP0:%.*]] = load i64, ptr [[X_ADDR]], align 8 +// RV64-NEXT: [[TMP1:%.*]] = icmp eq i64 [[TMP0]], 0 +// RV64-NEXT: br i1 [[TMP1]], label %[[IME_VSETLAMBDA_READ:.*]], label %[[IME_VSETLAMBDA_SET:.*]] +// RV64: [[IME_VSETLAMBDA_READ]]: +// RV64-NEXT: [[TMP2:%.*]] = call i64 @llvm.riscv.ime.readlambda.i64() +// RV64-NEXT: br label %[[IME_VSETLAMBDA_CONT:.*]] +// RV64: [[IME_VSETLAMBDA_SET]]: +// RV64-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 [[TMP0]]) +// RV64-NEXT: br label %[[IME_VSETLAMBDA_CONT]] +// RV64: [[IME_VSETLAMBDA_CONT]]: +// RV64-NEXT: [[TMP4:%.*]] = phi i64 [ [[TMP2]], %[[IME_VSETLAMBDA_READ]] ], [ [[TMP3]], %[[IME_VSETLAMBDA_SET]] ] +// RV64-NEXT: ret i64 [[TMP4]] +// +size_t test_vsetlambda_runtime(size_t x) { + return __riscv_vsetlambda(x); +} + +// RV32-LABEL: define dso_local i32 @test_vsetlambda_save_restore( +// RV32-SAME: ) #[[ATTR0]] { +// RV32-NEXT: [[ENTRY:.*:]] +// RV32-NEXT: [[SAVED:%.*]] = alloca i32, align 4 +// RV32-NEXT: [[TMP0:%.*]] = call i32 @llvm.riscv.ime.readlambda.i32() +// RV32-NEXT: store i32 [[TMP0]], ptr [[SAVED]], align 4 +// RV32-NEXT: [[TMP1:%.*]] = load i32, ptr [[SAVED]], align 4 +// RV32-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 +// RV32-NEXT: br i1 [[TMP2]], label %[[IME_VSETLAMBDA_READ:.*]], label %[[IME_VSETLAMBDA_SET:.*]] +// RV32: [[IME_VSETLAMBDA_READ]]: +// RV32-NEXT: [[TMP3:%.*]] = call i32 @llvm.riscv.ime.readlambda.i32() +// RV32-NEXT: br label %[[IME_VSETLAMBDA_CONT:.*]] +// RV32: [[IME_VSETLAMBDA_SET]]: +// RV32-NEXT: [[TMP4:%.*]] = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 [[TMP1]]) +// RV32-NEXT: br label %[[IME_VSETLAMBDA_CONT]] +// RV32: [[IME_VSETLAMBDA_CONT]]: +// RV32-NEXT: [[TMP5:%.*]] = phi i32 [ [[TMP3]], %[[IME_VSETLAMBDA_READ]] ], [ [[TMP4]], %[[IME_VSETLAMBDA_SET]] ] +// RV32-NEXT: ret i32 [[TMP5]] +// +// RV64-LABEL: define dso_local i64 @test_vsetlambda_save_restore( +// 
RV64-SAME: ) #[[ATTR0]] { +// RV64-NEXT: [[ENTRY:.*:]] +// RV64-NEXT: [[SAVED:%.*]] = alloca i64, align 8 +// RV64-NEXT: [[TMP0:%.*]] = call i64 @llvm.riscv.ime.readlambda.i64() +// RV64-NEXT: store i64 [[TMP0]], ptr [[SAVED]], align 8 +// RV64-NEXT: [[TMP1:%.*]] = load i64, ptr [[SAVED]], align 8 +// RV64-NEXT: [[TMP2:%.*]] = icmp eq i64 [[TMP1]], 0 +// RV64-NEXT: br i1 [[TMP2]], label %[[IME_VSETLAMBDA_READ:.*]], label %[[IME_VSETLAMBDA_SET:.*]] +// RV64: [[IME_VSETLAMBDA_READ]]: +// RV64-NEXT: [[TMP3:%.*]] = call i64 @llvm.riscv.ime.readlambda.i64() +// RV64-NEXT: br label %[[IME_VSETLAMBDA_CONT:.*]] +// RV64: [[IME_VSETLAMBDA_SET]]: +// RV64-NEXT: [[TMP4:%.*]] = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 [[TMP1]]) +// RV64-NEXT: br label %[[IME_VSETLAMBDA_CONT]] +// RV64: [[IME_VSETLAMBDA_CONT]]: +// RV64-NEXT: [[TMP5:%.*]] = phi i64 [ [[TMP3]], %[[IME_VSETLAMBDA_READ]] ], [ [[TMP4]], %[[IME_VSETLAMBDA_SET]] ] +// RV64-NEXT: ret i64 [[TMP5]] +// +size_t test_vsetlambda_save_restore(void) { + size_t saved = __riscv_vsetlambda(0); + return __riscv_vsetlambda(saved); +} diff --git a/clang/test/Sema/riscv-ime-vsetlambda.c b/clang/test/Sema/riscv-ime-vsetlambda.c index 0e3b8cbf889cc..0c41cdc491de2 100644 --- a/clang/test/Sema/riscv-ime-vsetlambda.c +++ b/clang/test/Sema/riscv-ime-vsetlambda.c @@ -19,20 +19,20 @@ void ok(void) { } void bad_value(void) { - __riscv_vsetlambda(3); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} - __riscv_vsetlambda(128); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} - __riscv_vsetlambda(-1); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(3); // expected-error {{constant argument 
to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(128); // expected-error {{constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(-1); // expected-error {{constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} } -void bad_runtime(size_t x) { - __riscv_vsetlambda(x); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} - __riscv_vsetlambda(x++); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} +void ok_runtime(size_t x) { + __riscv_vsetlambda(x); + __riscv_vsetlambda(x++); } void bad_wrap(void) { - __riscv_vsetlambda(0x100000004ULL); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} - __riscv_vsetlambda(-4294967292LL); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(0x100000004ULL); // expected-error {{constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(-4294967292LL); // expected-error {{constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} #if __SIZEOF_POINTER__ == 8 - __riscv_vsetlambda(((__int128)1) << 70); // expected-error {{argument to RISC-V IME vsetlambda builtin must be an integer constant expression evaluating to 0 or a power of two in the range [1, 64]}} + __riscv_vsetlambda(((__int128)1) << 70); // expected-error {{constant argument to RISC-V IME vsetlambda builtin must be 0 or a power of two in the range [1, 64]}} #endif } 
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td index 7e54c8aefa4e1..8d809ad80f133 100644 --- a/llvm/include/llvm/IR/IntrinsicsRISCV.td +++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td @@ -2080,10 +2080,12 @@ let TargetPrefix = "riscv" in { : Intrinsic<[llvm_anyint_ty], [], [IntrNoMem, IntrHasSideEffects]>; // Write a nonzero requested lambda and return the established lambda. - // The argument contract is: positive power of two in {1,2,4,8,16,32,64}. + // Source-level contract: the argument is a positive power of two in + // {1,2,4,8,16,32,64}. Clang emits a separate readlambda path for + // requested_lambda == 0 before calling this primitive. def int_riscv_ime_vsetlambda_nonzero : Intrinsic<[llvm_anyint_ty], [LLVMMatchType<0>], - [IntrNoMem, IntrHasSideEffects, ImmArg<ArgIndex<0>>]>; + [IntrNoMem, IntrHasSideEffects]>; } // TargetPrefix = "riscv" // Vendor extensions diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index 7061a17fe6b4f..3e3d384ce76e5 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -11744,11 +11744,51 @@ static SDValue lowerIMEReadSelectedLambda(SDValue Op, SelectionDAG &DAG, return DAG.getMergeValues({Lambda, Chain}, DL); } -// Lower the nonzero selected-lambda write/readback primitive used by -// __riscv_vsetlambda(N), for N in {1,2,4,8,16,32,64}. The IME vtype fields -// live in high vtype bits outside the vsetvli/vsetivli immediate fields, so the -// spec requires configuring them with register-form vsetvl using a full vtype -// value in a GPR. +static SDValue encodeRuntimeIMELambda(SDValue Requested, const SDLoc &DL, + SelectionDAG &DAG, + const RISCVSubtarget &Subtarget, + MVT XLenVT) { + // This primitive is the nonzero arm of the C-level __riscv_vsetlambda + // lowering. Valid runtime inputs are {1,2,4,8,16,32,64}. 
On targets with + // Zbb, ctz(x) + 1 maps directly to the 3-bit vtype.lambda encoding. + if (Subtarget.hasStdExtZbb()) { + SDValue Encoded = DAG.getNode(ISD::CTTZ_ZERO_POISON, DL, XLenVT, Requested); + return DAG.getNode(ISD::ADD, DL, XLenVT, Encoded, + DAG.getConstant(1, DL, XLenVT)); + } + + // Without Zbb, generic cttz can expand to libcalls. Build the 3-bit + // vtype.lambda encoding directly instead. + // + // Invalid runtime inputs are outside the source-level contract. Leave their + // encoding as zero so this lowering does not write outside lambda[2:0] or + // synthesize an encoding that sets VILL. + SDValue Encoded = DAG.getConstant(0, DL, XLenVT); + + auto SelectIfEq = [&](uint64_t Value, unsigned Enc) { + SDValue IsEq = + DAG.getSetCC(DL, XLenVT, Requested, + DAG.getConstant(Value, DL, XLenVT), ISD::SETEQ); + Encoded = DAG.getSelect(DL, XLenVT, IsEq, + DAG.getConstant(Enc, DL, XLenVT), Encoded); + }; + + SelectIfEq(1, 1); + SelectIfEq(2, 2); + SelectIfEq(4, 3); + SelectIfEq(8, 4); + SelectIfEq(16, 5); + SelectIfEq(32, 6); + SelectIfEq(64, 7); + + return Encoded; +} + +// Lower the nonzero selected-lambda write/readback primitive used by the +// nonzero path of __riscv_vsetlambda(N). Valid source-level values are +// {1,2,4,8,16,32,64}. The IME vtype fields live in high vtype bits outside the +// vsetvli/vsetivli immediate fields, so the spec requires configuring them +// with register-form vsetvl using a full vtype value in a GPR. 
// // The lowering preserves the current vl and all other vtype fields: // @@ -11765,21 +11805,22 @@ static SDValue lowerIMEVSetLambdaNonZero(SDValue Op, SelectionDAG &DAG, SDValue Requested = Op.getOperand(2); MVT XLenVT = Subtarget.getXLenVT(); - auto *C = dyn_cast<ConstantSDNode>(Requested); - if (!C) - report_fatal_error( - "llvm.riscv.ime.vsetlambda.nonzero requires an immediate argument"); - - uint64_t Value = C->getZExtValue(); - if (!isValidIMELambdaValue(Value)) - report_fatal_error( - "invalid argument for llvm.riscv.ime.vsetlambda.nonzero: expected a " - "power of two in {1,2,4,8,16,32,64}"); - SDValue OldVType = readIMEVType(Chain, DL, DAG, Subtarget); Chain = OldVType.getValue(1); - SDValue Encoded = DAG.getConstant(Log2_64(Value) + 1, DL, XLenVT); + SDValue Encoded; + if (auto *C = dyn_cast<ConstantSDNode>(Requested)) { + uint64_t Value = C->getZExtValue(); + if (!isValidIMELambdaValue(Value)) + report_fatal_error( + "invalid constant requested lambda for " + "llvm.riscv.ime.vsetlambda.nonzero"); + + Encoded = DAG.getConstant(Log2_64(Value) + 1, DL, XLenVT); + } else { + Encoded = encodeRuntimeIMELambda(Requested, DL, DAG, Subtarget, XLenVT); + } + SDValue Cleared = DAG.getNode(ISD::AND, DL, XLenVT, OldVType, DAG.getConstant(getIMEClearLambdaMask(Subtarget), DL, diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll index 769ae27a8a673..0924dd8f788c6 100644 --- a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv32.ll @@ -1,12 +1,34 @@ ; REQUIRES: riscv-registered-target -; RUN: not --crash llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %s 2>&1 \ +; RUN: split-file %s %t +; RUN: not --crash llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %t/zero.ll 2>&1 \ +; RUN: | FileCheck %s --check-prefix=BAD-VALUE +; RUN: not --crash llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %t/three.ll 2>&1 \ 
+; RUN: | FileCheck %s --check-prefix=BAD-VALUE +; RUN: not --crash llc -mtriple=riscv32 -mattr=+experimental-zvvmm < %t/too-large.ll 2>&1 \ ; RUN: | FileCheck %s --check-prefix=BAD-VALUE -; BAD-VALUE: invalid argument for llvm.riscv.ime.vsetlambda.nonzero +; BAD-VALUE: invalid constant requested lambda for llvm.riscv.ime.vsetlambda.nonzero +;--- zero.ll define i32 @vsetlambda_invalid_zero() { %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 0) ret i32 %lambda } -declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 immarg) +declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32) + +;--- three.ll +define i32 @vsetlambda_invalid_three() { + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 3) + ret i32 %lambda +} + +declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32) + +;--- too-large.ll +define i32 @vsetlambda_invalid_128() { + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 128) + ret i32 %lambda +} + +declare i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32) diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll index 47bafef1c295b..113326e4ec2a2 100644 --- a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-invalid-rv64.ll @@ -1,12 +1,34 @@ ; REQUIRES: riscv-registered-target -; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %s 2>&1 \ +; RUN: split-file %s %t +; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %t/zero.ll 2>&1 \ ; RUN: | FileCheck %s --check-prefix=BAD-VALUE +; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %t/three.ll 2>&1 \ +; RUN: | FileCheck %s --check-prefix=BAD-VALUE +; RUN: not --crash llc -mtriple=riscv64 -mattr=+experimental-zvvmm < %t/too-large.ll 2>&1 \ +; RUN: | FileCheck %s --check-prefix=BAD-VALUE + +; BAD-VALUE: invalid constant requested lambda for llvm.riscv.ime.vsetlambda.nonzero + 
+;--- zero.ll +define i64 @vsetlambda_invalid_zero() { + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 0) + ret i64 %lambda +} + +declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64) + +;--- three.ll +define i64 @vsetlambda_invalid_three() { + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 3) + ret i64 %lambda +} -; BAD-VALUE: invalid argument for llvm.riscv.ime.vsetlambda.nonzero +declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64) +;--- too-large.ll define i64 @vsetlambda_invalid_128() { %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 128) ret i64 %lambda } -declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 immarg) +declare i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64) diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll index a81eb70d4b8f0..cf5af034b42bf 100644 --- a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv32.ll @@ -8,6 +8,8 @@ ; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvvmm \ ; RUN: -riscv-v-vector-bits-min=1024 -riscv-v-vector-bits-max=1024 < %s \ ; RUN: | FileCheck %s --check-prefix=RV32-VLEN1024 +; RUN: llc -mtriple=riscv32 -mattr=+experimental-zvvmm,+zbb < %s \ +; RUN: | FileCheck %s --check-prefix=RV32-ZBB define i32 @ime_vlen_rv32() { ; RV32-LABEL: ime_vlen_rv32: @@ -584,6 +586,119 @@ define i32 @vsetlambda_64_rv32() { ret i32 %lambda } +define i32 @vsetlambda_nonzero_runtime_rv32(i32 %x) { +; RV32-LABEL: vsetlambda_nonzero_runtime_rv32: +; RV32-NOT: call +; RV32: csrr {{.*}}, vtype +; RV32-NOT: call +; RV32: vsetvl zero, zero, +; RV32-NOT: call +; RV32: csrr a0, vtype +; RV32-NOT: call +; RV32: ret +; +; RV32-VLEN512-LABEL: vsetlambda_nonzero_runtime_rv32: +; RV32-VLEN512-NOT: call +; RV32-VLEN512: csrr {{.*}}, vtype +; RV32-VLEN512-NOT: call +; RV32-VLEN512: vsetvl zero, zero, +; RV32-VLEN512-NOT: call +; RV32-VLEN512: csrr a0, vtype +; 
RV32-VLEN512-NOT: call +; RV32-VLEN512: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_nonzero_runtime_rv32: +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: csrr {{.*}}, vtype +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: vsetvl zero, zero, +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: csrr a0, vtype +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: ret +; +; RV32-ZBB-LABEL: vsetlambda_nonzero_runtime_rv32: +; RV32-ZBB-NOT: call +; RV32-ZBB: csrr {{.*}}, vtype +; RV32-ZBB-NOT: call +; RV32-ZBB: ctz {{.*}}, a0 +; RV32-ZBB-NOT: call +; RV32-ZBB: vsetvl zero, zero, +; RV32-ZBB-NOT: call +; RV32-ZBB: csrr a0, vtype +; RV32-ZBB-NOT: call +; RV32-ZBB: ret + %lambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 %x) + ret i32 %lambda +} + +define i32 @vsetlambda_runtime_split_rv32(i32 %x) { +; RV32-LABEL: vsetlambda_runtime_split_rv32: +; RV32-NOT: call +; RV32: beqz a0, [[READ:.LBB[0-9_]+]] +; RV32-NOT: call +; RV32: vsetvl zero, zero, +; RV32-NOT: call +; RV32: [[READ]]: # %cont +; RV32-NEXT: csrr a0, vtype +; RV32-NOT: vsetvl +; RV32-NOT: call +; RV32: ret +; +; RV32-VLEN512-LABEL: vsetlambda_runtime_split_rv32: +; RV32-VLEN512-NOT: call +; RV32-VLEN512: beqz a0, [[READ:.LBB[0-9_]+]] +; RV32-VLEN512-NOT: call +; RV32-VLEN512: vsetvl zero, zero, +; RV32-VLEN512-NOT: call +; RV32-VLEN512: [[READ]]: # %cont +; RV32-VLEN512-NEXT: csrr a0, vtype +; RV32-VLEN512-NOT: vsetvl +; RV32-VLEN512-NOT: call +; RV32-VLEN512: ret +; +; RV32-VLEN1024-LABEL: vsetlambda_runtime_split_rv32: +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: beqz a0, [[READ:.LBB[0-9_]+]] +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: vsetvl zero, zero, +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: [[READ]]: # %cont +; RV32-VLEN1024-NEXT: csrr a0, vtype +; RV32-VLEN1024-NOT: vsetvl +; RV32-VLEN1024-NOT: call +; RV32-VLEN1024: ret +; +; RV32-ZBB-LABEL: vsetlambda_runtime_split_rv32: +; RV32-ZBB-NOT: call +; RV32-ZBB: beqz a0, [[READ:.LBB[0-9_]+]] +; RV32-ZBB-NOT: call +; RV32-ZBB: ctz {{.*}}, a0 +; RV32-ZBB-NOT: call +; 
RV32-ZBB: vsetvl zero, zero, +; RV32-ZBB-NOT: call +; RV32-ZBB: [[READ]]: # %cont +; RV32-ZBB-NEXT: csrr a0, vtype +; RV32-ZBB-NOT: vsetvl +; RV32-ZBB-NOT: call +; RV32-ZBB: ret +entry: + %iszero = icmp eq i32 %x, 0 + br i1 %iszero, label %read, label %set + +read: + %readlambda = call i32 @llvm.riscv.ime.readlambda.i32() + br label %cont + +set: + %setlambda = call i32 @llvm.riscv.ime.vsetlambda.nonzero.i32(i32 %x) + br label %cont + +cont: + %lambda = phi i32 [ %readlambda, %read ], [ %setlambda, %set ] + ret i32 %lambda +} + declare i32 @llvm.riscv.ime.vlen.i32() declare i32 @llvm.riscv.ime.lambda.i32() declare i32 @llvm.riscv.ime.readlambda.i32() diff --git a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll index 52b9cc6abbde4..cd14feaff0df4 100644 --- a/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll +++ b/llvm/test/CodeGen/RISCV/ime-config-intrinsics-rv64.ll @@ -8,6 +8,8 @@ ; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvvmm \ ; RUN: -riscv-v-vector-bits-min=1024 -riscv-v-vector-bits-max=1024 < %s \ ; RUN: | FileCheck %s --check-prefix=RV64-VLEN1024 +; RUN: llc -mtriple=riscv64 -mattr=+experimental-zvvmm,+zbb < %s \ +; RUN: | FileCheck %s --check-prefix=RV64-ZBB define i64 @ime_vlen_rv64() { ; RV64-LABEL: ime_vlen_rv64: @@ -614,6 +616,119 @@ define i64 @vsetlambda_64_rv64() { ret i64 %lambda } +define i64 @vsetlambda_nonzero_runtime_rv64(i64 %x) { +; RV64-LABEL: vsetlambda_nonzero_runtime_rv64: +; RV64-NOT: call +; RV64: csrr {{.*}}, vtype +; RV64-NOT: call +; RV64: vsetvl zero, zero, +; RV64-NOT: call +; RV64: csrr a0, vtype +; RV64-NOT: call +; RV64: ret +; +; RV64-VLEN512-LABEL: vsetlambda_nonzero_runtime_rv64: +; RV64-VLEN512-NOT: call +; RV64-VLEN512: csrr {{.*}}, vtype +; RV64-VLEN512-NOT: call +; RV64-VLEN512: vsetvl zero, zero, +; RV64-VLEN512-NOT: call +; RV64-VLEN512: csrr a0, vtype +; RV64-VLEN512-NOT: call +; RV64-VLEN512: ret +; +; RV64-VLEN1024-LABEL: 
vsetlambda_nonzero_runtime_rv64: +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: csrr {{.*}}, vtype +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: vsetvl zero, zero, +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: csrr a0, vtype +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: ret +; +; RV64-ZBB-LABEL: vsetlambda_nonzero_runtime_rv64: +; RV64-ZBB-NOT: call +; RV64-ZBB: csrr {{.*}}, vtype +; RV64-ZBB-NOT: call +; RV64-ZBB: ctz {{.*}}, a0 +; RV64-ZBB-NOT: call +; RV64-ZBB: vsetvl zero, zero, +; RV64-ZBB-NOT: call +; RV64-ZBB: csrr a0, vtype +; RV64-ZBB-NOT: call +; RV64-ZBB: ret + %lambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 %x) + ret i64 %lambda +} + +define i64 @vsetlambda_runtime_split_rv64(i64 %x) { +; RV64-LABEL: vsetlambda_runtime_split_rv64: +; RV64-NOT: call +; RV64: beqz a0, [[READ:.LBB[0-9_]+]] +; RV64-NOT: call +; RV64: vsetvl zero, zero, +; RV64-NOT: call +; RV64: [[READ]]: # %cont +; RV64-NEXT: csrr a0, vtype +; RV64-NOT: vsetvl +; RV64-NOT: call +; RV64: ret +; +; RV64-VLEN512-LABEL: vsetlambda_runtime_split_rv64: +; RV64-VLEN512-NOT: call +; RV64-VLEN512: beqz a0, [[READ:.LBB[0-9_]+]] +; RV64-VLEN512-NOT: call +; RV64-VLEN512: vsetvl zero, zero, +; RV64-VLEN512-NOT: call +; RV64-VLEN512: [[READ]]: # %cont +; RV64-VLEN512-NEXT: csrr a0, vtype +; RV64-VLEN512-NOT: vsetvl +; RV64-VLEN512-NOT: call +; RV64-VLEN512: ret +; +; RV64-VLEN1024-LABEL: vsetlambda_runtime_split_rv64: +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: beqz a0, [[READ:.LBB[0-9_]+]] +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: vsetvl zero, zero, +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: [[READ]]: # %cont +; RV64-VLEN1024-NEXT: csrr a0, vtype +; RV64-VLEN1024-NOT: vsetvl +; RV64-VLEN1024-NOT: call +; RV64-VLEN1024: ret +; +; RV64-ZBB-LABEL: vsetlambda_runtime_split_rv64: +; RV64-ZBB-NOT: call +; RV64-ZBB: beqz a0, [[READ:.LBB[0-9_]+]] +; RV64-ZBB-NOT: call +; RV64-ZBB: ctz {{.*}}, a0 +; RV64-ZBB-NOT: call +; RV64-ZBB: vsetvl zero, zero, +; RV64-ZBB-NOT: call +; RV64-ZBB: [[READ]]: # 
%cont +; RV64-ZBB-NEXT: csrr a0, vtype +; RV64-ZBB-NOT: vsetvl +; RV64-ZBB-NOT: call +; RV64-ZBB: ret +entry: + %iszero = icmp eq i64 %x, 0 + br i1 %iszero, label %read, label %set + +read: + %readlambda = call i64 @llvm.riscv.ime.readlambda.i64() + br label %cont + +set: + %setlambda = call i64 @llvm.riscv.ime.vsetlambda.nonzero.i64(i64 %x) + br label %cont + +cont: + %lambda = phi i64 [ %readlambda, %read ], [ %setlambda, %set ] + ret i64 %lambda +} + declare i64 @llvm.riscv.ime.vlen.i64() declare i64 @llvm.riscv.ime.lambda.i64() declare i64 @llvm.riscv.ime.readlambda.i64() _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
