On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
> We're missing the "Expanded Instruction-Length Encoding" logic from
> the
> unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
> This doesn't have impact in regular insn emulation because most (if
> not
> all) of the existing insns are 2 or 4 bytes.
>
> However, running with "-d in_asm" will cause QEMU to run an infinite
> loop because our disas code can't handle the expanded length
> encoding,
> causing inst_length() to return '0' and target_disas() to loop
> forever
> since it's using the len to decrease the loop counter.
>
> Fixing just the disas code isn't enough. target_disas() will do a
>
> "if (size < count) {"
>
> check, where size = the insn length from the translator and count =
> the
> insn length from disas, and will warn about it during disassembling.
> We
> need both places to use the same insn length logic.
>
> [1] https://gitlab.com/qemu-project/qemu/-/work_items/3479
>
> Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
> Signed-off-by: Daniel Henrique Barboza
> <[email protected]>
> ---
> disas/riscv.c | 50 +++++++++++++++++++++++++++++---------
> --
> target/riscv/internals.h | 33 ++++++++++++++++++++++++--
> target/riscv/translate.c | 7 +++++-
> 3 files changed, 73 insertions(+), 17 deletions(-)
>
> diff --git a/disas/riscv.c b/disas/riscv.c
> index d416a4d6b3..d32661c857 100644
> --- a/disas/riscv.c
> +++ b/disas/riscv.c
> @@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
> const rvc_constraint *c)
> return true;
> }
>
> -/* instruction length */
> -
> +/*
> + * Note: basically a carbon copy of insn_len() from
> + * target/riscv/internals.h.
> + */
> static size_t inst_length(rv_inst inst)
> {
> - /* NOTE: supports maximum instruction size of 64-bits */
> -
> /*
> - * instruction length coding
> + * "Expanded Instruction-Length Encoding" as in
> + * unpriv isa section 1.5.1.
This section was removed from the manual with commit
commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
Author: Andrew Waterman <[email protected]>
Date: Tue Apr 1 18:02:36 2025 -0700
Remove proposal for longer instruction encodings
This is pursuant to making the manual contain only ratified
content.
So I don't think we should reference it. We should probably drop
support for these as who knows what the ratified version will look like
Alistair
> *
> - * aa - 16 bit aa != 11
> - * bbb11 - 32 bit bbb != 111
> - * 011111 - 48 bit
> - * 0111111 - 64 bit
> + * aa - 16 bit aa != 11
> + * bbb11 - 32 bit bbb != 111
> + * 011111 - 48 bit
> + * 0111111 - 64 bit
> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> + * x111xxxxx1111111 - 192 bits
> */
> + if ((inst & 0b11) != 0b11) {
> + return 2;
> + } else if ((inst & 0b11100) != 0b11100) {
> + return 4;
> + } else if ((inst & 0b111111) == 0b011111) {
> + return 6;
> + } else if ((inst & 0b1111111) == 0b0111111) {
> + return 8;
> + } else if ((inst & 0b1111111) == 0b1111111) {
> + uint32_t nnn = (inst >> 12) & 0b0111;
> +
> + if (nnn == 0b111) {
> + return 24;
> + }
> + return (16 * nnn + 80) / 8;
> + }
>
> - return (inst & 0b11) != 0b11 ? 2
> - : (inst & 0b11100) != 0b11100 ? 4
> - : (inst & 0b111111) == 0b011111 ? 6
> - : (inst & 0b1111111) == 0b0111111 ? 8
> - : 0;
> + /*
> + * Returning 0 if we don't find the right insn length will
> + * cause an infinite loop in target_disas(). Return an
> + * unrealistic length value instead, making target_disas()
> + * trigger the "Disassembler disagrees with translator over
> + * instruction" error path.
> + */
> + return 1024;
> }
>
> /* format instruction */
> diff --git a/target/riscv/internals.h b/target/riscv/internals.h
> index 8c24af0d85..80c2f8f5f8 100644
> --- a/target/riscv/internals.h
> +++ b/target/riscv/internals.h
> @@ -245,9 +245,38 @@ static inline target_ulong
> adjust_addr_virt(CPURISCVState *env,
> return adjust_addr_body(env, addr, true);
> }
>
> -static inline int insn_len(uint16_t first_word)
> +static inline int insn_len(uint32_t opcode)
> {
> - return (first_word & 3) == 3 ? 4 : 2;
> + /*
> + * "Expanded Instruction-Length Encoding" as in
> + * unpriv isa section 1.5.1.
> + *
> + * aa - 16 bit aa != 11
> + * bbb11 - 32 bit bbb != 111
> + * 011111 - 48 bit
> + * 0111111 - 64 bit
> + * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
> + * x111xxxxx1111111 - 192 bits
> + */
> + if ((opcode & 0b11) != 0b11) {
> + return 2;
> + } else if ((opcode & 0b11100) != 0b11100) {
> + return 4;
> + } else if ((opcode & 0b111111) == 0b011111) {
> + return 6;
> + } else if ((opcode & 0b1111111) == 0b0111111) {
> + return 8;
> + } else if ((opcode & 0b1111111) == 0b1111111) {
> + uint32_t nnn = (opcode >> 12) & 0b0111;
> +
> + if (nnn == 0b111) {
> + return 24;
> + }
> + return (16 * nnn + 80) / 8;
> + }
> +
> + /* Shouldn't happen, but ... */
> + return -1;
> }
>
> int riscv_monitor_get_register_legacy(CPUState *cs, const char
> *name,
> diff --git a/target/riscv/translate.c b/target/riscv/translate.c
> index 1e4f340256..bd28ed3fcb 100644
> --- a/target/riscv/translate.c
> +++ b/target/riscv/translate.c
> @@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
> {
> uint32_t opcode;
> bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
> + int insn_length;
>
> ctx->virt_inst_excp = false;
> if (pc_is_4byte_align) {
> @@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
> DisasContext *ctx)
> }
> ctx->ol = ctx->xl;
>
> - ctx->cur_insn_len = insn_len((uint16_t)opcode);
> + insn_length = insn_len(opcode);
> + if (insn_length < 0) {
> + gen_exception_illegal(ctx);
> + }
> + ctx->cur_insn_len = insn_length;
> /* Check for compressed insn */
> if (ctx->cur_insn_len == 2) {
> ctx->opcode = (uint16_t)opcode;