On 5/26/2026 9:35 PM, Alistair Francis wrote:
On Wed, 2026-05-20 at 14:35 -0300, Daniel Henrique Barboza wrote:
We're missing the "Expanded Instruction-Length Encoding" logic from
the
unpriv isa, meaning we're not detecting missing insn len > 8 bytes.
This doesn't have impact in regular insn emulation because most (if
not
all) of the existing insns are 2 or 4 bytes.

However, running with "-d in_asm" will cause QEMU to run an infinite
loop because our disas code can't handle the expanded length
encoding,
causing inst_length() to return '0' and target_disas() to loop
forever
since it's using the len to decrease the loop counter.

Fixing just the disas code isn't enough.  target_disas() will do a

"if (size < count) {"

check, where size = the insn length from the translator and count =
the
insn length from disas, and will warn about it during disassembling.
We
need both places to use the same insn length logic.

[1] https://gitlab.com/qemu-project/qemu/-/work_items/3479

Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3479
Signed-off-by: Daniel Henrique Barboza
<[email protected]>
---
  disas/riscv.c            | 50 +++++++++++++++++++++++++++++---------
--
  target/riscv/internals.h | 33 ++++++++++++++++++++++++--
  target/riscv/translate.c |  7 +++++-
  3 files changed, 73 insertions(+), 17 deletions(-)

diff --git a/disas/riscv.c b/disas/riscv.c
index d416a4d6b3..d32661c857 100644
--- a/disas/riscv.c
+++ b/disas/riscv.c
@@ -5057,26 +5057,48 @@ static bool check_constraints(rv_decode *dec,
const rvc_constraint *c)
      return true;
  }
-/* instruction length */
-
+/*
+ * Note: basically a carbon copy of insn_len() from
+ * target/riscv/internals.h.
+ */
  static size_t inst_length(rv_inst inst)
  {
-    /* NOTE: supports maximum instruction size of 64-bits */
-
      /*
-     * instruction length coding
+     * "Expanded Instruction-Length Encoding" as in
+     * unpriv isa section 1.5.1.

This section was removed from the manual with commit

commit 4f05ffef4987213ddfb78b3e3aaf91b67892a664
Author: Andrew Waterman <[email protected]>
Date:   Tue Apr 1 18:02:36 2025 -0700

     Remove proposal for longer instruction encodings
This is pursuant to making the manual contain only ratified
content.


So I don't think we should reference it. We should probably drop
support for these as who knows what the ratified version will look like


Ooohhh good call. In the official RVI link:


https://docs.riscv.org/reference/isa/unpriv/intro.html#1-1-5-base-instruction-length-encoding


"All the 32-bit instructions in the base ISA have their lowest two bits set to 
11.
The optional compressed 16-bit instruction-set extensions have their lowest two
bits equal to 00, 01, or 10."

So we'll support either 16 or 32 bits lengths only.  I'll send a v2.


Thanks,
Daniel



Alistair

       *
-     *      aa - 16 bit aa != 11
-     *   bbb11 - 32 bit bbb != 111
-     *  011111 - 48 bit
-     * 0111111 - 64 bit
+     *               aa - 16 bit aa != 11
+     *            bbb11 - 32 bit bbb != 111
+     *           011111 - 48 bit
+     *          0111111 - 64 bit
+     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+     * x111xxxxx1111111 - 192 bits
       */
+    if ((inst & 0b11) != 0b11) {
+        return 2;
+    } else if ((inst & 0b11100) != 0b11100) {
+        return 4;
+    } else if ((inst & 0b111111) == 0b011111) {
+        return 6;
+    } else if ((inst & 0b1111111) == 0b0111111) {
+        return 8;
+    } else if ((inst & 0b1111111) == 0b1111111) {
+        uint32_t nnn = (inst >> 12) & 0b0111;
+
+        if (nnn == 0b111) {
+            return 24;
+        }
+        return (16 * nnn + 80) / 8;
+    }
-    return (inst &      0b11) != 0b11      ? 2
-         : (inst &   0b11100) != 0b11100   ? 4
-         : (inst &  0b111111) == 0b011111  ? 6
-         : (inst & 0b1111111) == 0b0111111 ? 8
-         : 0;
+    /*
+     * Returning 0 if we don't find the right insn length will
+     * cause an infinite loop in target_disas().  Return an
+     * unrealistic length value instead, making target_disas()
+     * trigger the "Disassembler disagrees with translator over
+     * instruction" error path.
+     */
+    return 1024;
  }
 /* format instruction */
diff --git a/target/riscv/internals.h b/target/riscv/internals.h
index 8c24af0d85..80c2f8f5f8 100644
--- a/target/riscv/internals.h
+++ b/target/riscv/internals.h
@@ -245,9 +245,38 @@ static inline target_ulong
adjust_addr_virt(CPURISCVState *env,
      return adjust_addr_body(env, addr, true);
  }
-static inline int insn_len(uint16_t first_word)
+static inline int insn_len(uint32_t opcode)
  {
-    return (first_word & 3) == 3 ? 4 : 2;
+    /*
+     * "Expanded Instruction-Length Encoding" as in
+     * unpriv isa section 1.5.1.
+     *
+     *               aa - 16 bit aa != 11
+     *            bbb11 - 32 bit bbb != 111
+     *           011111 - 48 bit
+     *          0111111 - 64 bit
+     * xnnnxxxxx1111111 - (80 + 16*nnn) bits, if nnn != 111
+     * x111xxxxx1111111 - 192 bits
+     */
+    if ((opcode & 0b11) != 0b11) {
+        return 2;
+    } else if ((opcode & 0b11100) != 0b11100) {
+        return 4;
+    } else if ((opcode & 0b111111) == 0b011111) {
+        return 6;
+    } else if ((opcode & 0b1111111) == 0b0111111) {
+        return 8;
+    } else if ((opcode & 0b1111111) == 0b1111111) {
+        uint32_t nnn = (opcode >> 12) & 0b0111;
+
+        if (nnn == 0b111) {
+            return 24;
+        }
+        return (16 * nnn + 80) / 8;
+    }
+
+    /* Shouldn't happen, but ... */
+    return -1;
  }
 int riscv_monitor_get_register_legacy(CPUState *cs, const char
*name,
diff --git a/target/riscv/translate.c b/target/riscv/translate.c
index 1e4f340256..bd28ed3fcb 100644
--- a/target/riscv/translate.c
+++ b/target/riscv/translate.c
@@ -1255,6 +1255,7 @@ static void decode_opc(CPURISCVState *env,
DisasContext *ctx)
  {
      uint32_t opcode;
      bool pc_is_4byte_align = ((ctx->base.pc_next % 4) == 0);
+    int insn_length;
     ctx->virt_inst_excp = false;
      if (pc_is_4byte_align) {
@@ -1279,7 +1280,11 @@ static void decode_opc(CPURISCVState *env,
DisasContext *ctx)
      }
      ctx->ol = ctx->xl;
-    ctx->cur_insn_len = insn_len((uint16_t)opcode);
+    insn_length = insn_len(opcode);
+    if (insn_length < 0) {
+        gen_exception_illegal(ctx);
+    }
+    ctx->cur_insn_len = insn_length;
      /* Check for compressed insn */
      if (ctx->cur_insn_len == 2) {
          ctx->opcode = (uint16_t)opcode;


Reply via email to