On 5/28/26 09:46, Paolo Bonzini wrote:
RCR and RCL instructions with a count of 9 are the same as if the
count was 0, but they generated incorrect code because the can_be_zero
flag is false.  This causes 0 to underflow into -1 at
tcg_gen_subi_tl(count, count, 1).

Fix by absorbind the call to gen_shift_count() into gen_rotc_mod(),
so that the new function handles both mask and mod.

Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3452
Signed-off-by: Paolo Bonzini <[email protected]>
---
  target/i386/tcg/emit.c.inc | 85 +++++++++++++++++++++++---------------
  1 file changed, 52 insertions(+), 33 deletions(-)

diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ce636b6c56c..c6b0be1f60d 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3244,12 +3244,10 @@ static void gen_PUSHF(DisasContext *s, X86DecodedInsn 
*decode)
      assume_cc_op(s, CC_OP_EFLAGS);
  }
-static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
-                             bool *can_be_zero, TCGv *count, int unit)
+static void gen_shift_count_1(DisasContext *s, X86DecodedInsn *decode,
+                              bool *can_be_zero, TCGv *count, int unit,
+                              int mask)
  {
-    MemOp ot = decode->op[0].ot;
-    int mask = (ot <= MO_32 ? 0x1f : 0x3f);
-
      *can_be_zero = false;
      switch (unit) {
      case X86_OP_INT:
@@ -3259,12 +3257,17 @@ static MemOp gen_shift_count(DisasContext *s, 
X86DecodedInsn *decode,
          break;
case X86_OP_IMM:
-        if ((decode->immediate & mask) == 0) {
+        /*
+         * The caller applied the mask (this is awkward; unfortunately the
+         * mask must be applied *before* the modulo operation for RCL/RCR,
+         * and the modulo operation must be before this check for zero).
+         */
+        if (decode->immediate == 0) {

Perhaps clearer to pass down mod and perform all of the arithmetic in here.

+static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
+                             bool *can_be_zero, TCGv *count, int unit)
+{
+    MemOp ot = decode->op[0].ot;
+    int mask = (ot <= MO_32 ? 0x1f : 0x3f);
+
+    if (unit == X86_OP_IMM) {
+        decode->immediate &= mask;
+    }
+
+    gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);

Here you could pass mod = 64, arbitrarily large, so that we could keep the mask computation common within gen_shift_count_1.

-static void gen_rotc_mod(MemOp ot, TCGv count)
+static MemOp gen_rotc_count(DisasContext *s, X86DecodedInsn *decode,
+                            bool *can_be_zero, TCGv *count, int unit)
  {
      TCGv temp;
+    MemOp ot = decode->op[0].ot;
+    int mod = (8 << ot) + 1;
+    int mask = (ot <= MO_32 ? 0x1f : 0x3f);
- switch (ot) {
-    case MO_8:
-        temp = tcg_temp_new();
-        tcg_gen_subi_tl(temp, count, 18);
-        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, 
count);
-        tcg_gen_subi_tl(temp, count, 9);
-        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, 
count);
-        break;
-
-    case MO_16:
-        temp = tcg_temp_new();
-        tcg_gen_subi_tl(temp, count, 17);
-        tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp, 
count);
-        break;
-
-    default:
-        break;
+    /*
+     * RCx operations are invariant modulo 8*operand_size+1.  For 8 and 16-bit
+     * operands, this is less than 0x1f (the mask applied by gen_shift_count)
+     * so reduce further.  Failure to do so results in incorrect shifts
+     * in gen_RCL/gen_RCR.
+     */
+    if (unit == X86_OP_IMM) {
+        decode->immediate &= mask;
+        decode->immediate %= mod;
      }
+
+    gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
+
+    if (unit == X86_OP_INT && mod < mask) {

And it would avoid having to have code split across these two functions in odd ways, and reusing the switch within gen_shift_count_1.

+        temp = tcg_temp_new();
+        if (mod * 2 < mask) {
+            tcg_gen_subi_tl(temp, *count, mod * 2);
+            tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), 
temp, *count);
+        }
+        tcg_gen_subi_tl(temp, *count, mod);
+        tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0), 
temp, *count);
+    }

Perhaps worth assert(mask < mod * 4), for documentation's sake?


r~

Reply via email to