On 5/28/26 09:46, Paolo Bonzini wrote:
RCR and RCL instructions with a count of 9 are the same as if the
count was 0, but they generated incorrect code because the can_be_zero
flag is false. This causes 0 to underflow into -1 at
tcg_gen_subi_tl(count, count, 1).
Fix by absorbind the call to gen_shift_count() into gen_rotc_mod(),
so that the new function handles both mask and mod.
Resolves: https://gitlab.com/qemu-project/qemu/-/work_items/3452
Signed-off-by: Paolo Bonzini <[email protected]>
---
target/i386/tcg/emit.c.inc | 85 +++++++++++++++++++++++---------------
1 file changed, 52 insertions(+), 33 deletions(-)
diff --git a/target/i386/tcg/emit.c.inc b/target/i386/tcg/emit.c.inc
index ce636b6c56c..c6b0be1f60d 100644
--- a/target/i386/tcg/emit.c.inc
+++ b/target/i386/tcg/emit.c.inc
@@ -3244,12 +3244,10 @@ static void gen_PUSHF(DisasContext *s, X86DecodedInsn
*decode)
assume_cc_op(s, CC_OP_EFLAGS);
}
-static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
- bool *can_be_zero, TCGv *count, int unit)
+static void gen_shift_count_1(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit,
+ int mask)
{
- MemOp ot = decode->op[0].ot;
- int mask = (ot <= MO_32 ? 0x1f : 0x3f);
-
*can_be_zero = false;
switch (unit) {
case X86_OP_INT:
@@ -3259,12 +3257,17 @@ static MemOp gen_shift_count(DisasContext *s,
X86DecodedInsn *decode,
break;
case X86_OP_IMM:
- if ((decode->immediate & mask) == 0) {
+ /*
+ * The caller applied the mask (this is awkward; unfortunately the
+ * mask must be applied *before* the modulo operation for RCL/RCR,
+ * and the modulo operation must be before this check for zero).
+ */
+ if (decode->immediate == 0) {
Perhaps clearer to pass down mod and perform all of the arithmetic in here.
+static MemOp gen_shift_count(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit)
+{
+ MemOp ot = decode->op[0].ot;
+ int mask = (ot <= MO_32 ? 0x1f : 0x3f);
+
+ if (unit == X86_OP_IMM) {
+ decode->immediate &= mask;
+ }
+
+ gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
Here you could pass mod = 64, arbitrarily large, so that we could keep the mask
computation common within gen_shift_count_1.
-static void gen_rotc_mod(MemOp ot, TCGv count)
+static MemOp gen_rotc_count(DisasContext *s, X86DecodedInsn *decode,
+ bool *can_be_zero, TCGv *count, int unit)
{
TCGv temp;
+ MemOp ot = decode->op[0].ot;
+ int mod = (8 << ot) + 1;
+ int mask = (ot <= MO_32 ? 0x1f : 0x3f);
- switch (ot) {
- case MO_8:
- temp = tcg_temp_new();
- tcg_gen_subi_tl(temp, count, 18);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp,
count);
- tcg_gen_subi_tl(temp, count, 9);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp,
count);
- break;
-
- case MO_16:
- temp = tcg_temp_new();
- tcg_gen_subi_tl(temp, count, 17);
- tcg_gen_movcond_tl(TCG_COND_GE, count, temp, tcg_constant_tl(0), temp,
count);
- break;
-
- default:
- break;
+ /*
+ * RCx operations are invariant modulo 8*operand_size+1. For 8 and 16-bit
+ * operands, this is less than 0x1f (the mask applied by gen_shift_count)
+ * so reduce further. Failure to do so results in incorrect shifts
+ * in gen_RCL/gen_RCR.
+ */
+ if (unit == X86_OP_IMM) {
+ decode->immediate &= mask;
+ decode->immediate %= mod;
}
+
+ gen_shift_count_1(s, decode, can_be_zero, count, unit, mask);
+
+ if (unit == X86_OP_INT && mod < mask) {
And it would avoid having to have code split across these two functions in odd ways, and
reusing the switch within gen_shift_count_1.
+ temp = tcg_temp_new();
+ if (mod * 2 < mask) {
+ tcg_gen_subi_tl(temp, *count, mod * 2);
+ tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0),
temp, *count);
+ }
+ tcg_gen_subi_tl(temp, *count, mod);
+ tcg_gen_movcond_tl(TCG_COND_GE, *count, temp, tcg_constant_tl(0),
temp, *count);
+ }
Perhaps worth assert(mask < mod * 4), for documentation's sake?
r~