On Sat, Aug 6, 2011 at 8:33 PM, Stefan Weil <w...@mail.berlios.de> wrote: > Am 06.08.2011 22:13, schrieb Stefan Weil: >> >> Am 06.08.2011 16:06, schrieb Blue Swirl: >>> >>> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc >>> considered only global registers. However, register temps and stack >>> allocated locals must be handled differently because register temps >>> don't survive across brcond. >>> >>> Fix by propagating only within same class of temps. >>> >>> Signed-off-by: Blue Swirl <blauwir...@gmail.com> >>> --- >>> tcg/optimize.c | 13 +++++++------ >>> tcg/tcg.h | 5 +++++ >>> 2 files changed, 12 insertions(+), 6 deletions(-) >>> >>> diff --git a/tcg/optimize.c b/tcg/optimize.c >>> index a3bfa5e..748ecf9 100644 >>> --- a/tcg/optimize.c >>> +++ b/tcg/optimize.c >>> @@ -185,12 +185,13 @@ static int op_to_movi(int op) >>> } >>> } >>> >>> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src, >>> - int nb_temps, int nb_globals) >>> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst, >>> + TCGArg src, int nb_temps, int nb_globals) >>> { >>> reset_temp(dst, nb_temps, nb_globals); >>> assert(temps[src].state != TCG_TEMP_COPY); >>> - if (src >= nb_globals) { >>> + if (src >= nb_globals && >>> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) { >>> assert(temps[src].state != TCG_TEMP_CONST); >> >> [snip] >> >> Hi Blue, >> >> your patch fixes qemu-system-x86_64 which now seems to work on 32 bit >> hosts, too. >> >> qemu-system-mips64(el) still fail with the same abort. They work when I >> remove the >> if block in tcg_opt_gen_mov. >> >> The Debian kernel for qemu-system-mips64 which I used for the test is >> available on >> http://qemu.weilnetz.de/mips64/. >> >> I could not reproduce the crash with qemu-system-ppc64 - neither with nor >> without >> your patch. 
>> >> Kind regards, >> Stefan > > The patch works with qemu-system-mips64(el) after a small modification: > > if (src >= nb_globals && > tcg_arg_is_local(s, src) && tcg_arg_is_local(s, dst)) {
This removes the optimization completely for register temps. I think there is a better solution. The problem is similar to x86_64: IN: free_area_init_nodes 0xffffffff806004d8: sw v0,21664(s5) 0xffffffff806004dc: lw v1,0(s0) 0xffffffff806004e0: lui v0,0x8062 0xffffffff806004e4: sw v1,21676(v0) 0xffffffff806004e8: sw v1,4(s8) 0xffffffff806004ec: lw a0,4(s0) 0xffffffff806004f0: move a1,zero 0xffffffff806004f4: sltu v0,a0,v1 0xffffffff806004f8: movz v1,a0,v0 0xffffffff806004fc: lui v0,0x8062 0xffffffff80600500: addiu s4,v0,21676 0xffffffff80600504: lui v0,0x8062 0xffffffff80600508: sw v1,4(s4) 0xffffffff8060050c: addiu a0,v0,21688 0xffffffff80600510: li a2,4 0xffffffff80600514: sw zero,8(s8) 0xffffffff80600518: jal 0xffffffff80104000 0xffffffff8060051c: sw zero,8(s4) OP: ---- 0xffffffff806004d8 movi_i32 tmp0,$0x54a0 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1 mov_i32 tmp2,v0_0 mov_i32 tmp3,v0_1 qemu_st32 tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff806004dc mov_i32 tmp2,s0_0 mov_i32 tmp3,s0_1 qemu_ld32 tmp2,tmp2,tmp3,$0x0 movi_i32 tmp4,$0x1f sar_i32 tmp3,tmp2,tmp4 mov_i32 v1_0,tmp2 mov_i32 v1_1,tmp3 ---- 0xffffffff806004e0 movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff806004e4 movi_i32 tmp0,$0x54ac movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1 mov_i32 tmp2,v1_0 mov_i32 tmp3,v1_1 qemu_st32 tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff806004e8 movi_i32 tmp2,$0x4 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3 mov_i32 tmp0,v1_0 mov_i32 tmp1,v1_1 qemu_st32 tmp0,tmp2,tmp3,$0x0 ---- 0xffffffff806004ec movi_i32 tmp0,$0x4 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1 qemu_ld32 tmp0,tmp0,tmp1,$0x0 movi_i32 tmp4,$0x1f sar_i32 tmp1,tmp0,tmp4 mov_i32 a0_0,tmp0 mov_i32 a0_1,tmp1 ---- 0xffffffff806004f0 movi_i32 a1_0,$0x0 movi_i32 a1_1,$0x0 ---- 0xffffffff806004f4 mov_i32 tmp2,a0_0 mov_i32 tmp3,a0_1 mov_i32 tmp0,v1_0 mov_i32 tmp1,v1_1 setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu movi_i32 v0_1,$0x0 ---- 0xffffffff806004f8 movi_i32 
tmp0,$0x0 movi_i32 tmp1,$0x0 brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0 mov_i32 v1_0,a0_0 mov_i32 v1_1,a0_1 set_label $0x0 Here a0 locals are used after brcond. ---- 0xffffffff806004fc movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff80600500 movi_i32 tmp0,$0x54ac movi_i32 tmp1,$0x0 add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1 movi_i32 tmp4,$0x1f sar_i32 s4_1,s4_0,tmp4 ---- 0xffffffff80600504 movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff80600508 movi_i32 tmp0,$0x4 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1 mov_i32 tmp2,v1_0 mov_i32 tmp3,v1_1 qemu_st32 tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff8060050c movi_i32 tmp2,$0x54b8 movi_i32 tmp3,$0x0 add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3 movi_i32 tmp4,$0x1f sar_i32 a0_1,a0_0,tmp4 ---- 0xffffffff80600510 movi_i32 a2_0,$0x4 movi_i32 a2_1,$0x0 ---- 0xffffffff80600514 movi_i32 tmp2,$0x8 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3 movi_i32 tmp0,$0x0 movi_i32 tmp1,$0x0 qemu_st32 tmp0,tmp2,tmp3,$0x0 ---- 0xffffffff80600518 movi_i32 ra_0,$0x80600520 movi_i32 ra_1,$0xffffffff ---- 0xffffffff8060051c movi_i32 tmp2,$0x8 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3 movi_i32 tmp0,$0x0 movi_i32 tmp1,$0x0 movi_i32 hflags,$0x10898 movi_i32 btarget_0,$0x80104000 movi_i32 btarget_1,$0xffffffff qemu_st32 tmp0,tmp2,tmp3,$0x0 movi_i32 hflags,$0x98 movi_i32 PC_0,$0x80104000 movi_i32 PC_1,$0xffffffff exit_tb $0x0 OP after liveness analysis: ---- 0xffffffff806004d8 movi_i32 tmp0,$0x54a0 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1 mov_i32 tmp2,v0_0 nopn $0x2,$0x2 qemu_st32 tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff806004dc mov_i32 tmp2,s0_0 mov_i32 tmp3,s0_1 qemu_ld32 tmp2,tmp2,tmp3,$0x0 movi_i32 tmp4,$0x1f sar_i32 tmp3,tmp2,tmp4 mov_i32 v1_0,tmp2 mov_i32 v1_1,tmp3 ---- 0xffffffff806004e0 movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff806004e4 movi_i32 tmp0,$0x54ac movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1 nop nop qemu_st32 
tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff806004e8 movi_i32 tmp2,$0x4 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3 mov_i32 tmp0,v1_0 nopn $0x2,$0x2 qemu_st32 tmp0,tmp2,tmp3,$0x0 ---- 0xffffffff806004ec movi_i32 tmp0,$0x4 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1 qemu_ld32 tmp0,tmp0,tmp1,$0x0 movi_i32 tmp4,$0x1f sar_i32 tmp1,tmp0,tmp4 mov_i32 a0_0,tmp0 mov_i32 a0_1,tmp1 ---- 0xffffffff806004f0 movi_i32 a1_0,$0x0 movi_i32 a1_1,$0x0 ---- 0xffffffff806004f4 mov_i32 tmp2,tmp0 mov_i32 tmp3,tmp1 mov_i32 tmp0,v1_0 mov_i32 tmp1,v1_1 setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu movi_i32 v0_1,$0x0 ---- 0xffffffff806004f8 movi_i32 tmp0,$0x0 movi_i32 tmp1,$0x0 brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0 mov_i32 v1_0,tmp2 mov_i32 v1_1,tmp3 set_label $0x0 But here tmp2 and tmp3 are used, which are dead after brcond. They are set to a0 values in 0xffffffff806004ec. Maybe my patch doesn't work because tmp2&3 are register temps while a0_0&1 are globals. So this should work: if (src >= nb_globals && dst >= nb_globals && tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) { ---- 0xffffffff806004fc movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff80600500 movi_i32 tmp0,$0x54ac movi_i32 tmp1,$0x0 add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1 movi_i32 tmp4,$0x1f sar_i32 s4_1,s4_0,tmp4 ---- 0xffffffff80600504 movi_i32 v0_0,$0x80620000 movi_i32 v0_1,$0xffffffff ---- 0xffffffff80600508 movi_i32 tmp0,$0x4 movi_i32 tmp1,$0x0 add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1 mov_i32 tmp2,v1_0 nopn $0x2,$0x2 qemu_st32 tmp2,tmp0,tmp1,$0x0 ---- 0xffffffff8060050c movi_i32 tmp2,$0x54b8 movi_i32 tmp3,$0x0 add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3 movi_i32 tmp4,$0x1f sar_i32 a0_1,a0_0,tmp4 ---- 0xffffffff80600510 movi_i32 a2_0,$0x4 movi_i32 a2_1,$0x0 ---- 0xffffffff80600514 movi_i32 tmp2,$0x8 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3 movi_i32 tmp0,$0x0 nopn $0x2,$0x2 qemu_st32 tmp0,tmp2,tmp3,$0x0 ---- 0xffffffff80600518 movi_i32 ra_0,$0x80600520 movi_i32 ra_1,$0xffffffff ---- 
0xffffffff8060051c movi_i32 tmp2,$0x8 movi_i32 tmp3,$0x0 add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3 movi_i32 tmp0,$0x0 nopn $0x2,$0x2 movi_i32 hflags,$0x10898 movi_i32 btarget_0,$0x80104000 movi_i32 btarget_1,$0xffffffff qemu_st32 tmp0,tmp2,tmp3,$0x0 movi_i32 hflags,$0x98 movi_i32 PC_0,$0x80104000 movi_i32 PC_1,$0xffffffff exit_tb $0x0 end