On Sat, Aug 6, 2011 at 8:33 PM, Stefan Weil <w...@mail.berlios.de> wrote:
> Am 06.08.2011 22:13, schrieb Stefan Weil:
>>
>> Am 06.08.2011 16:06, schrieb Blue Swirl:
>>>
>>> Copy propagation introduced in 22613af4a6d9602001e6d0e7b6d98aa40aa018dc
>>> considered only global registers. However, register temps and stack
>>> allocated locals must be handled differently because register temps
>>> don't survive across brcond.
>>>
>>> Fix by propagating only within same class of temps.
>>>
>>> Signed-off-by: Blue Swirl <blauwir...@gmail.com>
>>> ---
>>> tcg/optimize.c | 13 +++++++------
>>> tcg/tcg.h | 5 +++++
>>> 2 files changed, 12 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/tcg/optimize.c b/tcg/optimize.c
>>> index a3bfa5e..748ecf9 100644
>>> --- a/tcg/optimize.c
>>> +++ b/tcg/optimize.c
>>> @@ -185,12 +185,13 @@ static int op_to_movi(int op)
>>> }
>>> }
>>>
>>> -static void tcg_opt_gen_mov(TCGArg *gen_args, TCGArg dst, TCGArg src,
>>> - int nb_temps, int nb_globals)
>>> +static void tcg_opt_gen_mov(TCGContext *s, TCGArg *gen_args, TCGArg dst,
>>> + TCGArg src, int nb_temps, int nb_globals)
>>> {
>>> reset_temp(dst, nb_temps, nb_globals);
>>> assert(temps[src].state != TCG_TEMP_COPY);
>>> - if (src >= nb_globals) {
>>> + if (src >= nb_globals &&
>>> + tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {
>>> assert(temps[src].state != TCG_TEMP_CONST);
>>
>> [snip]
>>
>> Hi Blue,
>>
>> your patch fixes qemu-system-x86_64 which now seems to work on 32 bit
>> hosts, too.
>>
>> qemu-system-mips64(el) still fail with the same abort. They work when I
>> remove the
>> if block in tcg_opt_gen_mov.
>>
>> The Debian kernel for qemu-system-mips64 which I used for the test is
>> available on
>> http://qemu.weilnetz.de/mips64/.
>>
>> I could not reproduce the crash with qemu-system-ppc64 - neither with nor
>> without
>> your patch.
>>
>> Kind regards,
>> Stefan
>
> The patch works with qemu-system-mips64(el) after a small modification:
>
> if (src >= nb_globals &&
>    tcg_arg_is_local(s, src) && tcg_arg_is_local(s, dst)) {

This removes the optimization completely for register temps. I think
there is a better solution.

The problem is similar to x86_64:

IN: free_area_init_nodes
0xffffffff806004d8:  sw v0,21664(s5)
0xffffffff806004dc:  lw v1,0(s0)
0xffffffff806004e0:  lui        v0,0x8062
0xffffffff806004e4:  sw v1,21676(v0)
0xffffffff806004e8:  sw v1,4(s8)
0xffffffff806004ec:  lw a0,4(s0)
0xffffffff806004f0:  move       a1,zero
0xffffffff806004f4:  sltu       v0,a0,v1
0xffffffff806004f8:  movz       v1,a0,v0
0xffffffff806004fc:  lui        v0,0x8062
0xffffffff80600500:  addiu      s4,v0,21676
0xffffffff80600504:  lui        v0,0x8062
0xffffffff80600508:  sw v1,4(s4)
0xffffffff8060050c:  addiu      a0,v0,21688
0xffffffff80600510:  li a2,4
0xffffffff80600514:  sw zero,8(s8)
0xffffffff80600518:  jal        0xffffffff80104000
0xffffffff8060051c:  sw zero,8(s4)

OP:
 ---- 0xffffffff806004d8
 movi_i32 tmp0,$0x54a0
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1
 mov_i32 tmp2,v0_0
 mov_i32 tmp3,v0_1
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff806004dc
 mov_i32 tmp2,s0_0
 mov_i32 tmp3,s0_1
 qemu_ld32 tmp2,tmp2,tmp3,$0x0
 movi_i32 tmp4,$0x1f
 sar_i32 tmp3,tmp2,tmp4
 mov_i32 v1_0,tmp2
 mov_i32 v1_1,tmp3

 ---- 0xffffffff806004e0
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff806004e4
 movi_i32 tmp0,$0x54ac
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1
 mov_i32 tmp2,v1_0
 mov_i32 tmp3,v1_1
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff806004e8
 movi_i32 tmp2,$0x4
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
 mov_i32 tmp0,v1_0
 mov_i32 tmp1,v1_1
 qemu_st32 tmp0,tmp2,tmp3,$0x0

 ---- 0xffffffff806004ec
 movi_i32 tmp0,$0x4
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1
 qemu_ld32 tmp0,tmp0,tmp1,$0x0
 movi_i32 tmp4,$0x1f
 sar_i32 tmp1,tmp0,tmp4
 mov_i32 a0_0,tmp0
 mov_i32 a0_1,tmp1

 ---- 0xffffffff806004f0
 movi_i32 a1_0,$0x0
 movi_i32 a1_1,$0x0

 ---- 0xffffffff806004f4
 mov_i32 tmp2,a0_0
 mov_i32 tmp3,a0_1
 mov_i32 tmp0,v1_0
 mov_i32 tmp1,v1_1
 setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu
 movi_i32 v0_1,$0x0

 ---- 0xffffffff806004f8
 movi_i32 tmp0,$0x0
 movi_i32 tmp1,$0x0
 brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0
 mov_i32 v1_0,a0_0
 mov_i32 v1_1,a0_1
 set_label $0x0

Here a0 locals are used after brcond.

 ---- 0xffffffff806004fc
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff80600500
 movi_i32 tmp0,$0x54ac
 movi_i32 tmp1,$0x0
 add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1
 movi_i32 tmp4,$0x1f
 sar_i32 s4_1,s4_0,tmp4

 ---- 0xffffffff80600504
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff80600508
 movi_i32 tmp0,$0x4
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1
 mov_i32 tmp2,v1_0
 mov_i32 tmp3,v1_1
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff8060050c
 movi_i32 tmp2,$0x54b8
 movi_i32 tmp3,$0x0
 add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3
 movi_i32 tmp4,$0x1f
 sar_i32 a0_1,a0_0,tmp4

 ---- 0xffffffff80600510
 movi_i32 a2_0,$0x4
 movi_i32 a2_1,$0x0

 ---- 0xffffffff80600514
 movi_i32 tmp2,$0x8
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
 movi_i32 tmp0,$0x0
 movi_i32 tmp1,$0x0
 qemu_st32 tmp0,tmp2,tmp3,$0x0

 ---- 0xffffffff80600518
 movi_i32 ra_0,$0x80600520
 movi_i32 ra_1,$0xffffffff

 ---- 0xffffffff8060051c
 movi_i32 tmp2,$0x8
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3
 movi_i32 tmp0,$0x0
 movi_i32 tmp1,$0x0
 movi_i32 hflags,$0x10898
 movi_i32 btarget_0,$0x80104000
 movi_i32 btarget_1,$0xffffffff
 qemu_st32 tmp0,tmp2,tmp3,$0x0
 movi_i32 hflags,$0x98
 movi_i32 PC_0,$0x80104000
 movi_i32 PC_1,$0xffffffff
 exit_tb $0x0

OP after liveness analysis:
 ---- 0xffffffff806004d8
 movi_i32 tmp0,$0x54a0
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s5_0,s5_1,tmp0,tmp1
 mov_i32 tmp2,v0_0
 nopn $0x2,$0x2
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff806004dc
 mov_i32 tmp2,s0_0
 mov_i32 tmp3,s0_1
 qemu_ld32 tmp2,tmp2,tmp3,$0x0
 movi_i32 tmp4,$0x1f
 sar_i32 tmp3,tmp2,tmp4
 mov_i32 v1_0,tmp2
 mov_i32 v1_1,tmp3

 ---- 0xffffffff806004e0
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff806004e4
 movi_i32 tmp0,$0x54ac
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,v0_0,v0_1,tmp0,tmp1
 nop
 nop
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff806004e8
 movi_i32 tmp2,$0x4
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
 mov_i32 tmp0,v1_0
 nopn $0x2,$0x2
 qemu_st32 tmp0,tmp2,tmp3,$0x0

 ---- 0xffffffff806004ec
 movi_i32 tmp0,$0x4
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s0_0,s0_1,tmp0,tmp1
 qemu_ld32 tmp0,tmp0,tmp1,$0x0
 movi_i32 tmp4,$0x1f
 sar_i32 tmp1,tmp0,tmp4
 mov_i32 a0_0,tmp0
 mov_i32 a0_1,tmp1

 ---- 0xffffffff806004f0
 movi_i32 a1_0,$0x0
 movi_i32 a1_1,$0x0

 ---- 0xffffffff806004f4
 mov_i32 tmp2,tmp0
 mov_i32 tmp3,tmp1
 mov_i32 tmp0,v1_0
 mov_i32 tmp1,v1_1
 setcond2_i32 v0_0,tmp2,tmp3,tmp0,tmp1,ltu
 movi_i32 v0_1,$0x0

 ---- 0xffffffff806004f8
 movi_i32 tmp0,$0x0
 movi_i32 tmp1,$0x0
 brcond2_i32 v0_0,v0_1,tmp0,tmp1,ne,$0x0
 mov_i32 v1_0,tmp2
 mov_i32 v1_1,tmp3
 set_label $0x0

But here tmp2 and tmp3 are used, even though they are dead after brcond.
They are set to a0 values in 0xffffffff806004ec.

Maybe my patch doesn't work because tmp2&3 are register temps while
a0_0&1 are globals. So this should work:
if (src >= nb_globals && dst >= nb_globals &&
   tcg_arg_is_local(s, src) == tcg_arg_is_local(s, dst)) {

 ---- 0xffffffff806004fc
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff80600500
 movi_i32 tmp0,$0x54ac
 movi_i32 tmp1,$0x0
 add2_i32 s4_0,s4_1,v0_0,v0_1,tmp0,tmp1
 movi_i32 tmp4,$0x1f
 sar_i32 s4_1,s4_0,tmp4

 ---- 0xffffffff80600504
 movi_i32 v0_0,$0x80620000
 movi_i32 v0_1,$0xffffffff

 ---- 0xffffffff80600508
 movi_i32 tmp0,$0x4
 movi_i32 tmp1,$0x0
 add2_i32 tmp0,tmp1,s4_0,s4_1,tmp0,tmp1
 mov_i32 tmp2,v1_0
 nopn $0x2,$0x2
 qemu_st32 tmp2,tmp0,tmp1,$0x0

 ---- 0xffffffff8060050c
 movi_i32 tmp2,$0x54b8
 movi_i32 tmp3,$0x0
 add2_i32 a0_0,a0_1,v0_0,v0_1,tmp2,tmp3
 movi_i32 tmp4,$0x1f
 sar_i32 a0_1,a0_0,tmp4

 ---- 0xffffffff80600510
 movi_i32 a2_0,$0x4
 movi_i32 a2_1,$0x0

 ---- 0xffffffff80600514
 movi_i32 tmp2,$0x8
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s8_0,s8_1,tmp2,tmp3
 movi_i32 tmp0,$0x0
 nopn $0x2,$0x2
 qemu_st32 tmp0,tmp2,tmp3,$0x0

 ---- 0xffffffff80600518
 movi_i32 ra_0,$0x80600520
 movi_i32 ra_1,$0xffffffff

 ---- 0xffffffff8060051c
 movi_i32 tmp2,$0x8
 movi_i32 tmp3,$0x0
 add2_i32 tmp2,tmp3,s4_0,s4_1,tmp2,tmp3
 movi_i32 tmp0,$0x0
 nopn $0x2,$0x2
 movi_i32 hflags,$0x10898
 movi_i32 btarget_0,$0x80104000
 movi_i32 btarget_1,$0xffffffff
 qemu_st32 tmp0,tmp2,tmp3,$0x0
 movi_i32 hflags,$0x98
 movi_i32 PC_0,$0x80104000
 movi_i32 PC_1,$0xffffffff
 exit_tb $0x0
 end

Reply via email to