On Sun, Apr 10, 2011 at 7:57 PM, Blue Swirl <blauwir...@gmail.com> wrote:
> On Sun, Apr 10, 2011 at 8:48 PM, Artyom Tarasenko <atar4q...@gmail.com> wrote:
>> On Sun, Apr 10, 2011 at 4:44 PM, Blue Swirl <blauwir...@gmail.com> wrote:
>>> On Sun, Apr 10, 2011 at 5:09 PM, Artyom Tarasenko <atar4q...@gmail.com> 
>>> wrote:
>>>> On Sun, Apr 10, 2011 at 3:24 PM, Aurelien Jarno <aurel...@aurel32.net> 
>>>> wrote:
>>>>> On Sun, Apr 10, 2011 at 02:29:59PM +0200, Artyom Tarasenko wrote:
>>>>>> While trying to boot a proprietary OS, I get a qemu-system-sparc64 crash with a
>>>>>>
>>>>>> tcg/tcg.c:1892: tcg fatal error
>>>>>>
>>>>>> error message.
>>>>>>
>>>>>> It looks like it can be a platform independent bug though, because
>>>>>> when a '-singlestep' option IS present, qemu doesn't crash and seems
>>>>>> to translate the code properly.
>>>>>>
>>>>>> (gdb) bt
>>>>>> #0  0x00000032c2e327f5 in raise () from /lib64/libc.so.6
>>>>>> #1  0x00000032c2e33fd5 in abort () from /lib64/libc.so.6
>>>>>> #2  0x000000000051933d in tcg_reg_alloc_call (s=<value optimized out>,
>>>>>> def=0x89d340, opc=INDEX_op_call, args=0x10acc98, dead_iargs=3) at
>>>>>> qemu/tcg/tcg.c:1892
>>>>>> #3  0x000000000051a557 in tcg_gen_code_common (s=0x10b8940,
>>>>>> gen_code_buf=0x40338b60 "I\213n@H\213] 3\355I\211\256\220") at
>>>>>> qemu/tcg/tcg.c:2099
>>>>>> #4  tcg_gen_code (s=0x10b8940, gen_code_buf=0x40338b60 "I\213n@H\213]
>>>>>> 3\355I\211\256\220") at qemu/tcg/tcg.c:2142
>>>>>> #5  0x00000000004d38f1 in cpu_sparc_gen_code (env=0x10cce10,
>>>>>> tb=0x7fffe91bc218, gen_code_size_ptr=0x7fffffffd9b4) at
>>>>>> qemu/translate-all.c:93
>>>>>> #6  0x00000000004d1fd7 in tb_gen_code (env=0x10cce10, pc=18868776,
>>>>>> cs_base=18868780, flags=15, cflags=0) at qemu/exec.c:989
>>>>>> #7  0x00000000004d4029 in tb_find_slow (env1=<value optimized out>) at
>>>>>> qemu/cpu-exec.c:167
>>>>>> #8  tb_find_fast (env1=<value optimized out>) at cpu-exec.c:194
>>>>>> #9  cpu_sparc_exec (env1=<value optimized out>) at qemu/cpu-exec.c:556
>>>>>> #10 0x0000000000408868 in tcg_cpu_exec () at qemu/cpus.c:1066
>>>>>> #11 cpu_exec_all () at qemu/cpus.c:1102
>>>>>> #12 0x000000000053c756 in main_loop (argc=<value optimized out>,
>>>>>> argv=<value optimized out>, envp=<value optimized out>) at
>>>>>> qemu/vl.c:1430
>>>>>>
>>>>>> I inspected ts->val_type causing the abort() case and it turned out to 
>>>>>> be 0.
>>>>>>
>>>>>> The last lines of qemu.log (without -singlestep)
>>>>>> IN:
>>>>>> 0x00000000011fe9f0:  rdpr  %pstate, %g1
>>>>>> 0x00000000011fe9f4:  wrpr  %g1, 2, %pstate
>>>>>> --------------
>>>>>> IN:
>>>>>> 0x00000000011fe9f8:  ldub  [ %o0 ], %o1
>>>>>> 0x00000000011fe9fc:  mov  %o1, %o2
>>>>>> 0x00000000011fea00:  rdpr  %tick, %o3
>>>>>> 0x00000000011fea04:  cmp  %o1, %o2
>>>>>> 0x00000000011fea08:  be  %icc, 0x11fea00
>>>>>> 0x00000000011fea0c:  ldub  [ %o0 ], %o2
>>>>>>
>>>>>> Search PC...
>>>>>> Search PC...
>>>>>> Search PC...
>>>>>> Search PC...
>>>>>> Search PC...
>>>>>> Search PC...
>>>>>> --------------
>>>>>> IN:
>>>>>> 0x00000000011fe9f8:  ldub  [ %o0 ], %o1
>>>>>> 0x00000000011fe9fc:  mov  %o1, %o2
>>>>>> 0x00000000011fea00:  rdpr  %tick, %o3
>>>>>> 0x00000000011fea04:  cmp  %o1, %o2
>>>>>> 0x00000000011fea08:  be  %icc, 0x11fea00
>>>>>> 0x00000000011fea0c:  ldub  [ %o0 ], %o2
>>>>>>
>>>>>> 110521: Data Access MMU Miss (v=0068) pc=00000000011fe9f8
>>>>>> npc=00000000011fe9fc SP=000000000180ae41
>>>>>> pc: 00000000011fe9f8  npc: 00000000011fe9fc
>>>>>>
>>>>>> IN:
>>>>>> 0x00000000011fea00:  rdpr  %tick, %o3
>>>>>> 0x00000000011fea04:  cmp  %o1, %o2
>>>>>> 0x00000000011fea08:  be  %icc, 0x11fea00
>>>>>> 0x00000000011fea0c:  ldub  [ %o0 ], %o2
>>>>>> --------------
>>>>>> IN:
>>>>>> 0x00000000011fea10:  brz,pn   %o2, 0x11fe9f8
>>>>>> 0x00000000011fea14:  mov  %o2, %o4
>>>>>> --------------
>>>>>> IN:
>>>>>> 0x00000000011fea18:  rdpr  %tick, %o5
>>>>>> 0x00000000011fea1c:  cmp  %o2, %o4
>>>>>> 0x00000000011fea20:  be  %icc, 0x11fea18
>>>>>> 0x00000000011fea24:  ldub  [ %o0 ], %o4
>>>>>> --------------
>>>>>> IN:
>>>>>> 0x00000000011fea28:  brz,pn   %o4, 0x11fe9f4
>>>>>> 0x00000000011fea2c:  wrpr  %g0, %g1, %pstate
>>>>>> <EOF>
>>>>>>
>>>>>> The crash is 100% reproducible and always happens in the same place,
>>>>>> so it's probably a pure TCG issue, not related to receiving
>>>>>> external/timer interrupts.
>>>>>>
>>>>>> Do you need any additional info?
>>>>>>
>>>>>
>>>>> What would be interesting would be to get the corresponding TCG code
>>>>> from qemu.log (-d op,op_opt).
>>>>
>>>>
>>>> OP:
>>>>  ---- 0x11fea28
>>>>  ld_i64 tmp6,regwptr,$0x20
>>>>  movi_i64 cond,$0x0
>>>>  movi_i64 tmp8,$0x0
>>>>  brcond_i64 tmp6,tmp8,ne,$0x0
>>>>  movi_i64 cond,$0x1
>>>>  set_label $0x0
>>>>
>>>>  ---- 0x11fea2c
>>>>  movi_i64 tmp7,$0x0
>>>>  xor_i64 tmp0,tmp7,g1
>>>>  movi_i64 pc,$0x11fea2c
>>>>  movi_i64 tmp8,$compute_psr
>>>>  call tmp8,$0x0,$0
>>>>  movi_i64 tmp8,$0x0
>>>>  brcond_i64 cond,tmp8,eq,$0x1
>>>>  movi_i64 npc,$0x11fe9f4
>>>>  br $0x2
>>>>  set_label $0x1
>>>>  movi_i64 npc,$0x11fea30
>>>>  set_label $0x2
>>>>  movi_i64 tmp8,$wrpstate
>>>>  call tmp8,$0x0,$0,tmp0
>>>>  mov_i64 pc,npc
>>>>  movi_i64 tmp8,$0x4
>>>>  add_i64 npc,npc,tmp8
>>>>  exit_tb $0x0
>>>>
>>>> OP after liveness analysis:
>>>>  ---- 0x11fea28
>>>>  ld_i64 tmp6,regwptr,$0x20
>>>>  movi_i64 cond,$0x0
>>>>  movi_i64 tmp8,$0x0
>>>>  brcond_i64 tmp6,tmp8,ne,$0x0
>>>>  movi_i64 cond,$0x1
>>>>  set_label $0x0
>>>>
>>>>  ---- 0x11fea2c
>>>>  nopn $0x2,$0x2
>>>>  nopn $0x3,$0x68,$0x3
>>>>  movi_i64 pc,$0x11fea2c
>>>>  movi_i64 tmp8,$compute_psr
>>>>  call tmp8,$0x0,$0
>>>>  movi_i64 tmp8,$0x0
>>>>  brcond_i64 cond,tmp8,eq,$0x1
>>>>  movi_i64 npc,$0x11fe9f4
>>>>  br $0x2
>>>>  set_label $0x1
>>>>  movi_i64 npc,$0x11fea30
>>>>  set_label $0x2
>>>>  movi_i64 tmp8,$wrpstate
>>>>  call tmp8,$0x0,$0,tmp0
>>>>  mov_i64 pc,npc
>>>>  movi_i64 tmp8,$0x4
>>>>  add_i64 npc,npc,tmp8
>>>>  exit_tb $0x0
>>>>  end
>>>>
>>>> Does it mean the last block is processed correctly and the crash
>>>> happens on the next instruction which doesn't make it to the log?
>>>> The next instruction would be a
>>>>
>>>> 0x00000000011fea30:  retl
>>>>
>>>> Since it's a branch instruction I guess this would also be a tcg block 
>>>> boundary.
>>>
>>> Because abort() was called from tcg_reg_alloc_call, I'd say 'retl'
>>> (synthetic instruction for 'jmpl %o7 + 8, %g0') was the problem.
>>
>> Any idea why? retl is not a rare instruction...
>
> Sorry, calls are generated for helpers, so it's not 'jmpl' but the
> call to the wrpstate helper.

And why doesn't it happen in singlestep mode?
I tried commenting out
cpu_check_irqs(env);
in helper_wrpstate, but it made no difference. The only suspicious
thing left is register bank switching. Is it safe to switch register
banks in a helper function? Shouldn't we end the translation block
first?

-- 
Regards,
Artyom Tarasenko

solaris/sparc under qemu blog: http://tyom.blogspot.com/

Reply via email to