Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-27 Thread Mark Cave-Ayland
On 27/06/2019 18:54, Richard Henderson wrote:

> On 6/27/19 7:51 PM, Richard Henderson wrote:
>> Please try the following patch on top and if it works I'll split it back into
>> the patch set properly.
> 
> Dangit.  I generated the patch on the wrong machine.
> Let's try that again.

Yes it works! Or at least so far it has survived a boot into the MacOS 9 desktop
which is fairly good at exercising all sorts of strange edge cases...

If you're going to resend the patchset, don't forget to squash "tcg/ppc: Support
vector dup2" into "tcg/ppc: Initial backend support for Altivec" to preserve
bisectability on 32-bit PPC hosts when configuring with "--enable-debug-tcg".


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-27 Thread Richard Henderson
On 6/27/19 7:51 PM, Richard Henderson wrote:
> Please try the following patch on top and if it works I'll split it back into
> the patch set properly.

Dangit.  I generated the patch on the wrong machine.
Let's try that again.


r~
diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 6cc56cf..e929df3 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -1132,7 +1132,7 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int 
opx, TCGReg rt,
 align = 3;
 /* FALLTHRU */
 default:
-if (rt != TCG_REG_R0) {
+if (rt > TCG_REG_R0 && rt < 32) {
 rs = rt;
 break;
 }
@@ -1161,7 +1161,7 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int 
opx, TCGReg rt,
 }
 tcg_debug_assert(!is_int_store || rs != rt);
 tcg_out_movi(s, TCG_TYPE_PTR, rs, orig);
-tcg_out32(s, opx | TAB(rt, base, rs));
+tcg_out32(s, opx | TAB(rt & 31, base, rs));
 return;
 }
 
@@ -1182,7 +1182,7 @@ static void tcg_out_mem_long(TCGContext *s, int opi, int 
opx, TCGReg rt,
 base = rs;
 }
 if (opi != ADDI || base != rt || l0 != 0) {
-tcg_out32(s, opi | TAI(rt, base, l0));
+tcg_out32(s, opi | TAI(rt & 31, base, l0));
 }
 }
 
@@ -1204,11 +1204,11 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
 break;
 }
 if (have_isa_2_07_vsx) {
-tcg_out_mem_long(s, 0, LXSIWZX | 1, ret & 31, base, offset);
+tcg_out_mem_long(s, 0, LXSIWZX | 1, ret, base, offset);
 break;
 }
 assert((offset & 3) == 0);
-tcg_out_mem_long(s, 0, LVEWX, ret & 31, base, offset);
+tcg_out_mem_long(s, 0, LVEWX, ret, base, offset);
 shift = (offset - 4) & 0xc;
 if (shift) {
 tcg_out_vsldoi(s, ret, ret, ret, shift);
@@ -1224,11 +1224,11 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
 tcg_debug_assert(ret >= 32);
 if (have_isa_2_06_vsx) {
 tcg_out_mem_long(s, have_isa_3_00_vsx ? LXSD : 0, LXSDX | 1,
- ret & 31, base, offset);
+ ret, base, offset);
 break;
 }
 assert((offset & 7) == 0);
-tcg_out_mem_long(s, 0, LVX, ret & 31, base, offset & -16);
+tcg_out_mem_long(s, 0, LVX, ret, base, offset & -16);
 if (offset & 8) {
 tcg_out_vsldoi(s, ret, ret, ret, 8);
 }
@@ -1237,7 +1237,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
 tcg_debug_assert(ret >= 32);
 assert((offset & 15) == 0);
 tcg_out_mem_long(s, have_isa_3_00_vsx ? LXV | 8 : 0, LVX,
- ret & 31, base, offset);
+ ret, base, offset);
 break;
 default:
 g_assert_not_reached();
@@ -1256,7 +1256,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 break;
 }
 if (have_isa_2_07_vsx) {
-tcg_out_mem_long(s, 0, STXSIWX | 1, arg & 31, base, offset);
+tcg_out_mem_long(s, 0, STXSIWX | 1, arg, base, offset);
 break;
 }
 assert((offset & 3) == 0);
@@ -1265,7 +1265,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, shift);
 arg = TCG_VEC_TMP1;
 }
-tcg_out_mem_long(s, 0, STVEWX, arg & 31, base, offset);
+tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
 break;
 case TCG_TYPE_I64:
 if (arg < 32) {
@@ -1277,7 +1277,7 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 tcg_debug_assert(arg >= 32);
 if (have_isa_2_06_vsx) {
 tcg_out_mem_long(s, have_isa_3_00_vsx ? STXSD : 0,
- STXSDX | 1, arg & 31, base, offset);
+ STXSDX | 1, arg, base, offset);
 break;
 }
 assert((offset & 7) == 0);
@@ -1285,13 +1285,13 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
 arg = TCG_VEC_TMP1;
 }
-tcg_out_mem_long(s, 0, STVEWX, arg & 31, base, offset);
-tcg_out_mem_long(s, 0, STVEWX, arg & 31, base, offset + 4);
+tcg_out_mem_long(s, 0, STVEWX, arg, base, offset);
+tcg_out_mem_long(s, 0, STVEWX, arg, base, offset + 4);
 break;
 case TCG_TYPE_V128:
 tcg_debug_assert(arg >= 32);
 tcg_out_mem_long(s, have_isa_3_00_vsx ? STXV | 8 : 0, STVX,
- arg & 31, base, offset);
+ arg, base, offset);
 break;
 default:
 g_assert_not_reached();
@@ -3075,7 +3075,6 @@ static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, 
unsigned vece,
 int elt;
 
 tcg_debug_assert(out >= 32);
-out &= 31;

Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-27 Thread Richard Henderson
On 6/27/19 7:24 PM, Mark Cave-Ayland wrote:
> For the TCG_TYPE_V128 case we have ret = TCG_REG_V2 but (ret & 31) masks
> off the top bit which converts this to TCG_REG_R2 and that's why
> tcg_out_mem_long() starts using r2 to calculate offsets.

Oh geez.  Ok, I see it now.

>  case TCG_TYPE_V128:
>  tcg_debug_assert(ret >= 32);
>  assert((offset & 15) == 0);
> -tcg_out_mem_long(s, 0, LVX, ret & 31, base, offset);
> +tcg_out_mem_long(s, 0, LVX, TCG_REG_TMP1, base, offset);

No, here ret is the register into which we are loading.
Same for the rest.  The error is in tcg_out_mem_long in
trying to reuse the output register as a scratch.

> Presumably the reason this didn't break on your Power 9 box is because
> the 64-bit ABI doesn't mark r2 as reserved?

Correct.  That and the fact that V0 and V1 get reserved as temporaries, so I
didn't attempt to use r1 (i.e. sp) as a temporary.

Please try the following patch on top and if it works I'll split it back into
the patch set properly.


r~
diff --git a/target/m68k/translate.c b/target/m68k/translate.c
index 2ae537461f..b61c7ea0f1 100644
--- a/target/m68k/translate.c
+++ b/target/m68k/translate.c
@@ -6124,27 +6124,34 @@ static void m68k_tr_tb_stop(DisasContextBase *dcbase, 
CPUState *cpu)
 {
 DisasContext *dc = container_of(dcbase, DisasContext, base);
 
-if (dc->base.is_jmp == DISAS_NORETURN) {
-return;
-}
-if (dc->base.singlestep_enabled) {
-gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
-return;
-}
-
 switch (dc->base.is_jmp) {
+case DISAS_NORETURN:
+break;
 case DISAS_TOO_MANY:
 update_cc_op(dc);
-gen_jmp_tb(dc, 0, dc->pc);
+if (dc->base.singlestep_enabled) {
+tcg_gen_movi_i32(QREG_PC, dc->pc);
+gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
+} else {
+gen_jmp_tb(dc, 0, dc->pc);
+}
 break;
 case DISAS_JUMP:
 /* We updated CC_OP and PC in gen_jmp/gen_jmp_im.  */
-tcg_gen_lookup_and_goto_ptr();
+if (dc->base.singlestep_enabled) {
+gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
+} else {
+tcg_gen_lookup_and_goto_ptr();
+}
 break;
 case DISAS_EXIT:
 /* We updated CC_OP and PC in gen_exit_tb, but also modified
other state that may require returning to the main loop.  */
-tcg_gen_exit_tb(NULL, 0);
+if (dc->base.singlestep_enabled) {
+gen_helper_raise_exception(cpu_env, tcg_const_i32(EXCP_DEBUG));
+} else {
+tcg_gen_exit_tb(NULL, 0);
+}
 break;
 default:
 g_assert_not_reached();


Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-27 Thread Mark Cave-Ayland
On 26/06/2019 20:38, Mark Cave-Ayland wrote:

>> But I thought that _CALL_AIX was only defined for ppc64 elf version 1.  I
>> thought that ppc32 used _CALL_SYSV instead.  Certainly that's what is used
>> elsewhere...
> 
> No, that didn't work either. I've confirmed using #ifdef _CALL_AIX #error 
> ERROR
> #endif that _CALL_AIX is *NOT* defined and _CALL_SYSV *is* defined.
> 
> I've also tried removing TCG_REG_R2 from tcg_target_reg_alloc_order[] and
> tcg_regset_set_reg() for TCG_REG_R2 from tcg_target_init() and I'm still 
> generating
> bad code that writes to r2(!).
> 
> Since I can't find any other mentions of TCG_REG_TMP1 and TCG_REG_R2 that 
> isn't
> inside an #ifdef _CALL_AIX ... #endif section I'm starting to get stuck. Is 
> there any
> chance that the R_PPC_ADDR32 change could be causing this at all?

So after a lot more digging: the issue can be seen in tcg_out_ld() and 
tcg_out_st()
for the vector registers. Taking tcg_out_ld() as an example:

case TCG_TYPE_V128:
tcg_debug_assert(ret >= 32);
assert((offset & 15) == 0);
tcg_out_mem_long(s, 0, LVX, ret & 31, base, offset);
break;

For the TCG_TYPE_V128 case we have ret = TCG_REG_V2 but (ret & 31) masks off 
the top
bit which converts this to TCG_REG_R2 and that's why tcg_out_mem_long() starts 
using
r2 to calculate offsets.

Assuming that rt is the temporary register used to calculate the address then 
the
patch below tentatively appears to get things working again by passing in
TCG_REG_TMP1 instead, but ultimately I still see a crash much later when trying 
to
boot MacOS 9:

diff --git a/tcg/ppc/tcg-target.inc.c b/tcg/ppc/tcg-target.inc.c
index 61732c1f45..dd823447cc 100644
--- a/tcg/ppc/tcg-target.inc.c
+++ b/tcg/ppc/tcg-target.inc.c
@@ -1139,7 +1139,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
 case TCG_TYPE_V64:
 tcg_debug_assert(ret >= 32);
 assert((offset & 7) == 0);
-tcg_out_mem_long(s, 0, LVX, ret & 31, base, offset & -16);
+tcg_out_mem_long(s, 0, LVX, TCG_REG_TMP1, base, offset & -16);
 if (offset & 8) {
 tcg_out_vsldoi(s, ret, ret, ret, 8);
 }
@@ -1147,7 +1147,7 @@ static void tcg_out_ld(TCGContext *s, TCGType type, 
TCGReg ret,
 case TCG_TYPE_V128:
 tcg_debug_assert(ret >= 32);
 assert((offset & 15) == 0);
-tcg_out_mem_long(s, 0, LVX, ret & 31, base, offset);
+tcg_out_mem_long(s, 0, LVX, TCG_REG_TMP1, base, offset);
 break;
 default:
 g_assert_not_reached();
@@ -1186,12 +1186,13 @@ static void tcg_out_st(TCGContext *s, TCGType type, 
TCGReg arg,
 tcg_out_vsldoi(s, TCG_VEC_TMP1, arg, arg, 8);
 arg = TCG_VEC_TMP1;
 }
-tcg_out_mem_long(s, 0, STVEWX, arg & 31, base, offset);
-tcg_out_mem_long(s, 0, STVEWX, arg & 31, base, offset + 4);
+tcg_out_mem_long(s, 0, STVEWX, TCG_REG_TMP1, base, offset);
+tcg_out_mem_long(s, 0, STVEWX, TCG_REG_TMP1, base, offset + 4);
 break;
 case TCG_TYPE_V128:
 tcg_debug_assert(arg >= 32);
-tcg_out_mem_long(s, 0, STVX, arg & 31, base, offset);
+assert((offset & 15) == 0);
+tcg_out_mem_long(s, 0, STVX, TCG_REG_TMP1, base, offset);
 break;
 default:
 g_assert_not_reached();

Richard: even though it's still not perfect, does this look like it's fixing the
right problem? Presumably the reason this didn't break on your Power 9 box is 
because
the 64-bit ABI doesn't mark r2 as reserved?


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread BALATON Zoltan

On Wed, 26 Jun 2019, Mark Cave-Ayland wrote:

On 26/06/2019 19:42, Richard Henderson wrote:


On 6/26/19 7:00 PM, Mark Cave-Ayland wrote:

Interestingly if I set a trap and then switch the opcode to "lis r4,0" 
(0x3c80)
then we carry on as normal until the next "lis r2,0" instruction. Looking 
through the
whole output of -d out_asm this is the first mention of r2 which makes me 
wonder if
it is special somehow? At least a quick search indicates that for 32-bit PPC r2 
is
supposed to be dedicated as a TOC pointer.

Is there a quick way to disable r2 from the list of available registers to see 
if
that gets things going?


Interesting.  I'm not sure why that's happening.

As a quick hack,


  /* For some memory operations, we need a scratch that isn't R0.  For the AIX
 calling convention, we can re-use the TOC register since we'll be reloading
 it at every call.  Otherwise R12 will do nicely as neither a call-saved
 register nor a parameter register.  */
- #ifdef _CALL_AIX
+ #if 0
  # define TCG_REG_TMP1   TCG_REG_R2
  #else
  # define TCG_REG_TMP1   TCG_REG_R12
  #endif


But I thought that _CALL_AIX was only defined for ppc64 elf version 1.  I
thought that ppc32 used _CALL_SYSV instead.  Certainly that's what is used
elsewhere...


No, that didn't work either. I've confirmed using #ifdef _CALL_AIX #error ERROR
#endif that _CALL_AIX is *NOT* defined and _CALL_SYSV *is* defined.

I've also tried removing TCG_REG_R2 from tcg_target_reg_alloc_order[] and
tcg_regset_set_reg() for TCG_REG_R2 from tcg_target_init() and I'm still 
generating
bad code that writes to r2(!).

Since I can't find any other mentions of TCG_REG_TMP1 and TCG_REG_R2 that isn't
inside an #ifdef _CALL_AIX ... #endif section I'm starting to get stuck. Is 
there any
chance that the R_PPC_ADDR32 change could be causing this at all?


There's one more mention of TCG_REG_R2 in tcg-target.inc.c 
tcg_target_init() function where it's set as call clobber but then later 
in same func again as reserved if _CALL_SYSV or 64 bits. Not sure if the 
later tcg_regset_set_reg overrides the first one or that should be removed 
or put in an #else of the later conditional call. (Still don't know what 
I'm talking about just trowing random ideas.)


Regards,
BALATON Zoltan



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread Mark Cave-Ayland
On 26/06/2019 19:42, Richard Henderson wrote:

> On 6/26/19 7:00 PM, Mark Cave-Ayland wrote:
>> Interestingly if I set a trap and then switch the opcode to "lis r4,0" 
>> (0x3c80)
>> then we carry on as normal until the next "lis r2,0" instruction. Looking 
>> through the
>> whole output of -d out_asm this is the first mention of r2 which makes me 
>> wonder if
>> it is special somehow? At least a quick search indicates that for 32-bit PPC 
>> r2 is
>> supposed to be dedicated as a TOC pointer.
>>
>> Is there a quick way to disable r2 from the list of available registers to 
>> see if
>> that gets things going?
> 
> Interesting.  I'm not sure why that's happening.
> 
> As a quick hack,
> 
> 
>   /* For some memory operations, we need a scratch that isn't R0.  For the AIX
>  calling convention, we can re-use the TOC register since we'll be 
> reloading
>  it at every call.  Otherwise R12 will do nicely as neither a call-saved
>  register nor a parameter register.  */
> - #ifdef _CALL_AIX
> + #if 0
>   # define TCG_REG_TMP1   TCG_REG_R2
>   #else
>   # define TCG_REG_TMP1   TCG_REG_R12
>   #endif
> 
> 
> But I thought that _CALL_AIX was only defined for ppc64 elf version 1.  I
> thought that ppc32 used _CALL_SYSV instead.  Certainly that's what is used
> elsewhere...

No, that didn't work either. I've confirmed using #ifdef _CALL_AIX #error ERROR
#endif that _CALL_AIX is *NOT* defined and _CALL_SYSV *is* defined.

I've also tried removing TCG_REG_R2 from tcg_target_reg_alloc_order[] and
tcg_regset_set_reg() for TCG_REG_R2 from tcg_target_init() and I'm still 
generating
bad code that writes to r2(!).

Since I can't find any other mentions of TCG_REG_TMP1 and TCG_REG_R2 that isn't
inside an #ifdef _CALL_AIX ... #endif section I'm starting to get stuck. Is 
there any
chance that the R_PPC_ADDR32 change could be causing this at all?


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread Richard Henderson
On 6/26/19 7:00 PM, Mark Cave-Ayland wrote:
> Interestingly if I set a trap and then switch the opcode to "lis r4,0" 
> (0x3c80)
> then we carry on as normal until the next "lis r2,0" instruction. Looking 
> through the
> whole output of -d out_asm this is the first mention of r2 which makes me 
> wonder if
> it is special somehow? At least a quick search indicates that for 32-bit PPC 
> r2 is
> supposed to be dedicated as a TOC pointer.
> 
> Is there a quick way to disable r2 from the list of available registers to 
> see if
> that gets things going?

Interesting.  I'm not sure why that's happening.

As a quick hack,


  /* For some memory operations, we need a scratch that isn't R0.  For the AIX
 calling convention, we can re-use the TOC register since we'll be reloading
 it at every call.  Otherwise R12 will do nicely as neither a call-saved
 register nor a parameter register.  */
- #ifdef _CALL_AIX
+ #if 0
  # define TCG_REG_TMP1   TCG_REG_R2
  #else
  # define TCG_REG_TMP1   TCG_REG_R12
  #endif


But I thought that _CALL_AIX was only defined for ppc64 elf version 1.  I
thought that ppc32 used _CALL_SYSV instead.  Certainly that's what is used
elsewhere...


r~



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread Richard Henderson
On 6/26/19 10:33 AM, David Gibson wrote:
>>> out of curiosity: is your Power9 system BE or LE?
>>
>> The Power9 is LE.
> 
> It's the kernel determines endianness, not the system.

Yes.  Lazy verbiage on my part -- I did mean "The Power9 that I have access to
is configured as LE".


r~



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread BALATON Zoltan

On Wed, 26 Jun 2019, Mark Cave-Ayland wrote:

On 26/06/2019 08:45, Richard Henderson wrote:

On 6/25/19 7:55 PM, Mark Cave-Ayland wrote:

And here's where we are blowing up according to -d in_asm,op_out_asm:

IN:
0x00f22ca0:  101ffc84  vor  v0, v31, v31

OP:
 ld_i32 tmp0,env,$0xfff8
 movi_i32 tmp1,$0x0
 brcond_i32 tmp0,tmp1,lt,$L0

  00f22ca0
 ld_vec v128,e8,tmp2,env,$0xd6b0
 st_vec v128,e8,tmp2,env,$0xd4c0


Yep, that looks right.

As an aside, this does suggest to me that target/ppc might be well served in
moving the ppc_vsr_t members of CPUPPCState earlier, so that this offset is
smaller.


 movi_i32 nip,$0xf22ca4
 movi_i32 nip,$0xf22ca4
 movi_i32 tmp0,$0x10002
 call raise_exception,$0x2,$0,env,tmp0


And this, presumably is the single-step debug exception.


0xa4e7f12c:  3c40  lis  r2, 0
0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2



0xa4e7f138:  3c40  lis  r2, 0
0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
0xa4e7f140:  7c5b11ce  stvx v2, r27, r2


These also look correct.  Form an offset into r2, load or store from env+r2.

This also shows what I mention above re offset.  For a ppc host, the offset is
large enough to require two instructions to form them.


Any ideas what might be going on here?


What is the observed problem that makes you think that this is the incorrect
instruction?


What I've been doing is set a breakpoint a few instructions before and then 
issuing
"stepi" commands via the gdbstub. As I step over the "vor v0, v31, v31" 
instruction
then either the qemu-system-ppc process segfaults outside of gdb, or inside gdb 
it
goes to bg. Bringing it back to fg just causes gdb to get confused and in the 
end the
only thing I can do is kill the gdb process.

On the plus side I've managed to work out where we are faulting by hacking the 
load
and store functions to inject trap opcodes in the ld_vec and st_vec and it 
appears
that we are segfaulting here:

OUT: [size=96]
0xa4e7f120:  81dbfff8  lwz  r14, -8(r27)
0xa4e7f124:  2f8e  cmpwicr7, r14, 0
0xa4e7f128:  419c004c  blt  cr7, 0xa4e7f174
0xa4e7f12c:  3c40  lis  r2, 0
  ^^
0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2
0xa4e7f138:  3c40  lis  r2, 0
0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
0xa4e7f140:  7c5b11ce  stvx v2, r27, r2
0xa4e7f144:  3dc000f2  lis  r14, 0xf2
0xa4e7f148:  61ce2ca4  ori  r14, r14, 0x2ca4
0xa4e7f14c:  91db016c  stw  r14, 0x16c(r27)
0xa4e7f150:  7f63db78  mr   r3, r27
0xa4e7f154:  3c81  lis  r4, 1
0xa4e7f158:  60840002  ori  r4, r4, 2
0xa4e7f15c:  3c87  lis  r0, 0x87
0xa4e7f160:  6000b618  ori  r0, r0, 0xb618
0xa4e7f164:  7c0903a6  mtctrr0
0xa4e7f168:  4e800421  bctrl
0xa4e7f16c:  3860  li   r3, 0
0xa4e7f170:  4bfffef0  b0xa4e7f060
0xa4e7f174:  3c60a4e7  lis  r3, -0x5b19
0xa4e7f178:  6063f0c3  ori  r3, r3, 0xf0c3
0xa4e7f17c:  4bfffee4  b0xa4e7f060

Interestingly if I set a trap and then switch the opcode to "lis r4,0" 
(0x3c80)
then we carry on as normal until the next "lis r2,0" instruction. Looking 
through the
whole output of -d out_asm this is the first mention of r2 which makes me 
wonder if
it is special somehow? At least a quick search indicates that for 32-bit PPC r2 
is
supposed to be dedicated as a TOC pointer.


According to a PowerPC ABI doc: 
http://refspecs.linux-foundation.org/elf/elfspec_ppc.pdf
r2 is system reserved and should not be used by application code and 
another one (probably the same you were referring to mentions TOC 
https://refspecs.linuxfoundation.org/ELF/ppc64/PPC-elf64abi-1.9.html#REG. 
I'm not sure if that's relevant for the above but it looks like clobbering 
r2 might cause problems.


Regards,
BALATON Zoltan



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread Mark Cave-Ayland
On 26/06/2019 08:45, Richard Henderson wrote:

> On 6/25/19 7:55 PM, Mark Cave-Ayland wrote:
>> And here's where we are blowing up according to -d in_asm,op_out_asm:
>>
>> IN:
>> 0x00f22ca0:  101ffc84  vor  v0, v31, v31
>>
>> OP:
>>  ld_i32 tmp0,env,$0xfff8
>>  movi_i32 tmp1,$0x0
>>  brcond_i32 tmp0,tmp1,lt,$L0
>>
>>   00f22ca0
>>  ld_vec v128,e8,tmp2,env,$0xd6b0
>>  st_vec v128,e8,tmp2,env,$0xd4c0
> 
> Yep, that looks right.
> 
> As an aside, this does suggest to me that target/ppc might be well served in
> moving the ppc_vsr_t members of CPUPPCState earlier, so that this offset is
> smaller.
> 
>>  movi_i32 nip,$0xf22ca4
>>  movi_i32 nip,$0xf22ca4
>>  movi_i32 tmp0,$0x10002
>>  call raise_exception,$0x2,$0,env,tmp0
> 
> And this, presumably is the single-step debug exception.
> 
>> 0xa4e7f12c:  3c40  lis  r2, 0
>> 0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
>> 0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2
> 
>> 0xa4e7f138:  3c40  lis  r2, 0
>> 0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
>> 0xa4e7f140:  7c5b11ce  stvx v2, r27, r2
> 
> These also look correct.  Form an offset into r2, load or store from env+r2.
> 
> This also shows what I mention above re offset.  For a ppc host, the offset is
> large enough to require two instructions to form them.
> 
>> Any ideas what might be going on here?
> 
> What is the observed problem that makes you think that this is the incorrect
> instruction?

What I've been doing is set a breakpoint a few instructions before and then 
issuing
"stepi" commands via the gdbstub. As I step over the "vor v0, v31, v31" 
instruction
then either the qemu-system-ppc process segfaults outside of gdb, or inside gdb 
it
goes to bg. Bringing it back to fg just causes gdb to get confused and in the 
end the
only thing I can do is kill the gdb process.

On the plus side I've managed to work out where we are faulting by hacking the 
load
and store functions to inject trap opcodes in the ld_vec and st_vec and it 
appears
that we are segfaulting here:

OUT: [size=96]
0xa4e7f120:  81dbfff8  lwz  r14, -8(r27)
0xa4e7f124:  2f8e  cmpwicr7, r14, 0
0xa4e7f128:  419c004c  blt  cr7, 0xa4e7f174
0xa4e7f12c:  3c40  lis  r2, 0
   ^^
0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2
0xa4e7f138:  3c40  lis  r2, 0
0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
0xa4e7f140:  7c5b11ce  stvx v2, r27, r2
0xa4e7f144:  3dc000f2  lis  r14, 0xf2
0xa4e7f148:  61ce2ca4  ori  r14, r14, 0x2ca4
0xa4e7f14c:  91db016c  stw  r14, 0x16c(r27)
0xa4e7f150:  7f63db78  mr   r3, r27
0xa4e7f154:  3c81  lis  r4, 1
0xa4e7f158:  60840002  ori  r4, r4, 2
0xa4e7f15c:  3c87  lis  r0, 0x87
0xa4e7f160:  6000b618  ori  r0, r0, 0xb618
0xa4e7f164:  7c0903a6  mtctrr0
0xa4e7f168:  4e800421  bctrl
0xa4e7f16c:  3860  li   r3, 0
0xa4e7f170:  4bfffef0  b0xa4e7f060
0xa4e7f174:  3c60a4e7  lis  r3, -0x5b19
0xa4e7f178:  6063f0c3  ori  r3, r3, 0xf0c3
0xa4e7f17c:  4bfffee4  b0xa4e7f060

Interestingly if I set a trap and then switch the opcode to "lis r4,0" 
(0x3c80)
then we carry on as normal until the next "lis r2,0" instruction. Looking 
through the
whole output of -d out_asm this is the first mention of r2 which makes me 
wonder if
it is special somehow? At least a quick search indicates that for 32-bit PPC r2 
is
supposed to be dedicated as a TOC pointer.

Is there a quick way to disable r2 from the list of available registers to see 
if
that gets things going?


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread David Gibson
g


On Tue, Jun 25, 2019 at 05:56:42PM +0200, Richard Henderson wrote:
> On 6/25/19 5:37 PM, Mark Cave-Ayland wrote:
> > The problem is that in tcg/tcg-op.h we define "DEF(dup2_vec, 1, 2, 0, 
> > IMPLVEC |
> > IMPL(TCG_TARGET_REG_BITS == 32))" and in the last patchset dup2_vec isn't 
> > introduced
> > until towards the end. Unfortunately it's not a simple as bringing the 
> > patch forward
> > within the series to maintain bisectability because the current 
> > implementation
> > depends on VMRG which only appears in the patch just before it...
> 
> Ah, that would explain it.  I admit I haven't looked at v5 that closely.
> 
> > Next to try and figure out what exactly is causing the fault. Just a quick 
> > question
> > out of curiosity: is your Power9 system BE or LE?
> 
> The Power9 is LE.

It's the kernel determines endianness, not the system.

> 
> I do have access to a Power7 BE system, and that worked last time I checked.
> I'll try that again tomorrow to be sure.
> 
> 
> r~
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-26 Thread Richard Henderson
On 6/25/19 7:55 PM, Mark Cave-Ayland wrote:
> And here's where we are blowing up according to -d in_asm,op_out_asm:
> 
> IN:
> 0x00f22ca0:  101ffc84  vor  v0, v31, v31
> 
> OP:
>  ld_i32 tmp0,env,$0xfff8
>  movi_i32 tmp1,$0x0
>  brcond_i32 tmp0,tmp1,lt,$L0
> 
>   00f22ca0
>  ld_vec v128,e8,tmp2,env,$0xd6b0
>  st_vec v128,e8,tmp2,env,$0xd4c0

Yep, that looks right.

As an aside, this does suggest to me that target/ppc might be well served in
moving the ppc_vsr_t members of CPUPPCState earlier, so that this offset is
smaller.

>  movi_i32 nip,$0xf22ca4
>  movi_i32 nip,$0xf22ca4
>  movi_i32 tmp0,$0x10002
>  call raise_exception,$0x2,$0,env,tmp0

And this, presumably is the single-step debug exception.

> 0xa4e7f12c:  3c40  lis  r2, 0
> 0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
> 0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2

> 0xa4e7f138:  3c40  lis  r2, 0
> 0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
> 0xa4e7f140:  7c5b11ce  stvx v2, r27, r2

These also look correct.  Form an offset into r2, load or store from env+r2.

This also shows what I mention above re offset.  For a ppc host, the offset is
large enough to require two instructions to form them.

> Any ideas what might be going on here?

What is the observed problem that makes you think that this is the incorrect
instruction?


r~



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-25 Thread Aleksandar Markovic
On Jun 25, 2019 5:42 PM, "Mark Cave-Ayland" 
wrote:

>
> The problem is that in tcg/tcg-op.h we define "DEF(dup2_vec, 1, 2, 0,
IMPLVEC |
> IMPL(TCG_TARGET_REG_BITS == 32))" and in the last patchset dup2_vec isn't
introduced
> until towards the end. Unfortunately it's not a simple as bringing the
patch forward
> within the series to maintain bisectability because the current
implementation
> depends on VMRG which only appears in the patch just before it...
>

My strong impression is that VMRG,  VSPLT, VSLDOI, ... opcodes and basic
logic could have been defined very early in the series. (They all just
support other TCG vector operations. Their functionalty just helps achieve
other, exposed, backend functionalities.) That would reduce patch
dependencies and  allow “patch mobility” within the rest of the series.

However, I am not positive at all that would solve the problem at hand.

Aleksandar


Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-25 Thread Mark Cave-Ayland
On 25/06/2019 16:56, Richard Henderson wrote:

> On 6/25/19 5:37 PM, Mark Cave-Ayland wrote:
>> The problem is that in tcg/tcg-op.h we define "DEF(dup2_vec, 1, 2, 0, 
>> IMPLVEC |
>> IMPL(TCG_TARGET_REG_BITS == 32))" and in the last patchset dup2_vec isn't 
>> introduced
>> until towards the end. Unfortunately it's not a simple as bringing the patch 
>> forward
>> within the series to maintain bisectability because the current 
>> implementation
>> depends on VMRG which only appears in the patch just before it...
> 
> Ah, that would explain it.  I admit I haven't looked at v5 that closely.

It's actually the same in both patchsets: I'm still playing with v4 at the 
moment
since I have a few hacks in place to help me figure out what's going on.

>> Next to try and figure out what exactly is causing the fault. Just a quick 
>> question
>> out of curiosity: is your Power9 system BE or LE?
> 
> The Power9 is LE.
> 
> I do have access to a Power7 BE system, and that worked last time I checked.
> I'll try that again tomorrow to be sure.

And here's where we are blowing up according to -d in_asm,op_out_asm:

IN:
0x00f22ca0:  101ffc84  vor  v0, v31, v31

OP:
 ld_i32 tmp0,env,$0xfff8
 movi_i32 tmp1,$0x0
 brcond_i32 tmp0,tmp1,lt,$L0

  00f22ca0
 ld_vec v128,e8,tmp2,env,$0xd6b0
 st_vec v128,e8,tmp2,env,$0xd4c0
 movi_i32 nip,$0xf22ca4
 movi_i32 nip,$0xf22ca4
 movi_i32 tmp0,$0x10002
 call raise_exception,$0x2,$0,env,tmp0
 exit_tb $0x0
 set_label $L0
 exit_tb $0xa4e7f0c3

OUT: [size=96]
0xa4e7f120:  81dbfff8  lwz  r14, -8(r27)
0xa4e7f124:  2f8e  cmpwicr7, r14, 0
0xa4e7f128:  419c004c  blt  cr7, 0xa4e7f174
0xa4e7f12c:  3c40  lis  r2, 0
0xa4e7f130:  6042d6b0  ori  r2, r2, 0xd6b0
0xa4e7f134:  7c5b10ce  lvx  v2, r27, r2
0xa4e7f138:  3c40  lis  r2, 0
0xa4e7f13c:  6042d4c0  ori  r2, r2, 0xd4c0
0xa4e7f140:  7c5b11ce  stvx v2, r27, r2
0xa4e7f144:  3dc000f2  lis  r14, 0xf2
0xa4e7f148:  61ce2ca4  ori  r14, r14, 0x2ca4
0xa4e7f14c:  91db016c  stw  r14, 0x16c(r27)
0xa4e7f150:  7f63db78  mr   r3, r27
0xa4e7f154:  3c81  lis  r4, 1
0xa4e7f158:  60840002  ori  r4, r4, 2
0xa4e7f15c:  3c87  lis  r0, 0x87
0xa4e7f160:  6000b618  ori  r0, r0, 0xb618
0xa4e7f164:  7c0903a6  mtctrr0
0xa4e7f168:  4e800421  bctrl
0xa4e7f16c:  3860  li   r3, 0
0xa4e7f170:  4bfffef0  b0xa4e7f060
0xa4e7f174:  3c60a4e7  lis  r3, -0x5b19
0xa4e7f178:  6063f0c3  ori  r3, r3, 0xf0c3
0xa4e7f17c:  4bfffee4  b0xa4e7f060

Any ideas what might be going on here?


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-25 Thread Richard Henderson
On 6/25/19 5:37 PM, Mark Cave-Ayland wrote:
> The problem is that in tcg/tcg-op.h we define "DEF(dup2_vec, 1, 2, 0, IMPLVEC 
> |
> IMPL(TCG_TARGET_REG_BITS == 32))" and in the last patchset dup2_vec isn't 
> introduced
> until towards the end. Unfortunately it's not a simple as bringing the patch 
> forward
> within the series to maintain bisectability because the current implementation
> depends on VMRG which only appears in the patch just before it...

Ah, that would explain it.  I admit I haven't looked at v5 that closely.

> Next to try and figure out what exactly is causing the fault. Just a quick 
> question
> out of curiosity: is your Power9 system BE or LE?

The Power9 is LE.

I do have access to a Power7 BE system, and that worked last time I checked.
I'll try that again tomorrow to be sure.


r~



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-25 Thread Mark Cave-Ayland
On 25/06/2019 07:56, Richard Henderson wrote:

>>> One more hint: if I try a build of d8dcbb57e9 along with my 
>>> tcg_can_emit_vec_op()
>>> hack and pass --enable-debug-tcg to configure then I get an assert on 
>>> startup:
>>>
>>> qemu-system-ppc: /home/mca/src/qemu/tcg/tcg.c:2207: process_op_defs: 
>>> Assertion `tdefs
>>> != ((void *)0)' failed.
>>> Aborted
>>>
>>
>> Mark, Richard, Howard, David,
>>
>> I just sent v5 of the series, that is (in the sense of net-result of
>> code changes) equivalent to v4, but the patch 1/7 from v4 is now split
>> into ten smaller patches. This was done mainly to enable Mark to
>> perhaps try v5 and bisect, in order to at least somewhat narrow down
>> the culprit. Most likely it will be patch 5 from v5, that is still
>> sizeable, but even if this is the case, we can eliminate other smaller
>> things from consideration.
> 
> Thanks for the help on that.
> 
> I don't believe your split is actually bisectable -- there's a minimum amount
> that is required to enable vector opcodes at all.  Patch 5 is the first that
> enables tcg_out_{mov,ld,st}, so while patches beforehand may compile, they
> certainly will not run.
> 
> I can retain your split, but for real bisectability we need to move the enable
> of TCG_TARGET_HAS_v128 from patch 2 to patch 5.
> 
> Given that all this works for me on a Power9 host, I expect that there's a
> simple fix for Mark's G5 host.  Given the above assertion, a missing opcode
> definition, perhaps for -m32 vs -m64?

Right, I'm starting to dig into this a bit more now. First of all, I've figured 
out
what is triggering the above assertion:

"qemu-system-ppc: /home/mca/src/qemu/tcg/tcg.c:2207: process_op_defs: Assertion
`tdefs != ((void *)0)' failed."

The problem is that in tcg/tcg-op.h we define "DEF(dup2_vec, 1, 2, 0, IMPLVEC |
IMPL(TCG_TARGET_REG_BITS == 32))" and in the last patchset dup2_vec isn't 
introduced
until towards the end. Unfortunately it's not a simple as bringing the patch 
forward
within the series to maintain bisectability because the current implementation
depends on VMRG which only appears in the patch just before it...

Next to try and figure out what exactly is causing the fault. Just a quick 
question
out of curiosity: is your Power9 system BE or LE?


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-25 Thread Richard Henderson
On 6/23/19 7:10 PM, Aleksandar Markovic wrote:
> On Sat, Jun 22, 2019 at 5:02 PM Mark Cave-Ayland
>  wrote:
>>
>> On 22/06/2019 15:20, Mark Cave-Ayland wrote:
>>
>>> I've just given your tcg-ppc-vsx branch a spin here, and like Howard I'm 
>>> getting
>>> segfaults trying to launch my MacOS images :(  The segfault is weird: it 
>>> doesn't get
>>> caught by an attached gdb and the qemu-system-ppc process seems to hang 
>>> around like a
>>> zombie which makes me think that it's probably an illegal instruction of 
>>> some kind,
>>> but the PPC kernel can't handle it as well as x86 does.
>>>
>>> With a bit more work I bisected it down to the first commit in the patchset
>>> (d8dcbb57e9: "tcg/ppc: Initial backend support for Altivec") and then as an
>>> experiment I hacked tcg_can_emit_vec_op() to always return 0 to see if that 
>>> made a
>>> difference, but the segfault still appears.
>>>
>>> The commit message mentions that the load and store helpers are also 
>>> improved, so I
>>> wonder if they are what is causing the error rather than the vector parts? 
>>> Also in
>>> the kernel log I see the following messages appearing:
>>>
>>> [3639669.374942] qemu-system-ppc[28591]: segfault (11) at 64b8 nip f87280 
>>> lr f8723c
>>> code 1 in qemu-system-ppc[94e000+aa]
>>> [3639669.380015] qemu-system-ppc[28591]: code: 93c10038 91810020 90010044 
>>> 7fc802a6
>>> 3fde0059 2e03 3bde6c18 7c9d2378
>>> [3639669.385056] qemu-system-ppc[28591]: code: 813e80a0 7cbb2b78 7cda3378 
>>> 7cf93b78
>>> <81428ff8> 9141001c 3940 8129
>>>
>>> Does that help at all? If not let me know if there are any other tests that 
>>> you'd
>>> like me to try to help debug this.
>>
>> One more hint: if I try a build of d8dcbb57e9 along with my 
>> tcg_can_emit_vec_op()
>> hack and pass --enable-debug-tcg to configure then I get an assert on 
>> startup:
>>
>> qemu-system-ppc: /home/mca/src/qemu/tcg/tcg.c:2207: process_op_defs: 
>> Assertion `tdefs
>> != ((void *)0)' failed.
>> Aborted
>>
> 
> Mark, Richard, Howard, David,
> 
> I just sent v5 of the series, that is (in the sense of net-result of
> code changes) equivalent to v4, but the patch 1/7 from v4 is now split
> into ten smaller patches. This was done mainly to enable Mark to
> perhaps try v5 and bisect, in order to at least somewhat narrow down
> the culprit. Most likely it will be patch 5 from v5, that is still
> sizeable, but even if this is the case, we can eliminate other smaller
> things from consideration.

Thanks for the help on that.

I don't believe your split is actually bisectable -- there's a minimum amount
that is required to enable vector opcodes at all.  Patch 5 is the first that
enables tcg_out_{mov,ld,st}, so while patches beforehand may compile, they
certainly will not run.

I can retain your split, but for real bisectability we need to move the enable
of TCG_TARGET_HAS_v128 from patch 2 to patch 5.

Given that all this works for me on a Power9 host, I expect that there's a
simple fix for Mark's G5 host.  Given the above assertion, a missing opcode
definition, perhaps for -m32 vs -m64?


r~



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-23 Thread Aleksandar Markovic
On Sat, Jun 22, 2019 at 5:02 PM Mark Cave-Ayland
 wrote:
>
> On 22/06/2019 15:20, Mark Cave-Ayland wrote:
>
> > I've just given your tcg-ppc-vsx branch a spin here, and like Howard I'm 
> > getting
> > segfaults trying to launch my MacOS images :(  The segfault is weird: it 
> > doesn't get
> > caught by an attached gdb and the qemu-system-ppc process seems to hang 
> > around like a
> > zombie which makes me think that it's probably an illegal instruction of 
> > some kind,
> > but the PPC kernel can't handle it as well as x86 does.
> >
> > With a bit more work I bisected it down to the first commit in the patchset
> > (d8dcbb57e9: "tcg/ppc: Initial backend support for Altivec") and then as an
> > experiment I hacked tcg_can_emit_vec_op() to always return 0 to see if that 
> > made a
> > difference, but the segfault still appears.
> >
> > The commit message mentions that the load and store helpers are also 
> > improved, so I
> > wonder if they are what is causing the error rather than the vector parts? 
> > Also in
> > the kernel log I see the following messages appearing:
> >
> > [3639669.374942] qemu-system-ppc[28591]: segfault (11) at 64b8 nip f87280 
> > lr f8723c
> > code 1 in qemu-system-ppc[94e000+aa]
> > [3639669.380015] qemu-system-ppc[28591]: code: 93c10038 91810020 90010044 
> > 7fc802a6
> > 3fde0059 2e03 3bde6c18 7c9d2378
> > [3639669.385056] qemu-system-ppc[28591]: code: 813e80a0 7cbb2b78 7cda3378 
> > 7cf93b78
> > <81428ff8> 9141001c 3940 8129
> >
> > Does that help at all? If not let me know if there are any other tests that 
> > you'd
> > like me to try to help debug this.
>
> One more hint: if I try a build of d8dcbb57e9 along with my 
> tcg_can_emit_vec_op()
> hack and pass --enable-debug-tcg to configure then I get an assert on startup:
>
> qemu-system-ppc: /home/mca/src/qemu/tcg/tcg.c:2207: process_op_defs: 
> Assertion `tdefs
> != ((void *)0)' failed.
> Aborted
>

Mark, Richard, Howard, David,

I just sent v5 of the series, that is (in the sense of net-result of
code changes) equivalent to v4, but the patch 1/7 from v4 is now split
into ten smaller patches. This was done mainly to enable Mark to
perhaps try v5 and bisect, in order to at least somewhat narrow down
the culprit. Most likely it will be patch 5 from v5, that is still
sizeable, but even if this is the case, we can eliminate other smaller
things from consideration.

Sincerely,
Aleksandar

>
> ATB,
>
> Mark.
>



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-22 Thread Mark Cave-Ayland
On 22/06/2019 15:20, Mark Cave-Ayland wrote:

> I've just given your tcg-ppc-vsx branch a spin here, and like Howard I'm 
> getting
> segfaults trying to launch my MacOS images :(  The segfault is weird: it 
> doesn't get
> caught by an attached gdb and the qemu-system-ppc process seems to hang 
> around like a
> zombie which makes me think that it's probably an illegal instruction of some 
> kind,
> but the PPC kernel can't handle it as well as x86 does.
> 
> With a bit more work I bisected it down to the first commit in the patchset
> (d8dcbb57e9: "tcg/ppc: Initial backend support for Altivec") and then as an
> experiment I hacked tcg_can_emit_vec_op() to always return 0 to see if that 
> made a
> difference, but the segfault still appears.
> 
> The commit message mentions that the load and store helpers are also 
> improved, so I
> wonder if they are what is causing the error rather than the vector parts? 
> Also in
> the kernel log I see the following messages appearing:
> 
> [3639669.374942] qemu-system-ppc[28591]: segfault (11) at 64b8 nip f87280 lr 
> f8723c
> code 1 in qemu-system-ppc[94e000+aa]
> [3639669.380015] qemu-system-ppc[28591]: code: 93c10038 91810020 90010044 
> 7fc802a6
> 3fde0059 2e03 3bde6c18 7c9d2378
> [3639669.385056] qemu-system-ppc[28591]: code: 813e80a0 7cbb2b78 7cda3378 
> 7cf93b78
> <81428ff8> 9141001c 3940 8129
> 
> Does that help at all? If not let me know if there are any other tests that 
> you'd
> like me to try to help debug this.

One more hint: if I try a build of d8dcbb57e9 along with my 
tcg_can_emit_vec_op()
hack and pass --enable-debug-tcg to configure then I get an assert on startup:

qemu-system-ppc: /home/mca/src/qemu/tcg/tcg.c:2207: process_op_defs: Assertion 
`tdefs
!= ((void *)0)' failed.
Aborted


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-22 Thread Mark Cave-Ayland
On 19/06/2019 06:07, Mark Cave-Ayland wrote:

> On 18/06/2019 06:00, Richard Henderson wrote:
> 
>> Ping.  Otherwise I'll include it in my next tcg pull.
>>
>> r~
> 
> I can give this another spin on my test images on a G4 over the next few 
> days. I've
> also added Howard on CC as he reported some issues with the previous 
> iteration at
> https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg06561.html.

I've just given your tcg-ppc-vsx branch a spin here, and like Howard I'm getting
segfaults trying to launch my MacOS images :(  The segfault is weird: it 
doesn't get
caught by an attached gdb and the qemu-system-ppc process seems to hang around 
like a
zombie which makes me think that it's probably an illegal instruction of some 
kind,
but the PPC kernel can't handle it as well as x86 does.

With a bit more work I bisected it down to the first commit in the patchset
(d8dcbb57e9: "tcg/ppc: Initial backend support for Altivec") and then as an
experiment I hacked tcg_can_emit_vec_op() to always return 0 to see if that 
made a
difference, but the segfault still appears.

The commit message mentions that the load and store helpers are also improved, 
so I
wonder if they are what is causing the error rather than the vector parts? Also 
in
the kernel log I see the following messages appearing:

[3639669.374942] qemu-system-ppc[28591]: segfault (11) at 64b8 nip f87280 lr 
f8723c
code 1 in qemu-system-ppc[94e000+aa]
[3639669.380015] qemu-system-ppc[28591]: code: 93c10038 91810020 90010044 
7fc802a6
3fde0059 2e03 3bde6c18 7c9d2378
[3639669.385056] qemu-system-ppc[28591]: code: 813e80a0 7cbb2b78 7cda3378 
7cf93b78
<81428ff8> 9141001c 3940 8129

Does that help at all? If not let me know if there are any other tests that 
you'd
like me to try to help debug this.


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-20 Thread Howard Spoelstra
Hi,

As reported before, qemu-system-ppc still crashes with a segmentation fault
on Lubuntu 16 on a G5. Built from current tcg-ppc-vsx branch.

Linux hsp-G5-Lubuntu 4.4.0-143-powerpc64-smp #169-Ubuntu SMP Thu Feb 7
08:25:49 UTC 2019 ppc64 ppc64 ppc64 GNU/Linux

hsp@hsp-G5-Lubuntu:~$ gcc -v
Using built-in specs.
COLLECT_GCC=gcc
COLLECT_LTO_WRAPPER=/usr/lib/gcc/powerpc-linux-gnu/5/lto-wrapper
Target: powerpc-linux-gnu
Configured with: ../src/configure -v --with-pkgversion='Ubuntu
5.4.0-6ubuntu1~16.04.11'
--with-bugurl=file:///usr/share/doc/gcc-5/README.Bugs
--enable-languages=c,ada,c++,java,go,d,fortran,objc,obj-c++ --prefix=/usr
--program-suffix=-5 --enable-shared --enable-linker-build-id
--libexecdir=/usr/lib --without-included-gettext --enable-threads=posix
--libdir=/usr/lib --enable-nls --with-sysroot=/ --enable-clocale=gnu
--enable-libstdcxx-debug --enable-libstdcxx-time=yes
--with-default-libstdcxx-abi=new --enable-gnu-unique-object
--disable-libitm --disable-libquadmath --enable-plugin --with-system-zlib
--disable-browser-plugin --enable-java-awt=gtk --enable-gtk-cairo
--with-java-home=/usr/lib/jvm/java-1.5.0-gcj-5-powerpc/jre
--enable-java-home
--with-jvm-root-dir=/usr/lib/jvm/java-1.5.0-gcj-5-powerpc
--with-jvm-jar-dir=/usr/lib/jvm-exports/java-1.5.0-gcj-5-powerpc
--with-arch-directory=ppc --with-ecj-jar=/usr/share/java/eclipse-ecj.jar
--enable-objc-gc --enable-secureplt --disable-softfloat
--with-cpu=default32 --disable-softfloat
--enable-targets=powerpc-linux,powerpc64-linux --enable-multiarch
--disable-werror --with-long-double-128 --enable-multilib
--enable-checking=release --build=powerpc-linux-gnu
--host=powerpc-linux-gnu --target=powerpc-linux-gnu
Thread model: posix
gcc version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.11)


Best,
Howard.


Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-19 Thread David Gibson
On Mon, Jun 17, 2019 at 10:00:10PM -0700, Richard Henderson wrote:
> Ping.  Otherwise I'll include it in my next tcg pull.

Uh.. I'm not sure who this ping is directed at.  I'm afraid this
series has dropped off my radar.

> 
> 
> r~
> 
> On 5/18/19 9:15 PM, Richard Henderson wrote:
> > Based-on: <20190518190157.21255-1-richard.hender...@linaro.org>
> > Aka "tcg: misc gvec improvements".
> > 
> > Version 3 was last posted in March,
> > https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg05859.html
> > 
> > Changes since v3:
> >   * Add support for bitsel, with the vsx xxsel insn.
> >   * Rely on the new relocation overflow handling, so
> > we don't require 3 insns for a vector load.
> > 
> > Changes since v2:
> >   * Several generic tcg patches to improve dup vs dupi vs dupm.
> > In particular, if a global temp (like guest r10) is not in
> > a host register, we should duplicate from memory instead of
> > loading to an integer register, spilling to stack, loading
> > to a vector register, and then duplicating.
> >   * I have more confidence that 32-bit ppc host should work
> > this time around.  No testing on that front yet, but I've
> > unified some code sequences with 64-bit ppc host.
> >   * Base altivec now supports V128 only.  Moved V64 support to
> > Power7 (v2.06), which has 64-bit load/store.
> >   * Dropped support for 64-bit vector multiply using Power8.
> > The expansion was too large compared to using integer regs.
> > 
> > 
> > r~
> > 
> > 
> > Richard Henderson (7):
> >   tcg/ppc: Initial backend support for Altivec
> >   tcg/ppc: Support vector shift by immediate
> >   tcg/ppc: Support vector multiply
> >   tcg/ppc: Support vector dup2
> >   tcg/ppc: Update vector support to v2.06
> >   tcg/ppc: Update vector support to v2.07
> >   tcg/ppc: Update vector support to v3.00
> > 
> >  tcg/ppc/tcg-target.h |   39 +-
> >  tcg/ppc/tcg-target.opc.h |   11 +
> >  tcg/ppc/tcg-target.inc.c | 1077 +++---
> >  3 files changed, 1063 insertions(+), 64 deletions(-)
> >  create mode 100644 tcg/ppc/tcg-target.opc.h
> > 
> 

-- 
David Gibson| I'll have my music baroque, and my code
david AT gibson.dropbear.id.au  | minimalist, thank you.  NOT _the_ _other_
| _way_ _around_!
http://www.ozlabs.org/~dgibson


signature.asc
Description: PGP signature


Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-18 Thread Mark Cave-Ayland
On 18/06/2019 06:00, Richard Henderson wrote:

> Ping.  Otherwise I'll include it in my next tcg pull.
> 
> r~

I can give this another spin on my test images on a G4 over the next few days. 
I've
also added Howard on CC as he reported some issues with the previous iteration 
at
https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg06561.html.

> On 5/18/19 9:15 PM, Richard Henderson wrote:
>> Based-on: <20190518190157.21255-1-richard.hender...@linaro.org>
>> Aka "tcg: misc gvec improvements".
>>
>> Version 3 was last posted in March,
>> https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg05859.html
>>
>> Changes since v3:
>>   * Add support for bitsel, with the vsx xxsel insn.
>>   * Rely on the new relocation overflow handling, so
>> we don't require 3 insns for a vector load.
>>
>> Changes since v2:
>>   * Several generic tcg patches to improve dup vs dupi vs dupm.
>> In particular, if a global temp (like guest r10) is not in
>> a host register, we should duplicate from memory instead of
>> loading to an integer register, spilling to stack, loading
>> to a vector register, and then duplicating.
>>   * I have more confidence that 32-bit ppc host should work
>> this time around.  No testing on that front yet, but I've
>> unified some code sequences with 64-bit ppc host.
>>   * Base altivec now supports V128 only.  Moved V64 support to
>> Power7 (v2.06), which has 64-bit load/store.
>>   * Dropped support for 64-bit vector multiply using Power8.
>> The expansion was too large compared to using integer regs.
>>
>>
>> r~
>>
>>
>> Richard Henderson (7):
>>   tcg/ppc: Initial backend support for Altivec
>>   tcg/ppc: Support vector shift by immediate
>>   tcg/ppc: Support vector multiply
>>   tcg/ppc: Support vector dup2
>>   tcg/ppc: Update vector support to v2.06
>>   tcg/ppc: Update vector support to v2.07
>>   tcg/ppc: Update vector support to v3.00
>>
>>  tcg/ppc/tcg-target.h |   39 +-
>>  tcg/ppc/tcg-target.opc.h |   11 +
>>  tcg/ppc/tcg-target.inc.c | 1077 +++---
>>  3 files changed, 1063 insertions(+), 64 deletions(-)
>>  create mode 100644 tcg/ppc/tcg-target.opc.h


ATB,

Mark.



Re: [Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-06-17 Thread Richard Henderson
Ping.  Otherwise I'll include it in my next tcg pull.


r~

On 5/18/19 9:15 PM, Richard Henderson wrote:
> Based-on: <20190518190157.21255-1-richard.hender...@linaro.org>
> Aka "tcg: misc gvec improvements".
> 
> Version 3 was last posted in March,
> https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg05859.html
> 
> Changes since v3:
>   * Add support for bitsel, with the vsx xxsel insn.
>   * Rely on the new relocation overflow handling, so
> we don't require 3 insns for a vector load.
> 
> Changes since v2:
>   * Several generic tcg patches to improve dup vs dupi vs dupm.
> In particular, if a global temp (like guest r10) is not in
> a host register, we should duplicate from memory instead of
> loading to an integer register, spilling to stack, loading
> to a vector register, and then duplicating.
>   * I have more confidence that 32-bit ppc host should work
> this time around.  No testing on that front yet, but I've
> unified some code sequences with 64-bit ppc host.
>   * Base altivec now supports V128 only.  Moved V64 support to
> Power7 (v2.06), which has 64-bit load/store.
>   * Dropped support for 64-bit vector multiply using Power8.
> The expansion was too large compared to using integer regs.
> 
> 
> r~
> 
> 
> Richard Henderson (7):
>   tcg/ppc: Initial backend support for Altivec
>   tcg/ppc: Support vector shift by immediate
>   tcg/ppc: Support vector multiply
>   tcg/ppc: Support vector dup2
>   tcg/ppc: Update vector support to v2.06
>   tcg/ppc: Update vector support to v2.07
>   tcg/ppc: Update vector support to v3.00
> 
>  tcg/ppc/tcg-target.h |   39 +-
>  tcg/ppc/tcg-target.opc.h |   11 +
>  tcg/ppc/tcg-target.inc.c | 1077 +++---
>  3 files changed, 1063 insertions(+), 64 deletions(-)
>  create mode 100644 tcg/ppc/tcg-target.opc.h
> 




[Qemu-devel] [PATCH v4 0/7] tcg/ppc: Add vector opcodes

2019-05-18 Thread Richard Henderson
Based-on: <20190518190157.21255-1-richard.hender...@linaro.org>
Aka "tcg: misc gvec improvements".

Version 3 was last posted in March,
https://lists.gnu.org/archive/html/qemu-devel/2019-03/msg05859.html

Changes since v3:
  * Add support for bitsel, with the vsx xxsel insn.
  * Rely on the new relocation overflow handling, so
we don't require 3 insns for a vector load.

Changes since v2:
  * Several generic tcg patches to improve dup vs dupi vs dupm.
In particular, if a global temp (like guest r10) is not in
a host register, we should duplicate from memory instead of
loading to an integer register, spilling to stack, loading
to a vector register, and then duplicating.
  * I have more confidence that 32-bit ppc host should work
this time around.  No testing on that front yet, but I've
unified some code sequences with 64-bit ppc host.
  * Base altivec now supports V128 only.  Moved V64 support to
Power7 (v2.06), which has 64-bit load/store.
  * Dropped support for 64-bit vector multiply using Power8.
The expansion was too large compared to using integer regs.


r~


Richard Henderson (7):
  tcg/ppc: Initial backend support for Altivec
  tcg/ppc: Support vector shift by immediate
  tcg/ppc: Support vector multiply
  tcg/ppc: Support vector dup2
  tcg/ppc: Update vector support to v2.06
  tcg/ppc: Update vector support to v2.07
  tcg/ppc: Update vector support to v3.00

 tcg/ppc/tcg-target.h |   39 +-
 tcg/ppc/tcg-target.opc.h |   11 +
 tcg/ppc/tcg-target.inc.c | 1077 +++---
 3 files changed, 1063 insertions(+), 64 deletions(-)
 create mode 100644 tcg/ppc/tcg-target.opc.h

-- 
2.17.1