from:"Tom Musta"

Re: [Qemu-devel] [PATCH 2/2] target-ppc: fix xscmpodp and xscmpudp decoding

2015-09-23 Thread Tom Musta

The modern versions of the ISA require that reserved instruction bits be
ignored for server class processors (see Book I, section 1.3.3).  I believe
older versions of the ISA allowed for Illegal Instruction Interrupt or
"Boundedly Undefined", which is, of course, much less specific.  QEMU
supports implementations from both eras and, as a general rule, flags this
situation as an illegal instruction.

So I would expect that real hardware will ignore the bit.  You will still
be left with the choice of making this decoder consistent with the hardware
or consistent with the rest of QEMU :)   When I added support for VSX, the
intent was the latter.

On Tue, Sep 22, 2015 at 4:28 PM, Aurelien Jarno <aurel...@aurel32.net>
wrote:

> On 2015-09-22 12:26, Thomas Huth wrote:
> > On 13/09/15 23:03, Aurelien Jarno wrote:
> > > The xscmpodp and xscmpudp instructions only have the AX, BX bits in
> > > their encoding, the lowest bit (usually TX) is marked as an invalid
> > > bit. We therefore can't decode them with GEN_XX2FORM, which decodes
> > > the two lowest bits.
> > >
> > > Introduce a new form GEN_XX2IFORM, which decodes AX and BX and marks
> > > the lowest bit as invalid.
> > >
> > > Cc: Tom Musta <tommu...@gmail.com>
> > > Cc: Alexander Graf <ag...@suse.de>
> > > Cc: qemu-sta...@nongnu.org
> > > Signed-off-by: Aurelien Jarno <aurel...@aurel32.net>
> > > ---
> > >  target-ppc/translate.c | 11 +--
> > >  1 file changed, 9 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/target-ppc/translate.c b/target-ppc/translate.c
> > > index 84c5cea..c0eed13 100644
> > > --- a/target-ppc/translate.c
> > > +++ b/target-ppc/translate.c
> > > @@ -10670,6 +10670,13 @@ GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1,
> opc3, 0, PPC_NONE, fl2), \
> > >  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 0, PPC_NONE, fl2), \
> > >  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 0, PPC_NONE, fl2)
> > >
> > > +#undef GEN_XX2IFORM
> > > +#define GEN_XX2IFORM(name, opc2, opc3, fl2)
>  \
> > > +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0, opc3, 1, PPC_NONE, fl2), \
> > > +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 1, opc3, 1, PPC_NONE, fl2), \
> > > +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 2, opc3, 1, PPC_NONE, fl2), \
> > > +GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 3, opc3, 1, PPC_NONE, fl2)
> > > +
> > >  #undef GEN_XX3_RC_FORM
> > >  #define GEN_XX3_RC_FORM(name, opc2, opc3, fl2)
>   \
> > >  GEN_HANDLER2_E(name, #name, 0x3C, opc2 | 0x00, opc3 | 0x00, 0,
> PPC_NONE, fl2), \
> > > @@ -10731,8 +10738,8 @@ GEN_XX3FORM(xsnmaddadp, 0x04, 0x14, PPC2_VSX),
> > >  GEN_XX3FORM(xsnmaddmdp, 0x04, 0x15, PPC2_VSX),
> > >  GEN_XX3FORM(xsnmsubadp, 0x04, 0x16, PPC2_VSX),
> > >  GEN_XX3FORM(xsnmsubmdp, 0x04, 0x17, PPC2_VSX),
> > > -GEN_XX2FORM(xscmpodp,  0x0C, 0x05, PPC2_VSX),
> > > -GEN_XX2FORM(xscmpudp,  0x0C, 0x04, PPC2_VSX),
> > > +GEN_XX2IFORM(xscmpodp,  0x0C, 0x05, PPC2_VSX),
> > > +GEN_XX2IFORM(xscmpudp,  0x0C, 0x04, PPC2_VSX),
> >
> > According to PowerISA 2.07, xscmpodp and xscmpudp are of type XX3, not
> > of type XX2 ... so should this macro maybe rather be named XX3IFORM
> instead?
>
> Indeed, I have chosen the name without actually realizing the manual
> also give names. Then I do wonder if the lower bit is really decoded as
> invalid, I wouldn't be surprised it is actually just ignored.
>
> I'll try to do some tests on real hardware and come up with a fixup
> patch.
>
> Aurelien
>
> --
> Aurelien Jarno  GPG: 4096R/1DDD8C9B
> aurel...@aurel32.net http://www.aurel32.net
>

Re: [Qemu-devel] [PULL 06/37] target-ppc: VXSQRT Should Not Be Set for NaNs

2015-02-13 Thread Tom Musta

I agree that the comment is incorrect and should say sNaN square root.

On Thu, Feb 12, 2015 at 4:21 PM, Maciej W. Rozycki ma...@linux-mips.org
wrote:

 On Wed, 7 Jan 2015, Alexander Graf wrote:

  diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
  index 7f74466..81db60f 100644
  --- a/target-ppc/fpu_helper.c
  +++ b/target-ppc/fpu_helper.c
  @@ -920,14 +923,16 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t
 arg)
 
   farg.ll = arg;
 
  -if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
  -/* Square root of a negative nonzero number */
  -farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
  -} else {
  +if (unlikely(float64_is_any_nan(farg.d))) {
   if (unlikely(float64_is_signaling_nan(farg.d))) {
  -/* sNaN square root */
  +/* sNaN reciprocal square root */

  This change to the comment looks accidental, compare the changes below.
 Should it be reverted?  [Found this while resolving merge conflicts.]

   fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
  +farg.ll = float64_snan_to_qnan(farg.ll);
   }
  +} else if (unlikely(float64_is_neg(farg.d) 
 !float64_is_zero(farg.d))) {
  +/* Square root of a negative nonzero number */
  +farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
  +} else {
   farg.d = float64_sqrt(farg.d, env-fp_status);
   }
   return farg.ll;
  @@ -974,17 +979,20 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t
 arg)
 
   farg.ll = arg;
 
  -if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
  -/* Reciprocal square root of a negative nonzero number */
  -farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
  -} else {
  +if (unlikely(float64_is_any_nan(farg.d))) {
   if (unlikely(float64_is_signaling_nan(farg.d))) {
   /* sNaN reciprocal square root */
   fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
  +farg.ll = float64_snan_to_qnan(farg.ll);
   }
  +} else if (unlikely(float64_is_neg(farg.d) 
 !float64_is_zero(farg.d))) {
  +/* Reciprocal square root of a negative nonzero number */
  +farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
  +} else {
   farg.d = float64_sqrt(farg.d, env-fp_status);
   farg.d = float64_div(float64_one, farg.d, env-fp_status);
   }
  +
   return farg.ll;
   }
 

   Maciej

Re: [Qemu-devel] [PATCH 9/9] target-ppc: Introduce Privileged TM Noops

2014-12-20 Thread Tom Musta

On 12/19/2014 4:20 AM, Fam Zheng wrote:
 On Thu, 12/18 10:34, Tom Musta wrote:
 Add the supervisory Transactional Memory instructions treclaim. and
 trechkpt.  The implementation is a degenerate one that simply
 checks privileged state, TM availability and then sets CR[0] to
 0b0000, just like the unprivileged noops.
 
 And also s-o-b for this :)
 
 Fam
 
 ---
  target-ppc/translate.c |   38 ++
  1 files changed, 38 insertions(+), 0 deletions(-)

 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index a3c79a6..b4a4297 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -9691,6 +9691,40 @@ static void gen_tcheck(DisasContext *ctx)
  tcg_gen_movi_i32(cpu_crf[crfD(ctx-opcode)], 0x8);
  }
  
 +#if defined(CONFIG_USER_ONLY)
 +#define GEN_TM_PRIV_NOOP(name) \
 +static inline void gen_##name(DisasContext *ctx)   \
 +{  \
 +gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);   \
 +}
 +
 +#else
 +
 +#define GEN_TM_PRIV_NOOP(name) \
 +static inline void gen_##name(DisasContext *ctx)   \
 +{  \
 +if (unlikely(ctx-pr)) {   \
 +gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);   \
 +return;\
 +}  \
 +if (unlikely(!ctx-tm_enabled)) {  \
 +gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
 +return;\
 +}  \
 +/* Because tbegin always fails, the implementation is  \
 + * simple: \
 + * \
 + *   CR[0] = 0b0 || MSR[TS] || 0b0 \
 + * = 0b0 || 0b00 | 0b0 \
 + */\
 +tcg_gen_movi_i32(cpu_crf[0], 0);   \
 +}
 +
 +#endif
 +
 +GEN_TM_PRIV_NOOP(treclaim);
 +GEN_TM_PRIV_NOOP(trechkpt);
 +
  static opcode_t opcodes[] = {
  GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0x, PPC_NONE),
  GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, PPC_INTEGER),
 @@ -11122,6 +11156,10 @@ GEN_HANDLER2_E(tsr, tsr, 0x1F, 0x0E, 0x17, 
 0x03DFF800, \
 PPC_NONE, PPC2_TM),
  GEN_HANDLER2_E(tcheck, tcheck, 0x1F, 0x0E, 0x16, 0x007FF800, \
 PPC_NONE, PPC2_TM),
 +GEN_HANDLER2_E(treclaim, treclaim, 0x1F, 0x0E, 0x1D, 0x03E0F800, \
 +   PPC_NONE, PPC2_TM),
 +GEN_HANDLER2_E(trechkpt, trechkpt, 0x1F, 0x0E, 0x1F, 0x03FFF800, \
 +   PPC_NONE, PPC2_TM),
  };
  
  #include helper_regs.h
 -- 
 1.7.1



Signed-off-by: Tom Musta tommu...@gmail.com

[Qemu-devel] [PATCH 0/9] target-ppc: Rudimentary Support for Transactional Memory

2014-12-18 Thread Tom Musta

This patch series introduces rudimentary support for the Transactional Memory
(TM) feature of Power ISA V2.07.  In a nutshell, software uses the feature by
initiating a transaction via the tbegin instruction.  Hardware then accumulates
storage accesses until the transaction is committed (via the tend instruction).
At this point, either the instruction completes and all storage accesses are
atomic with respect to other processors; or the transaction fails and processor
state reverts to the point of tbegin.  Transaction success or failure is recorded
in CR[0] and the instruction immediately following tbegin is expected to inspect
this field and provide an error path to properly handle failure.

Accurately emulating such a feature in QEMU is quite difficult.  Instead, the
approach taken here simply fails the transaction at the point of tbegin and
thus immediately takes software down the error handling path.  As such, this can
be considered a toleration mode for any software that utilizes the TM feature.
Valgrind has taken a similar approach.  There are no immediate plans to implement
a more sophisticated model.

Currently, Power8 is the only Power processor that supports TM.

Tom Musta (9):
  target-ppc: Introduce Instruction Type for Transactional Memory
  target-ppc: Introduce Feature Flag for Transactional Memory
  target-ppc: Introduce tm_enabled Bit to CPU State
  target-ppc: Power8 Supports Transactional Memory
  target-ppc: Introduce TEXASRU Bit Fields
  target-ppc: Introduce tbegin
  target-ppc: Introduce TM Noops
  target-ppc: Introduce tcheck
  target-ppc: Introduce Privileged TM Noops

 target-ppc/cpu.h|   26 ++-
 target-ppc/helper.h |2 +
 target-ppc/mem_helper.c |   22 
 target-ppc/translate.c  |  113 +++
 target-ppc/translate_init.c |5 +-
 5 files changed, 165 insertions(+), 3 deletions(-)

[Qemu-devel] [PATCH 2/9] target-ppc: Introduce Feature Flag for Transactional Memory

2014-12-18 Thread Tom Musta

Add a flag (POWERPC_FLAG_TM) for the Transactional Memory
Facility introduced in Power ISA 2.07.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/cpu.h |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 3510083..38176c0 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -584,6 +584,8 @@ enum {
 POWERPC_FLAG_CFAR = 0x0004,
 /* Has VSX   */
 POWERPC_FLAG_VSX  = 0x0008,
+/* Has Transaction Memory (ISA 2.07) */
+POWERPC_FLAG_TM   = 0x0010,
 };
 
 /*/
-- 
1.7.1

[Qemu-devel] [PATCH 3/9] target-ppc: Introduce tm_enabled Bit to CPU State

2014-12-18 Thread Tom Musta

Add a bit (tm_enabled) to CPU state that mirrors the MSR[TM] bit.
This is analogous to the other available bits in the MSR (FP,
VSX, etc.).

NOTE: Since MSR[TM] occupies big-endian bit 31, the code is wrapped
with a PPC64 bit check.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d381632..7217041 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -203,6 +203,7 @@ typedef struct DisasContext {
 int altivec_enabled;
 int vsx_enabled;
 int spe_enabled;
+int tm_enabled;
 ppc_spr_t *spr_cb; /* Needed to check rights for mfspr/mtspr */
 int singlestep_enabled;
 uint64_t insns_flags;
@@ -11311,6 +11312,13 @@ static inline void 
gen_intermediate_code_internal(PowerPCCPU *cpu,
 } else {
 ctx.vsx_enabled = 0;
 }
+#if defined(TARGET_PPC64)
+if ((env-flags  POWERPC_FLAG_TM)  msr_tm) {
+ctx.tm_enabled = msr_tm;
+} else {
+ctx.tm_enabled = 0;
+}
+#endif
 if ((env-flags  POWERPC_FLAG_SE)  msr_se)
 ctx.singlestep_enabled = CPU_SINGLE_STEP;
 else
-- 
1.7.1

[Qemu-devel] [PATCH 6/9] target-ppc: Introduce tbegin

2014-12-18 Thread Tom Musta

Provide a degenerate implementation of the tbegin instruction.  This
implementation always fails the transaction, recording the failure
per Book II Section 5.3.2 of the Power ISA V2.07.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/helper.h |2 ++
 target-ppc/mem_helper.c |   22 ++
 target-ppc/translate.c  |   12 
 3 files changed, 36 insertions(+), 0 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 210fd97..c2bf6d2 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -665,3 +665,5 @@ DEF_HELPER_4(dscri, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscriq, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscli, void, env, fprp, fprp, i32)
 DEF_HELPER_4(dscliq, void, env, fprp, fprp, i32)
+
+DEF_HELPER_1(tbegin, void, env)
diff --git a/target-ppc/mem_helper.c b/target-ppc/mem_helper.c
index 50344b8..6d37dae 100644
--- a/target-ppc/mem_helper.c
+++ b/target-ppc/mem_helper.c
@@ -269,3 +269,25 @@ STVE(stvewx, cpu_stl_data, bswap32, u32)
 
 #undef HI_IDX
 #undef LO_IDX
+
+void helper_tbegin(CPUPPCState *env)
+{
+/* As a degenerate implementation, always fail tbegin.  The reason
+ * given is Nesting overflow.  The persistent bit is set,
+ * providing a hint to the error handler to not retry.  The TFIAR
+ * captures the address of the failure, which is this tbegin
+ * instruction.  Instruction execution will continue with the
+ * next instruction in memory, which is precisely what we want.
+ */
+
+env-spr[SPR_TEXASR] =
+(1ULL  TEXASR_FAILURE_PERSISTENT) |
+(1ULL  TEXASR_NESTING_OVERFLOW) |
+(msr_hv  TEXASR_PRIVILEGE_HV) |
+(msr_pr  TEXASR_PRIVILEGE_PR) |
+(1ULL  TEXASR_FAILURE_SUMMARY) |
+(1ULL  TEXASR_TFIAR_EXACT);
+env-spr[SPR_TFIAR] = env-nip | (msr_hv  1) | msr_pr;
+env-spr[SPR_TFHAR] = env-nip + 4;
+env-crf[0] = 0xB; /* 0b1010 = transaction failure */
+}
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7217041..cddfc36 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9643,6 +9643,15 @@ GEN_SPE(efdctsiz,  speundef,  0x1D, 0x0B, 0x0018, 
0x, PPC_SPE_DOUBLE
 GEN_SPE(efdtstgt,  efdtstlt,  0x1E, 0x0B, 0x0060, 0x0060, 
PPC_SPE_DOUBLE); //
 GEN_SPE(efdtsteq,  speundef,  0x1F, 0x0B, 0x0060, 0x, 
PPC_SPE_DOUBLE); //
 
+static void gen_tbegin(DisasContext *ctx)
+{
+if (unlikely(!ctx-tm_enabled)) {
+gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);
+return;
+}
+gen_helper_tbegin(cpu_env);
+}
+
 static opcode_t opcodes[] = {
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0x, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, PPC_INTEGER),
@@ -11055,6 +11064,9 @@ GEN_SPEOP_LDST(evstwhe, 0x18, 2),
 GEN_SPEOP_LDST(evstwho, 0x1A, 2),
 GEN_SPEOP_LDST(evstwwe, 0x1C, 2),
 GEN_SPEOP_LDST(evstwwo, 0x1E, 2),
+
+GEN_HANDLER2_E(tbegin, tbegin, 0x1F, 0x0E, 0x14, 0x01DFF800, \
+   PPC_NONE, PPC2_TM),
 };
 
 #include helper_regs.h
-- 
1.7.1

[Qemu-devel] [PATCH 4/9] target-ppc: Power8 Supports Transactional Memory

2014-12-18 Thread Tom Musta

The Power8 processor implements the Transactional Memory Facility
as defined in Power ISA 2.07.  Update the initialization code to
indicate this.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate_init.c |5 +++--
 1 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 1fece7b..72cc9d0 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -8219,7 +8219,8 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
 PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
-PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64;
+PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64 |
+PPC2_TM;
 pcc-msr_mask = (1ull  MSR_SF) |
 (1ull  MSR_TM) |
 (1ull  MSR_VR) |
@@ -8247,7 +8248,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
 pcc-flags = POWERPC_FLAG_VRE | POWERPC_FLAG_SE |
  POWERPC_FLAG_BE | POWERPC_FLAG_PMM |
  POWERPC_FLAG_BUS_CLK | POWERPC_FLAG_CFAR |
- POWERPC_FLAG_VSX;
+ POWERPC_FLAG_VSX | POWERPC_FLAG_TM;
 pcc-l1_dcache_size = 0x8000;
 pcc-l1_icache_size = 0x8000;
 pcc-interrupts_big_endian = ppc_cpu_interrupts_big_endian_lpcr;
-- 
1.7.1

[Qemu-devel] [PATCH 8/9] target-ppc: Introduce tcheck

2014-12-18 Thread Tom Musta

Add a degenerate implementation of the Transaction Check (tcheck)
instruction.  Since transactions always fail immediately, this
implementation simply sets CR[BF] to 0b1000, i.e. TDOOMED = 1
and MSR[TS] == 0.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   17 +
 1 files changed, 17 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index f468a5d..a3c79a6 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9676,6 +9676,21 @@ GEN_TM_NOOP(tabortdc);
 GEN_TM_NOOP(tabortdci);
 GEN_TM_NOOP(tsr);
 
+static void gen_tcheck(DisasContext *ctx)
+{
+if (unlikely(!ctx-tm_enabled)) {
+gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);
+return;
+}
+/* Because tbegin always fails, the tcheck implementation
+ * is simple:
+ *
+ * CR[CRF] = TDOOMED || MSR[TS] || 0b0
+ * = 0b1 || 0b00 || 0b0
+ */
+tcg_gen_movi_i32(cpu_crf[crfD(ctx-opcode)], 0x8);
+}
+
 static opcode_t opcodes[] = {
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0x, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, PPC_INTEGER),
@@ -11105,6 +11120,8 @@ GEN_HANDLER2_E(tabortdci, tabortdci, 0x1F, 0x0E, 
0x1B, 0x, \
PPC_NONE, PPC2_TM),
 GEN_HANDLER2_E(tsr, tsr, 0x1F, 0x0E, 0x17, 0x03DFF800, \
PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tcheck, tcheck, 0x1F, 0x0E, 0x16, 0x007FF800, \
+   PPC_NONE, PPC2_TM),
 };
 
 #include helper_regs.h
-- 
1.7.1

[Qemu-devel] [PATCH 1/9] target-ppc: Introduce Instruction Type for Transactional Memory

2014-12-18 Thread Tom Musta

Add a category (PPC2_TM) for the Transactional Memory instructions
introduced in Power ISA 2.07.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/cpu.h |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 068fcb2..3510083 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -2010,6 +2010,8 @@ enum {
 PPC2_ISA207S   = 0x8000ULL,
 /* Double precision floating point conversion for signed integer 64  */
 PPC2_FP_CVT_S64= 0x0001ULL,
+/* Transactional Memory (ISA 2.07, Book II)  */
+PPC2_TM= 0x0002ULL,
 
 #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX | \
 PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
@@ -2017,7 +2019,7 @@ enum {
 PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
 PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
 PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
-PPC2_FP_CVT_S64)
+PPC2_FP_CVT_S64 | PPC2_TM)
 };
 
 /*/
-- 
1.7.1

[Qemu-devel] [PATCH 5/9] target-ppc: Introduce TEXASRU Bit Fields

2014-12-18 Thread Tom Musta

Define mnemonics for the various bit fields in the Transaction
EXception And Summary Register (TEXASR).
---
 target-ppc/cpu.h |   20 
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
index 38176c0..91a03f6 100644
--- a/target-ppc/cpu.h
+++ b/target-ppc/cpu.h
@@ -558,6 +558,26 @@ struct ppc_slb_t {
 #define ESR_VLEMI (1  (63 - 58)) /* VLE operation  */
 #define ESR_MIF   (1  (63 - 62)) /* Misaligned instruction (VLE)   */
 
+/* Transaction EXception And Summary Register bits   */
+#define TEXASR_FAILURE_PERSISTENT(63 - 7)
+#define TEXASR_DISALLOWED(63 - 8)
+#define TEXASR_NESTING_OVERFLOW  (63 - 9)
+#define TEXASR_FOOTPRINT_OVERFLOW(63 - 10)
+#define TEXASR_SELF_INDUCED_CONFLICT (63 - 11)
+#define TEXASR_NON_TRANSACTIONAL_CONFLICT(63 - 12)
+#define TEXASR_TRANSACTION_CONFLICT  (63 - 13)
+#define TEXASR_TRANSLATION_INVALIDATION_CONFLICT (63 - 14)
+#define TEXASR_IMPLEMENTATION_SPECIFIC   (63 - 15)
+#define TEXASR_INSTRUCTION_FETCH_CONFLICT(63 - 16)
+#define TEXASR_ABORT (63 - 31)
+#define TEXASR_SUSPENDED (63 - 32)
+#define TEXASR_PRIVILEGE_HV  (63 - 34)
+#define TEXASR_PRIVILEGE_PR  (63 - 35)
+#define TEXASR_FAILURE_SUMMARY   (63 - 36)
+#define TEXASR_TFIAR_EXACT   (63 - 37)
+#define TEXASR_ROT   (63 - 38)
+#define TEXASR_TRANSACTION_LEVEL (63 - 52) /* 12 bits */
+
 enum {
 POWERPC_FLAG_NONE = 0x,
 /* Flag for MSR bit 25 signification (VRE/SPE)   */
-- 
1.7.1

[Qemu-devel] [PATCH 9/9] target-ppc: Introduce Privileged TM Noops

2014-12-18 Thread Tom Musta

Add the supervisory Transactional Memory instructions treclaim. and
trechkpt.  The implementation is a degenerate one that simply
checks privileged state, TM availability and then sets CR[0] to
0b0000, just like the unprivileged noops.
---
 target-ppc/translate.c |   38 ++
 1 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a3c79a6..b4a4297 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9691,6 +9691,40 @@ static void gen_tcheck(DisasContext *ctx)
 tcg_gen_movi_i32(cpu_crf[crfD(ctx-opcode)], 0x8);
 }
 
+#if defined(CONFIG_USER_ONLY)
+#define GEN_TM_PRIV_NOOP(name) \
+static inline void gen_##name(DisasContext *ctx)   \
+{  \
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);   \
+}
+
+#else
+
+#define GEN_TM_PRIV_NOOP(name) \
+static inline void gen_##name(DisasContext *ctx)   \
+{  \
+if (unlikely(ctx-pr)) {   \
+gen_inval_exception(ctx, POWERPC_EXCP_PRIV_OPC);   \
+return;\
+}  \
+if (unlikely(!ctx-tm_enabled)) {  \
+gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
+return;\
+}  \
+/* Because tbegin always fails, the implementation is  \
+ * simple: \
+ * \
+ *   CR[0] = 0b0 || MSR[TS] || 0b0 \
+ * = 0b0 || 0b00 | 0b0 \
+ */\
+tcg_gen_movi_i32(cpu_crf[0], 0);   \
+}
+
+#endif
+
+GEN_TM_PRIV_NOOP(treclaim);
+GEN_TM_PRIV_NOOP(trechkpt);
+
 static opcode_t opcodes[] = {
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0x, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, PPC_INTEGER),
@@ -11122,6 +11156,10 @@ GEN_HANDLER2_E(tsr, tsr, 0x1F, 0x0E, 0x17, 
0x03DFF800, \
PPC_NONE, PPC2_TM),
 GEN_HANDLER2_E(tcheck, tcheck, 0x1F, 0x0E, 0x16, 0x007FF800, \
PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(treclaim, treclaim, 0x1F, 0x0E, 0x1D, 0x03E0F800, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(trechkpt, trechkpt, 0x1F, 0x0E, 0x1F, 0x03FFF800, \
+   PPC_NONE, PPC2_TM),
 };
 
 #include helper_regs.h
-- 
1.7.1

[Qemu-devel] [PATCH 7/9] target-ppc: Introduce TM Noops

2014-12-18 Thread Tom Musta

Add degenerate implementations of the non-privileged Transactional
Memory instructions tend., tabort*. and tsr.  This implementation
simply checks the MSR[TM] bit and then sets CR0 to 0b0000.  This
is a reasonable degenerate implementation since transactions are
never allowed to begin and hence MSR[TS] is always 0b00.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   38 ++
 1 files changed, 38 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index cddfc36..f468a5d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -9652,6 +9652,30 @@ static void gen_tbegin(DisasContext *ctx)
 gen_helper_tbegin(cpu_env);
 }
 
+#define GEN_TM_NOOP(name)  \
+static inline void gen_##name(DisasContext *ctx)   \
+{  \
+if (unlikely(!ctx-tm_enabled)) {  \
+gen_exception_err(ctx, POWERPC_EXCP_FU, FSCR_IC_TM);   \
+return;\
+}  \
+/* Because tbegin always fails in QEMU, these user \
+ * space instructions all have a simple implementation:\
+ * \
+ * CR[0] = 0b0 || MSR[TS] || 0b0   \
+ *   = 0b0 || 0b00|| 0b0   \
+ */\
+tcg_gen_movi_i32(cpu_crf[0], 0);   \
+}
+
+GEN_TM_NOOP(tend);
+GEN_TM_NOOP(tabort);
+GEN_TM_NOOP(tabortwc);
+GEN_TM_NOOP(tabortwci);
+GEN_TM_NOOP(tabortdc);
+GEN_TM_NOOP(tabortdci);
+GEN_TM_NOOP(tsr);
+
 static opcode_t opcodes[] = {
 GEN_HANDLER(invalid, 0x00, 0x00, 0x00, 0x, PPC_NONE),
 GEN_HANDLER(cmp, 0x1F, 0x00, 0x00, 0x0040, PPC_INTEGER),
@@ -11067,6 +11091,20 @@ GEN_SPEOP_LDST(evstwwo, 0x1E, 2),
 
 GEN_HANDLER2_E(tbegin, tbegin, 0x1F, 0x0E, 0x14, 0x01DFF800, \
PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tend,   tend,   0x1F, 0x0E, 0x15, 0x01FFF800, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabort, tabort, 0x1F, 0x0E, 0x1C, 0x03E0F800, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortwc, tabortwc, 0x1F, 0x0E, 0x18, 0x, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortwci, tabortwci, 0x1F, 0x0E, 0x1A, 0x, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortdc, tabortdc, 0x1F, 0x0E, 0x19, 0x, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tabortdci, tabortdci, 0x1F, 0x0E, 0x1B, 0x, \
+   PPC_NONE, PPC2_TM),
+GEN_HANDLER2_E(tsr, tsr, 0x1F, 0x0E, 0x17, 0x03DFF800, \
+   PPC_NONE, PPC2_TM),
 };
 
 #include helper_regs.h
-- 
1.7.1

Re: [Qemu-devel] [PATCH 5/9] target-ppc: Introduce TEXASRU Bit Fields

2014-12-18 Thread Tom Musta

On 12/18/2014 11:02 AM, Alexander Graf wrote:
 
 
 On 18.12.14 17:34, Tom Musta wrote:
 Define mnemonics for the various bit fields in the Transaction
 EXception And Summary Register (TEXASR).
 
 This is missing an SoB line.
 
 
 Alex
 

Sorry about that.  I will publish a V2 but may wait a day or so for any other
comments.

Re: [Qemu-devel] [PATCH 5/9] target-ppc: Introduce TEXASRU Bit Fields

2014-12-18 Thread Tom Musta

On 12/18/2014 12:29 PM, Alexander Graf wrote:
 
 
 On 18.12.14 19:10, Tom Musta wrote:
 On 12/18/2014 11:02 AM, Alexander Graf wrote:


 On 18.12.14 17:34, Tom Musta wrote:
 Define mnemonics for the various bit fields in the Transaction
 EXception And Summary Register (TEXASR).

 This is missing an SoB line.


 Alex


 Sorry about that.  I will publish a V2 but may wait a day or so for any
 other comments.
 
 The patches look good IMHO, just reply with your SoB line and I'll apply
 the rest too :)
 
 
 Alex
 

Signed-off-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [2.3 V2 PATCH 2/6] target-ppc: Fix Floating Point Move Instructions That Set CR1

2014-11-20 Thread Tom Musta

On 11/20/2014 8:14 AM, Alexander Graf wrote:
 
 
 On 12.11.14 22:46, Tom Musta wrote:
 The Floating Point Move instructions (fmr., fabs., fnabs., fneg.,
 and fcpsgn.) incorrectly copy FPSCR[FPCC] instead of [FX,FEX,VX,OX].
 Furthermore, the current code does this via a call to gen_compute_fprf,
 which is awkward since these instructions do not actually set FPRF.

 Change the code to use the gen_set_cr1_from_fpscr utility.

 Signed-off-by: Tom Musta tommu...@gmail.com
 ---
  target-ppc/translate.c |   50 
 ---
  1 files changed, 30 insertions(+), 20 deletions(-)

 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 910ce56..2d79e39 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -2077,6 +2077,21 @@ static void gen_srd(DisasContext *ctx)
  }
  #endif
  
 +#if defined(TARGET_PPC64)
 +static void gen_set_cr1_from_fpscr(DisasContext *ctx)
 +{
 +TCGv_i32 tmp = tcg_temp_new_i32();
 +tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
 +tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
 +tcg_temp_free_i32(tmp);
 +}
 +#else
 +static void gen_set_cr1_from_fpscr(DisasContext *ctx)
 +{
 +tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
 +}
 +#endif
 +
  /***   Floating-Point arithmetic   
 ***/
  #define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type) 
   \
  static void gen_f##name(DisasContext *ctx)  
   \
 @@ -2370,7 +2385,9 @@ static void gen_fabs(DisasContext *ctx)
  }
  tcg_gen_andi_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
   ~(1ULL  63));
 -gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
 +if (unlikely(Rc(ctx-opcode))) {
 +gen_set_cr1_from_fpscr(ctx);
 +}
 
 I don't quite understand this. We set cr1 based on fpscr, but we don't
 recalculate the respective fpscr bits?
 
 Wouldn't we get outdated comparison data?
 
 
 Alex
 

Nope.

The floating point move instructions don't actually even alter the FPSCR.  From 
the ISA (see the last sentence):

4.6.5 Floating-Point Move Instructions
These instructions copy data from one floating-point
register to another, altering the sign bit (bit 0) as
described below for fneg, fabs, fnabs, and fcpsgn.
These instructions treat NaNs just like any other kind of
value (e.g., the sign bit of a NaN may be altered by
fneg, fabs, fnabs, and fcpsgn). These instructions do
not alter the FPSCR.

[Qemu-devel] [PATCH] target-ppc: Load/Store Vector Element Storage Alignment

2014-11-17 Thread Tom Musta

The Load Vector Element Indexed and Store Vector Element Indexed
instructions compute an effective address in the usual manner.
However, they truncate that address to the natural boundary.
For example, the lvewx instruction will ignore the least significant
two bits of the address and thus load the aligned word of storage.

Fix the generators for these instructions to properly perform this
truncation.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   22 ++
 1 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0f8897f..aaba887 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -6783,7 +6783,7 @@ static void gen_st##name(DisasContext *ctx)   
\
 tcg_temp_free(EA);\
 }
 
-#define GEN_VR_LVE(name, opc2, opc3)\
+#define GEN_VR_LVE(name, opc2, opc3, size)  \
 static void gen_lve##name(DisasContext *ctx)\
 {   \
 TCGv EA;\
@@ -6795,13 +6795,16 @@ static void gen_lve##name(DisasContext *ctx)
\
 gen_set_access_type(ctx, ACCESS_INT);   \
 EA = tcg_temp_new();\
 gen_addr_reg_index(ctx, EA);\
+if (size  1) { \
+tcg_gen_andi_tl(EA, EA, ~(size - 1));   \
+}   \
 rs = gen_avr_ptr(rS(ctx-opcode));  \
 gen_helper_lve##name(cpu_env, rs, EA);  \
 tcg_temp_free(EA);  \
 tcg_temp_free_ptr(rs);  \
 }
 
-#define GEN_VR_STVE(name, opc2, opc3)   \
+#define GEN_VR_STVE(name, opc2, opc3, size) \
 static void gen_stve##name(DisasContext *ctx)   \
 {   \
 TCGv EA;\
@@ -6813,6 +6816,9 @@ static void gen_stve##name(DisasContext *ctx) 
  \
 gen_set_access_type(ctx, ACCESS_INT);   \
 EA = tcg_temp_new();\
 gen_addr_reg_index(ctx, EA);\
+if (size  1) { \
+tcg_gen_andi_tl(EA, EA, ~(size - 1));   \
+}   \
 rs = gen_avr_ptr(rS(ctx-opcode));  \
 gen_helper_stve##name(cpu_env, rs, EA); \
 tcg_temp_free(EA);  \
@@ -6823,17 +6829,17 @@ GEN_VR_LDX(lvx, 0x07, 0x03);
 /* As we don't emulate the cache, lvxl is stricly equivalent to lvx */
 GEN_VR_LDX(lvxl, 0x07, 0x0B);
 
-GEN_VR_LVE(bx, 0x07, 0x00);
-GEN_VR_LVE(hx, 0x07, 0x01);
-GEN_VR_LVE(wx, 0x07, 0x02);
+GEN_VR_LVE(bx, 0x07, 0x00, 1);
+GEN_VR_LVE(hx, 0x07, 0x01, 2);
+GEN_VR_LVE(wx, 0x07, 0x02, 4);
 
 GEN_VR_STX(svx, 0x07, 0x07);
 /* As we don't emulate the cache, stvxl is stricly equivalent to stvx */
 GEN_VR_STX(svxl, 0x07, 0x0F);
 
-GEN_VR_STVE(bx, 0x07, 0x04);
-GEN_VR_STVE(hx, 0x07, 0x05);
-GEN_VR_STVE(wx, 0x07, 0x06);
+GEN_VR_STVE(bx, 0x07, 0x04, 1);
+GEN_VR_STVE(hx, 0x07, 0x05, 2);
+GEN_VR_STVE(wx, 0x07, 0x06, 4);
 
 static void gen_lvsl(DisasContext *ctx)
 {
-- 
1.7.1

[Qemu-devel] [PATCH] target-ppc: Altivec's mtvscr Decodes Wrong Register

2014-11-14 Thread Tom Musta

The Move to Vector Status and Control Register (mtvscr) instruction
uses VRB as the source register.  Fix the code generator to correctly
decode the VRB field.  That is, use rB(ctx-opcode) instead of
rD(ctx-opcode).

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 910ce56..d381632 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -6848,7 +6848,7 @@ static void gen_mtvscr(DisasContext *ctx)
 gen_exception(ctx, POWERPC_EXCP_VPU);
 return;
 }
-p = gen_avr_ptr(rD(ctx-opcode));
+p = gen_avr_ptr(rB(ctx-opcode));
 gen_helper_mtvscr(cpu_env, p);
 tcg_temp_free_ptr(p);
 }
-- 
1.7.1

[Qemu-devel] [2.3 V2 PATCH 0/6] target-ppc: Assorted Floating Point Bugs and Cleanup

2014-11-12 Thread Tom Musta

This patch series corrects some issues with floating point emulation
on Power.

Patch 1 corrects a corner case in the square root instructions, which
react incorrectly to NaNs whose sign bit is 1.

Patches 2-6 correct a rather pervasive problem with modeling of the CR[1]
field (i.e. the dot form instructions of the FPU).

The bugs were found by running random test patterns through actual Power
hardware (P7 and P8) and comparing against QEMU.

The patches conflict quite a bit with Paolo's series that splits CR into
32 one bit registers.  Paolo: is V3 of your patch series coming anytime 
soon?

V2: Reworked the patches to pick up the gen_set_cr1_from_fpscr() utility that
was recently added by Paolo Bonzini.

Tom Musta (6):
  target-ppc: VXSQRT Should Not Be Set for NaNs
  target-ppc: Fix Floating Point Move Instructions That Set CR1
  target-ppc: mffs. Should Set CR1 from FPSCR Bits
  target-ppc: Fully Migrate to gen_set_cr1_from_fpscr
  target-ppc: Eliminate set_fprf Argument From gen_compute_fprf
  target-ppc: Eliminate set_fprf Argument From helper_compute_fprf

 target-ppc/fpu_helper.c |   85 +++---
 target-ppc/helper.h |2 +-
 target-ppc/translate.c  |  131 ---
 3 files changed, 122 insertions(+), 96 deletions(-)

[Qemu-devel] [2.3 V2 PATCH 6/6] target-ppc: Eliminate set_fprf Argument From helper_compute_fprf

2014-11-12 Thread Tom Musta

The set_fprf argument to the helper_compute_fprf helper function
is no longer necessary -- the helper is only invoked when FPSCR[FPRF]
is going to be set.

Eliminate the unnecessary argument from the function signature and
its corresponding implementation.  Change the return value of the
helper to void.  Update the name of the local variable ret to
fprf, which now makes more sense.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/fpu_helper.c |   56 +-
 target-ppc/helper.h |2 +-
 target-ppc/translate.c  |8 +--
 3 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 81db60f..6cceffc 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -63,59 +63,55 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
 return ((f  52)  0x7FF) - 1023;
 }
 
-uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
+void helper_compute_fprf(CPUPPCState *env, uint64_t arg)
 {
 CPU_DoubleU farg;
 int isneg;
-int ret;
+int fprf;
 
 farg.ll = arg;
 isneg = float64_is_neg(farg.d);
 if (unlikely(float64_is_any_nan(farg.d))) {
 if (float64_is_signaling_nan(farg.d)) {
 /* Signaling NaN: flags are undefined */
-ret = 0x00;
+fprf = 0x00;
 } else {
 /* Quiet NaN */
-ret = 0x11;
+fprf = 0x11;
 }
 } else if (unlikely(float64_is_infinity(farg.d))) {
 /* +/- infinity */
 if (isneg) {
-ret = 0x09;
+fprf = 0x09;
 } else {
-ret = 0x05;
+fprf = 0x05;
 }
 } else {
 if (float64_is_zero(farg.d)) {
 /* +/- zero */
 if (isneg) {
-ret = 0x12;
+fprf = 0x12;
 } else {
-ret = 0x02;
+fprf = 0x02;
 }
 } else {
 if (isden(farg.d)) {
 /* Denormalized numbers */
-ret = 0x10;
+fprf = 0x10;
 } else {
 /* Normalized numbers */
-ret = 0x00;
+fprf = 0x00;
 }
 if (isneg) {
-ret |= 0x08;
+fprf |= 0x08;
 } else {
-ret |= 0x04;
+fprf |= 0x04;
 }
 }
 }
-if (set_fprf) {
-/* We update FPSCR_FPRF */
-env-fpscr = ~(0x1F  FPSCR_FPRF);
-env-fpscr |= ret  FPSCR_FPRF;
-}
-/* We just need fpcc to update Rc1 */
-return ret  0xF;
+/* We update FPSCR_FPRF */
+env-fpscr = ~(0x1F  FPSCR_FPRF);
+env-fpscr |= fprf  FPSCR_FPRF;
 }
 
 /* Floating-point invalid operations exception */
@@ -1853,7 +1849,7 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) 
   \
 }\
  \
 if (sfprf) { \
-helper_compute_fprf(env, xt.fld, sfprf); \
+helper_compute_fprf(env, xt.fld);\
 }\
 }\
 putVSR(xT(opcode), xt, env);\
@@ -1908,7 +1904,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
   \
 }\
  \
 if (sfprf) { \
-helper_compute_fprf(env, xt.fld, sfprf); \
+helper_compute_fprf(env, xt.fld);\
 }\
 }\
  \
@@ -1962,7 +1958,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
\
 } \
   \
 if (sfprf) {  \
-helper_compute_fprf(env, xt.fld, sfprf);  \
+helper_compute_fprf(env, xt.fld

[Qemu-devel] [2.3 V2 PATCH 5/6] target-ppc: Eliminate set_fprf Argument From gen_compute_fprf

2014-11-12 Thread Tom Musta

The set_fprf argument to the gen_compute_fprf() utility is no longer
needed -- gen_compute_fprf() is now called only when FPRF is actually
computed and set.  Eliminate the obsolete argument.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |   38 +++---
 1 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d4faf20..9ac9f43 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -250,16 +250,14 @@ static inline void gen_reset_fpstatus(void)
 gen_helper_reset_fpstatus(cpu_env);
 }
 
-static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf)
+static inline void gen_compute_fprf(TCGv_i64 arg)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
 
-if (set_fprf != 0) {
-/* This case might be optimized later */
-tcg_gen_movi_i32(t0, 1);
-gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-gen_helper_float_check_status(cpu_env);
-}
+/* This case might be optimized later */
+tcg_gen_movi_i32(t0, 1);
+gen_helper_compute_fprf(t0, cpu_env, arg, t0);
+gen_helper_float_check_status(cpu_env);
 
 tcg_temp_free_i32(t0);
 }
@@ -2102,7 +2100,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(ctx);  \
 } \
@@ -2129,7 +2129,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(ctx);  \
 } \
@@ -2155,7 +2157,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(ctx);  \
 } \
@@ -2176,7 +2180,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_reset_fpstatus(); \
 gen_helper_f##name(cpu_fpr[rD(ctx-opcode)], cpu_env, \
cpu_fpr[rB(ctx-opcode)]); \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(ctx);  \
 } \
@@ -2194,7 +2200,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_reset_fpstatus

[Qemu-devel] [2.3 V2 PATCH 1/6] target-ppc: VXSQRT Should Not Be Set for NaNs

2014-11-12 Thread Tom Musta

The Power ISA square root instructions (fsqrt[s], frsqrte[s]) must
set the FPSCR[VXSQRT] flag when operating on a negative value.
However, NaNs have no sign and therefore this flag should not
be set when operating on one.

Change the order of the checks in the helper code.  Move the
SNaN-to-QNaN macro to the top of the file so that it can be
re-used.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/fpu_helper.c |   29 +
 1 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7f74466..81db60f 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -19,6 +19,9 @@
 #include cpu.h
 #include exec/helper-proto.h
 
+#define float64_snan_to_qnan(x) ((x) | 0x0008ULL)
+#define float32_snan_to_qnan(x) ((x) | 0x0040)
+
 /*/
 /* Floating point operations helpers */
 uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg)
@@ -920,14 +923,16 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg)
 
 farg.ll = arg;
 
-if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
-/* Square root of a negative nonzero number */
-farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-} else {
+if (unlikely(float64_is_any_nan(farg.d))) {
 if (unlikely(float64_is_signaling_nan(farg.d))) {
-/* sNaN square root */
+/* sNaN reciprocal square root */
 fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+farg.ll = float64_snan_to_qnan(farg.ll);
 }
+} else if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
+/* Square root of a negative nonzero number */
+farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+} else {
 farg.d = float64_sqrt(farg.d, env-fp_status);
 }
 return farg.ll;
@@ -974,17 +979,20 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg)
 
 farg.ll = arg;
 
-if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
-/* Reciprocal square root of a negative nonzero number */
-farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-} else {
+if (unlikely(float64_is_any_nan(farg.d))) {
 if (unlikely(float64_is_signaling_nan(farg.d))) {
 /* sNaN reciprocal square root */
 fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+farg.ll = float64_snan_to_qnan(farg.ll);
 }
+} else if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
+/* Reciprocal square root of a negative nonzero number */
+farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+} else {
 farg.d = float64_sqrt(farg.d, env-fp_status);
 farg.d = float64_div(float64_one, farg.d, env-fp_status);
 }
+
 return farg.ll;
 }
 
@@ -2382,9 +2390,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
   \
 VSX_SCALAR_CMP(xscmpodp, 1)
 VSX_SCALAR_CMP(xscmpudp, 0)
 
-#define float64_snan_to_qnan(x) ((x) | 0x0008ULL)
-#define float32_snan_to_qnan(x) ((x) | 0x0040)
-
 /* VSX_MAX_MIN - VSX floating point maximum/minimum
  *   name  - instruction mnemonic
  *   op- operation (max or min)
-- 
1.7.1

[Qemu-devel] [2.3 V2 PATCH 2/6] target-ppc: Fix Floating Point Move Instructions That Set CR1

2014-11-12 Thread Tom Musta

The Floating Point Move instructions (fmr., fabs., fnabs., fneg.,
and fcpsgn.) incorrectly copy FPSCR[FPCC] into CR[1] instead of
FPSCR[FX,FEX,VX,OX].  Furthermore, the current code does this via a call to
gen_compute_fprf, which is awkward since these instructions do not actually
set FPRF.

Change the code to use the gen_set_cr1_from_fpscr utility.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |   50 ---
 1 files changed, 30 insertions(+), 20 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 910ce56..2d79e39 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2077,6 +2077,21 @@ static void gen_srd(DisasContext *ctx)
 }
 #endif
 
+#if defined(TARGET_PPC64)
+static void gen_set_cr1_from_fpscr(DisasContext *ctx)
+{
+TCGv_i32 tmp = tcg_temp_new_i32();
+tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
+tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
+tcg_temp_free_i32(tmp);
+}
+#else
+static void gen_set_cr1_from_fpscr(DisasContext *ctx)
+{
+tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
+}
+#endif
+
 /***   Floating-Point arithmetic   ***/
 #define _GEN_FLOAT_ACB(name, op, op1, op2, isfloat, set_fprf, type)   \
 static void gen_f##name(DisasContext *ctx)\
@@ -2370,7 +2385,9 @@ static void gen_fabs(DisasContext *ctx)
 }
 tcg_gen_andi_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
  ~(1ULL  63));
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 /* fmr  - fmr. */
@@ -2382,7 +2399,9 @@ static void gen_fmr(DisasContext *ctx)
 return;
 }
 tcg_gen_mov_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)]);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 /* fnabs */
@@ -2395,7 +2414,9 @@ static void gen_fnabs(DisasContext *ctx)
 }
 tcg_gen_ori_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
 1ULL  63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 /* fneg */
@@ -2408,7 +2429,9 @@ static void gen_fneg(DisasContext *ctx)
 }
 tcg_gen_xori_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
  1ULL  63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 /* fcpsgn: PowerPC 2.05 specification */
@@ -2421,7 +2444,9 @@ static void gen_fcpsgn(DisasContext *ctx)
 }
 tcg_gen_deposit_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rA(ctx-opcode)],
 cpu_fpr[rB(ctx-opcode)], 0, 63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 static void gen_fmrgew(DisasContext *ctx)
@@ -8205,21 +8230,6 @@ static inline TCGv_ptr gen_fprp_ptr(int reg)
 return r;
 }
 
-#if defined(TARGET_PPC64)
-static void gen_set_cr1_from_fpscr(DisasContext *ctx)
-{
-TCGv_i32 tmp = tcg_temp_new_i32();
-tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
-tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
-tcg_temp_free_i32(tmp);
-}
-#else
-static void gen_set_cr1_from_fpscr(DisasContext *ctx)
-{
-tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
-}
-#endif
-
 #define GEN_DFP_T_A_B_Rc(name)   \
 static void gen_##name(DisasContext *ctx)\
 {\
-- 
1.7.1

[Qemu-devel] [2.3 V2 PATCH 4/6] target-ppc: Fully Migrate to gen_set_cr1_from_fpscr

2014-11-12 Thread Tom Musta

Eliminate the set_rc argument from the gen_compute_fprf utility and
the corresponding (and incorrect) implementation.  Replace it with
calls to the gen_set_cr1_from_fpscr() utility.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |   55 ---
 1 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index f3c57b8..d4faf20 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -250,7 +250,7 @@ static inline void gen_reset_fpstatus(void)
 gen_helper_reset_fpstatus(cpu_env);
 }
 
-static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
+static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
 
@@ -258,15 +258,7 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int 
set_fprf, int set_rc)
 /* This case might be optimized later */
 tcg_gen_movi_i32(t0, 1);
 gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-if (unlikely(set_rc)) {
-tcg_gen_mov_i32(cpu_crf[1], t0);
-}
 gen_helper_float_check_status(cpu_env);
-} else if (unlikely(set_rc)) {
-/* We always need to compute fpcc */
-tcg_gen_movi_i32(t0, 0);
-gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-tcg_gen_mov_i32(cpu_crf[1], t0);
 }
 
 tcg_temp_free_i32(t0);
@@ -2110,8 +2102,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf,  \
- Rc(ctx-opcode) != 0);   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(ctx);  \
+} \
 }
 
 #define GEN_FLOAT_ACB(name, op2, set_fprf, type)  \
@@ -2135,8 +2129,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(ctx);  \
+} \
 }
 #define GEN_FLOAT_AB(name, op2, inval, set_fprf, type)\
 _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type);   \
@@ -2159,8 +2155,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(ctx);  \
+} \
 }
 #define GEN_FLOAT_AC(name, op2, inval, set_fprf, type)\
 _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type);   \
@@ -2178,8 +2176,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_reset_fpstatus(); \
 gen_helper_f##name(cpu_fpr[rD(ctx-opcode)], cpu_env, \
cpu_fpr[rB(ctx-opcode)]); \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0

[Qemu-devel] [2.3 V2 PATCH 3/6] target-ppc: mffs. Should Set CR1 from FPSCR Bits

2014-11-12 Thread Tom Musta

Update the Move From FPSCR (mffs.) instruction to correctly
set CR[1] from FPSCR[FX,FEX,VX,OX].

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 2d79e39..f3c57b8 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2504,7 +2504,9 @@ static void gen_mffs(DisasContext *ctx)
 }
 gen_reset_fpstatus();
 tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpscr);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr(ctx);
+}
 }
 
 /* mtfsb0 */
-- 
1.7.1

Re: [Qemu-devel] [PATCH] linux-user: Do not subtract offset from end address

2014-11-10 Thread Tom Musta

On 11/8/2014 6:22 PM, Andreas Färber wrote:
 Am 06.11.2014 um 20:43 schrieb Tom Musta:
 When computing the upper address of a program segment, do not subtract the
 offset from the virtual address; instead compute the sum of the virtual 
 address
 and the memory size.
 
 Note that this reads a bit weird as both old and new code are adding,
 not subtracting.
 
 Regards,
 Andreas
 

I agree that it is not obvious from the patch, which needed one more line of
context:

abi_ulong a = phdr[i].p_vaddr - phdr[i].p_offset;
if (a < loaddr) {
loaddr = a;
}
a = phdr[i].p_vaddr + phdr[i].p_memsz;
if (a > hiaddr) {
hiaddr = a;
}

I think the description accurately captures what is being changed in the code.
But if you still disagree, I will reword and respin V2.


 Signed-off-by: Tom Musta tommu...@gmail.com
 ---

 Please include this patch in QEMU 2.2.  

 Commit a93934fecd4dffc9d4b452b670c9506be5dea30d injected a regression of 
 Linux
 User Mode that I was able to detect on PowerPC 64 (but not x86).  I suspect 
 that
 large page size on the host has something to do with it.  In any case, that 
 commit
 adjusted the lower address of a program segment by the program header's 
 offset 
 field.  However, it also inadvertently adjusted the upper address by the
 offset.

  linux-user/elfload.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 index 84123ba..e2596a4 100644
 --- a/linux-user/elfload.c
 +++ b/linux-user/elfload.c
 @@ -1824,7 +1824,7 @@ static void load_elf_image(const char *image_name, int 
 image_fd,
  if (a  loaddr) {
  loaddr = a;
  }
 -a += phdr[i].p_memsz;
 +a = phdr[i].p_vaddr + phdr[i].p_memsz;
  if (a  hiaddr) {
  hiaddr = a;
  }

Re: [Qemu-devel] [PATCH 2.2 v3] linux-user: Fix up timer id handling

2014-11-10 Thread Tom Musta

On 11/10/2014 12:21 PM, Alexander Graf wrote:
 When creating a timer handle, we give the timer id a special magic offset
 of 0xcafe. However, we never mask that offset out of the timer id before
 we start using it to dereference our timer array. So we always end up aborting
 timer operations because the timer id is out of bounds.
 
 This was not an issue before my patch e52a99f756e (linux-user: Simplify
 timerid checks on g_posix_timers range) because before we would blindly mask
 anything above the first 16 bits.
 
 This patch simplifies the code around timer id creation by introducing a proper
 target_timer_t typedef that is s32, just like Linux has it. It also changes the
 magic offset to a value that makes all timer ids positive.
 
 Reported-by: Tom Musta tommu...@gmail.com
 Signed-off-by: Alexander Graf ag...@suse.de
 
 ---
 
 v1 - v2:
 
   - Abort when magic is missing
 
 v2 - v3:
 
   - Squash into a single patch
   - Change magic to always have positive IDs
 ---
  linux-user/syscall.c  | 28 ++--
  linux-user/syscall_defs.h |  5 +
  2 files changed, 23 insertions(+), 10 deletions(-)
 
 diff --git a/linux-user/syscall.c b/linux-user/syscall.c
 index a175cc1..c21262f 100644
 --- a/linux-user/syscall.c
 +++ b/linux-user/syscall.c
 @@ -9573,13 +9573,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  }
  #endif
  
 +#define TIMER_MAGIC 0x0caf
 +#define TIMER_MAGIC_MASK 0x
 +
  #ifdef TARGET_NR_timer_create
  case TARGET_NR_timer_create:
  {
  /* args: clockid_t clockid, struct sigevent *sevp, timer_t *timerid 
 */
  
  struct sigevent host_sevp = { {0}, }, *phost_sevp = NULL;
 -struct target_timer_t *ptarget_timer;
  
  int clkid = arg1;
  int timer_index = next_free_host_timer();
 @@ -9601,11 +9603,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  if (ret) {
  phtimer = NULL;
  } else {
 -if (!lock_user_struct(VERIFY_WRITE, ptarget_timer, arg3, 1)) 
 {
 +if (put_user(TIMER_MAGIC | timer_index, arg3, 
 target_timer_t)) {
  goto efault;
  }
 -ptarget_timer-ptr = tswap32(0xcafe | timer_index);
 -unlock_user_struct(ptarget_timer, arg3, 1);
  }
  }
  break;
 @@ -9617,7 +9617,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  {
  /* args: timer_t timerid, int flags, const struct itimerspec 
 *new_value,
   * struct itimerspec * old_value */
 -target_ulong timerid = arg1;
 +target_timer_t timerid = arg1;
 +
 +/* Convert QEMU provided timer ID back to internal 16bit index 
 format */
 +if ((timerid  TIMER_MAGIC_MASK) == TIMER_MAGIC) {
 +timerid = 0x;
 +} else {
 +ret = -TARGET_EINVAL;
 +break;
 +}
  
  if (arg3 == 0 || timerid = ARRAY_SIZE(g_posix_timers)) {
  ret = -TARGET_EINVAL;
 @@ -9638,7 +9646,15 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  case TARGET_NR_timer_gettime:
  {
  /* args: timer_t timerid, struct itimerspec *curr_value */
 -target_ulong timerid = arg1;
 +target_timer_t timerid = arg1;
 +
 +/* Convert QEMU provided timer ID back to internal 16bit index 
 format */
 +if ((timerid  TIMER_MAGIC_MASK) == TIMER_MAGIC) {
 +timerid = 0x;
 +} else {
 +ret = -TARGET_EINVAL;
 +break;
 +}
  
  if (!arg2) {
  return -TARGET_EFAULT;
 diff --git a/linux-user/syscall_defs.h b/linux-user/syscall_defs.h
 index c9e6323..ebb3be1 100644
 --- a/linux-user/syscall_defs.h
 +++ b/linux-user/syscall_defs.h
 @@ -2564,10 +2564,7 @@ struct target_ucred {
  
  #endif
  
 -
 -struct target_timer_t {
 -abi_ulong ptr;
 -};
 +typedef int32_t target_timer_t;
  
  #define TARGET_SIGEV_MAX_SIZE 64
  
 

There are two more syscalls that also need this decoding (timer_getoverrun,
timer_delete).  So assuming you add this:

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index c21262f..076131a 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -9679,6 +9679,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 /* args: timer_t timerid */
 target_ulong timerid = arg1;

+/* Convert QEMU provided timer ID back to internal 16bit index format 
*/
+if ((timerid  TIMER_MAGIC_MASK) == TIMER_MAGIC) {
+timerid = 0x;
+} else {
+ret = -TARGET_EINVAL;
+break;
+}
+
 if (timerid = ARRAY_SIZE(g_posix_timers)) {
 ret = -TARGET_EINVAL;
 } else {
@@ -9695,6 +9703,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 /* args: timer_t timerid */
 target_ulong timerid = arg1

Re: [Qemu-devel] [PATCH] linux-user: Fix up timer id handling

2014-11-10 Thread Tom Musta

On 11/10/2014 2:33 PM, Alexander Graf wrote:
 When creating a timer handle, we give the timer id a special magic offset
 of 0xcafe. However, we never mask that offset out of the timer id before
 we start using it to dereference our timer array. So we always end up aborting
 timer operations because the timer id is out of bounds.
 
 This was not an issue before my patch e52a99f756e (linux-user: Simplify
 timerid checks on g_posix_timers range) because before we would blindly mask
 anything above the first 16 bits.
 
 This patch simplifies the code around timer id creation by introducing a proper
 target_timer_t typedef that is s32, just like Linux has it. It also changes the
 magic offset to a value that makes all timer ids positive.
 
 Reported-by: Tom Musta tommu...@gmail.com
 Signed-off-by: Alexander Graf ag...@suse.de
 
 ---
 
 v1 - v2:
 
   - Abort when magic is missing
 
 v2 - v3:
 
   - Squash into a single patch
   - Change magic to always have positive IDs
 
 v3 - v4:
 
   - Also handle timer_getoverrun and timer_delete
   - Move timer boundary checks into separate function
 
 v4 - v5:
 
   - Fix stupid thinko that made boundary checks always fail
 ---
  linux-user/syscall.c  | 54 
 ---
  linux-user/syscall_defs.h |  5 +
  2 files changed, 38 insertions(+), 21 deletions(-)
 
 diff --git a/linux-user/syscall.c b/linux-user/syscall.c
 index a175cc1..aaac6a2 100644
 --- a/linux-user/syscall.c
 +++ b/linux-user/syscall.c
 @@ -5473,6 +5473,27 @@ static int do_openat(void *cpu_env, int dirfd, const 
 char *pathname, int flags,
  return get_errno(sys_openat(dirfd, path(pathname), flags, mode));
  }
  
 +#define TIMER_MAGIC 0x0caf
 +#define TIMER_MAGIC_MASK 0x
 +
 +/* Convert QEMU provided timer ID back to internal 16bit index format */
 +static target_timer_t get_timer_id(abi_long arg)
 +{
 +target_timer_t timerid = arg;
 +
 +if ((timerid  TIMER_MAGIC_MASK) != TIMER_MAGIC) {
 +return -TARGET_EINVAL;
 +}
 +
 +timerid = 0x;
 +
 +if (timerid = ARRAY_SIZE(g_posix_timers)) {
 +return -TARGET_EINVAL;
 +}
 +
 +return timerid;
 +}
 +
  /* do_syscall() should always have a single exit point at the end so
 that actions, such as logging of syscall results, can be performed.
 All errnos that do_syscall() returns must be -TARGET_errcode. */
 @@ -9579,7 +9600,6 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  /* args: clockid_t clockid, struct sigevent *sevp, timer_t *timerid 
 */
  
  struct sigevent host_sevp = { {0}, }, *phost_sevp = NULL;
 -struct target_timer_t *ptarget_timer;
  
  int clkid = arg1;
  int timer_index = next_free_host_timer();
 @@ -9601,11 +9621,9 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  if (ret) {
  phtimer = NULL;
  } else {
 -if (!lock_user_struct(VERIFY_WRITE, ptarget_timer, arg3, 1)) 
 {
 +if (put_user(TIMER_MAGIC | timer_index, arg3, 
 target_timer_t)) {
  goto efault;
  }
 -ptarget_timer-ptr = tswap32(0xcafe | timer_index);
 -unlock_user_struct(ptarget_timer, arg3, 1);
  }
  }
  break;
 @@ -9617,9 +9635,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  {
  /* args: timer_t timerid, int flags, const struct itimerspec 
 *new_value,
   * struct itimerspec * old_value */
 -target_ulong timerid = arg1;
 +target_timer_t timerid = get_timer_id(arg1);
  
 -if (arg3 == 0 || timerid = ARRAY_SIZE(g_posix_timers)) {
 +if (timerid  0) {
 +ret = timerid;
 +} else if (arg3 == 0) {
  ret = -TARGET_EINVAL;
  } else {
  timer_t htimer = g_posix_timers[timerid];
 @@ -9638,12 +9658,12 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  case TARGET_NR_timer_gettime:
  {
  /* args: timer_t timerid, struct itimerspec *curr_value */
 -target_ulong timerid = arg1;
 +target_timer_t timerid = get_timer_id(arg1);
  
 -if (!arg2) {
 -return -TARGET_EFAULT;
 -} else if (timerid = ARRAY_SIZE(g_posix_timers)) {
 -ret = -TARGET_EINVAL;
 +if (timerid  0) {
 +ret = timerid;
 +} else if (!arg2) {
 +ret = -TARGET_EFAULT;
  } else {
  timer_t htimer = g_posix_timers[timerid];
  struct itimerspec hspec;
 @@ -9661,10 +9681,10 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
 arg1,
  case TARGET_NR_timer_getoverrun:
  {
  /* args: timer_t timerid */
 -target_ulong timerid = arg1;
 +target_timer_t timerid = get_timer_id(arg1);
  
 -if (timerid = ARRAY_SIZE(g_posix_timers)) {
 -ret = -TARGET_EINVAL

Re: [Qemu-devel] [ANNOUNCE] QEMU 2.2.0-rc0 is now available

2014-11-06 Thread Tom Musta

On 11/5/2014 2:04 PM, Michael Roth wrote:
 Hello,
 
 On behalf of the QEMU Team, I'd like to announce the availability of the
 first release candidate for the QEMU 2.2 release.  This release is meant
 for testing purposes and should not be used in a production environment.
 
   http://wiki.qemu.org/download/qemu-2.2.0-rc0.tar.bz2
 
 You can help improve the quality of the QEMU 2.2 release by testing this
 release and reporting bugs on Launchpad:
 
   https://bugs.launchpad.net/qemu/
 
 The release plan for the 2.2 release is available at:
 
   http://wiki.qemu.org/Planning/2.2
 
 Please add entries to the ChangeLog for the 2.2 release below:
 
   http://wiki.qemu.org/ChangeLog/2.2
 
 

I have found a problem with Linux User Mode hosted on Power.  I have been able 
to bisect it to this change:

 git bisect bad
a93934fecd4dffc9d4b452b670c9506be5dea30d is the first bad commit
commit a93934fecd4dffc9d4b452b670c9506be5dea30d
Author: Jonas Maebe jonas.ma...@elis.ugent.be
Date:   Fri Oct 24 16:07:15 2014 +0200

elf: take phdr offset into account when calculating the program load address

The first program header does not necessarily start at offset 0. This change
corresponds to what the Linux kernel does in load_elf_binary().

Signed-off-by: Jonas Maebe jonas.ma...@elis.ugent.be
Signed-off-by: Riku Voipio riku.voi...@linaro.org

Fix is forthcoming.

[Qemu-devel] [PATCH] linux-user: Do not subtract offset from end address

2014-11-06 Thread Tom Musta

When computing the upper address of a program segment, do not subtract the
offset from the virtual address; instead compute the sum of the virtual address
and the memory size.

Signed-off-by: Tom Musta <tommu...@gmail.com>
---

Please include this patch in QEMU 2.2.  

Commit a93934fecd4dffc9d4b452b670c9506be5dea30d introduced a regression in Linux
User Mode that I was able to detect on PowerPC 64 (but not x86).  I suspect that
the large page size on the host has something to do with it.  In any case, that
commit adjusted the lower address of a program segment by the program header's
offset field.  However, it also inadvertently adjusted the upper address by the
offset.

 linux-user/elfload.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 84123ba..e2596a4 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -1824,7 +1824,7 @@ static void load_elf_image(const char *image_name, int 
image_fd,
 if (a < loaddr) {
 loaddr = a;
 }
-a += phdr[i].p_memsz;
+a = phdr[i].p_vaddr + phdr[i].p_memsz;
 if (a > hiaddr) {
 hiaddr = a;
 }
-- 
1.7.1

Re: [Qemu-devel] [Qemu-ppc] [PATCH v3 4/4] target-ppc: Handle ibm, nmi-register RTAS call

2014-11-05 Thread Tom Musta

On 11/5/2014 2:32 AM, Alexander Graf wrote:
 
 
 On 05.11.14 08:13, Aravinda Prasad wrote:
 This patch adds FWNMI support in qemu for powerKVM
 guests by handling the ibm,nmi-register rtas call.
 Whenever OS issues ibm,nmi-register RTAS call, the
 machine check notification address is saved and the
 machine check interrupt vector 0x200 is patched to
 issue a private hcall.

 This patch also handles the cases when multi-processors
 experience machine check at or about the same time.
 As per PAPR, subsequent processors serialize waiting
 for the first processor to issue the ibm,nmi-interlock call.
 The second processor retries if the first processor which
 received a machine check is still reading the error log
 and is yet to issue ibm,nmi-interlock call.

 Signed-off-by: Aravinda Prasad aravi...@linux.vnet.ibm.com
 ---
  hw/ppc/spapr_hcall.c|   16 +++
  hw/ppc/spapr_rtas.c |   93 
 +++
  include/hw/ppc/spapr.h  |   17 +++
  pc-bios/spapr-rtas/spapr-rtas.S |   38 
  4 files changed, 163 insertions(+), 1 deletion(-)

 diff --git a/hw/ppc/spapr_hcall.c b/hw/ppc/spapr_hcall.c
 index 8f16160..eceb5e5 100644
 --- a/hw/ppc/spapr_hcall.c
 +++ b/hw/ppc/spapr_hcall.c
 @@ -97,6 +97,9 @@ struct rtas_mc_log {
  struct rtas_error_log err_log;
  };
  
 +/* Whether machine check handling is in progress by any CPU */
 +bool mc_in_progress;
 +
  static void do_spr_sync(void *arg)
  {
  struct SPRSyncState *s = arg;
 @@ -678,6 +681,19 @@ static target_ulong h_report_mc_err(PowerPCCPU *cpu, 
 sPAPREnvironment *spapr,
  cpu_synchronize_state(CPU(ppc_env_get_cpu(env)));
  
  /*
 + * Only one VCPU can process machine check NMI at a time. Hence
 + * set the lock mc_in_progress. Once the VCPU finishes processing
 + * NMI, it executes ibm,nmi-interlock and mc_in_progress is unset
 + * in ibm,nmi-interlock handler. Meanwhile if other VCPUs encounter
 + * NMI we return 0 asking the VCPU to retry h_report_mc_err
 + */
 +if (mc_in_progress == 1) {
 
 Please don't depend on bools being numbers. Use true / false. For if()s,
 just don't use == at all - it makes it more readable.
 
 +return 0;
 +}
 +
 +mc_in_progress = 1;
 +
 +/*
   * We save the original r3 register in SPRG2 in 0x200 vector,
   * which is patched during call to ibm.nmi-register. Original
   * r3 is required to be included in error log
 diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
 index 2ec2a8e..71c7662 100644
 --- a/hw/ppc/spapr_rtas.c
 +++ b/hw/ppc/spapr_rtas.c
 @@ -36,6 +36,9 @@
  
  #include libfdt.h
  
 +#define BRANCH_INST_MASK  0xFC00
 +extern bool mc_in_progress;
 
 Please put this into the spapr struct.
 
 +
  static void rtas_display_character(PowerPCCPU *cpu, sPAPREnvironment *spapr,
 uint32_t token, uint32_t nargs,
 target_ulong args,
 @@ -290,6 +293,90 @@ static void rtas_ibm_os_term(PowerPCCPU *cpu,
  rtas_st(rets, 0, ret);
  }
  
 +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
 +  sPAPREnvironment *spapr,
 +  uint32_t token, uint32_t nargs,
 +  target_ulong args,
 +  uint32_t nret, target_ulong rets)
 +{
 +int i;
 +uint32_t ori_inst = 0x6063;
 +uint32_t branch_inst = 0x4802;
 +target_ulong guest_machine_check_addr;
 +uint32_t trampoline[TRAMPOLINE_INSTS];
 +int total_inst = sizeof(trampoline) / sizeof(uint32_t);
 
 ARRAY_SIZE(trampoline), though I don't quite understand why you need a
 variable that contains the same value as a constant (TRAMPOLINE_INSTS).
 
 But since you're moving all of those bits into variable fields on the
 rtas blob itself as we discussed in the last version, I guess this code
 will go away anyways ;).
 
 +PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 +
 +/* Store the system reset and machine check address */
 +guest_machine_check_addr = rtas_ld(args, 1);
 
 Load or Store? I don't find the comment particularly useful either ;).
 
 +
 +/*
 + * Read the trampoline instructions from RTAS Blob and patch
 + * the KVMPPC_H_REPORT_MC_ERR hcall number and the guest
 + * machine check address before copying to 0x200 vector
 + */
 +cpu_physical_memory_read(spapr-rtas_addr + RTAS_TRAMPOLINE_OFFSET,
 + trampoline, sizeof(trampoline));
 +
 +/* Safety Check */
 
 Same for this comment.
 
 +QEMU_BUILD_BUG_ON(sizeof(trampoline)  MC_INTERRUPT_VECTOR_SIZE);
 +
 +/* Update the KVMPPC_H_REPORT_MC_ERR value in trampoline */
 +ori_inst |= KVMPPC_H_REPORT_MC_ERR;
 +memcpy(trampoline[TRAMPOLINE_ORI_INST_INDEX], ori_inst,
 +sizeof(ori_inst));
 
 Why memcpy a u32 into a u32 array?

Additionally, I don't see the need for the ori_inst *variable*

[Qemu-devel] [PATCH 2/7] target-ppc: Introduce gen_set_cr1_from_fpscr

2014-11-03 Thread Tom Musta

The Power ISA supports a mode in many floating point instructions whereby
the Condition Register field 1 (CR[1]) receives a copy of the Floating
Point Status (FPSCR) bits 32:35, also known as FX, FEX, VX and OX.

The existing QEMU code is mostly wrong -- CR[1] is set to the Floating
Point Condition Code (FPSCR[FPCC]).  Furthermore, this code is buried
inside the code that generates the FPSCR[FPRF] code, which is awkward.

Introduce a new generator utility that correctly sets CR[1] from the
FPSCR bits.  Subsequent patches will correct various segments of
the defective code and will clean up the gen_compute_fprf()
utility.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d03daea..7775bf4 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -249,6 +249,14 @@ static inline void gen_reset_fpstatus(void)
 gen_helper_reset_fpstatus(cpu_env);
 }
 
+static inline void gen_set_cr1_from_fpscr(void)
+{
+TCGv_i32 t0 = tcg_temp_new_i32();
+tcg_gen_trunc_tl_i32(t0, cpu_fpscr);
+tcg_gen_shri_i32(cpu_crf[1], t0, 28);
+tcg_temp_free_i32(t0);
+}
+
 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
-- 
1.7.1

[Qemu-devel] [PATCH 0/7] target-ppc: Assorted Floating Point Bugs and Cleanup

2014-11-03 Thread Tom Musta

This patch series corrects some issues with floating point emulation
on Power.

Patch 1 corrects a corner case in the square root instructions, which
incorrectly react to NaNs whose sign bit is a 1.

Patches 2-6 correct a pervasive problem with modeling of the CR[1]
field (i.e. the so-called dot form instructions of the FPU).

The bugs were found by running random test patterns through actual Power
hardware (P7 and P8) and comparing against QEMU.

The patches conflict quite a bit with Paolo's series that splits CR into
32 one bit registers.  Paolo: is V3 of your patch series coming anytime 
soon?

Tom Musta (7):
  target-ppc: VXSQRT Should Not Be Set for NaNs
  target-ppc: Introduce gen_set_cr1_from_fpscr
  target-ppc: Fix Floating Point Move Instructions That Set CR1
  target-ppc: mffs. Should Set CR1 from FPSCR Bits
  target-ppc: Fully Migrate to gen_set_cr1_from_fpscr
  target-ppc: Eliminate set_fprf Argument From gen_compute_fprf
  target-ppc: Eliminate set_fprf Argument From helper_compute_fprf

 target-ppc/fpu_helper.c |   85 +++---
 target-ppc/helper.h |2 +-
 target-ppc/translate.c  |  105 +++
 3 files changed, 113 insertions(+), 79 deletions(-)

[Qemu-devel] [PATCH 1/7] target-ppc: VXSQRT Should Not Be Set for NaNs

2014-11-03 Thread Tom Musta

The Power ISA square root instructions (fsqrt[s], frsqrte[s]) must
set the FPSCR[VXSQRT] flag when operating on a negative value.
However, NaNs have no sign and therefore this flag should not
be set when operating on one.

Change the order of the checks in the helper code.  Move the
SNaN-to-QNaN macro to the top of the file so that it can be
re-used.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/fpu_helper.c |   29 +
 1 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index da93d12..288401d 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -19,6 +19,9 @@
 #include cpu.h
 #include exec/helper-proto.h
 
+#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL)
+#define float32_snan_to_qnan(x) ((x) | 0x00400000)
+
 /*/
 /* Floating point operations helpers */
 uint64_t helper_float32_to_float64(CPUPPCState *env, uint32_t arg)
@@ -926,14 +929,16 @@ uint64_t helper_fsqrt(CPUPPCState *env, uint64_t arg)
 
 farg.ll = arg;
 
-if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
-/* Square root of a negative nonzero number */
-farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-} else {
+if (unlikely(float64_is_any_nan(farg.d))) {
 if (unlikely(float64_is_signaling_nan(farg.d))) {
-/* sNaN square root */
+/* sNaN reciprocal square root */
 fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+farg.ll = float64_snan_to_qnan(farg.ll);
 }
+} else if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
+/* Square root of a negative nonzero number */
+farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+} else {
 farg.d = float64_sqrt(farg.d, env-fp_status);
 }
 return farg.ll;
@@ -980,17 +985,20 @@ uint64_t helper_frsqrte(CPUPPCState *env, uint64_t arg)
 
 farg.ll = arg;
 
-if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
-/* Reciprocal square root of a negative nonzero number */
-farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
-} else {
+if (unlikely(float64_is_any_nan(farg.d))) {
 if (unlikely(float64_is_signaling_nan(farg.d))) {
 /* sNaN reciprocal square root */
 fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSNAN, 1);
+farg.ll = float64_snan_to_qnan(farg.ll);
 }
+} else if (unlikely(float64_is_neg(farg.d)  !float64_is_zero(farg.d))) {
+/* Reciprocal square root of a negative nonzero number */
+farg.ll = fload_invalid_op_excp(env, POWERPC_EXCP_FP_VXSQRT, 1);
+} else {
 farg.d = float64_sqrt(farg.d, env-fp_status);
 farg.d = float64_div(float64_one, farg.d, env-fp_status);
 }
+
 return farg.ll;
 }
 
@@ -2388,9 +2396,6 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
   \
 VSX_SCALAR_CMP(xscmpodp, 1)
 VSX_SCALAR_CMP(xscmpudp, 0)
 
-#define float64_snan_to_qnan(x) ((x) | 0x0008000000000000ULL)
-#define float32_snan_to_qnan(x) ((x) | 0x00400000)
-
 /* VSX_MAX_MIN - VSX floating point maximum/minimum
  *   name  - instruction mnemonic
  *   op- operation (max or min)
-- 
1.7.1

[Qemu-devel] [PATCH 3/7] target-ppc: Fix Floating Point Move Instructions That Set CR1

2014-11-03 Thread Tom Musta

The Floating Point Move instructions (fmr., fabs., fnabs., fneg.,
and fcpsgn.) incorrectly copy FPSCR[FPCC] instead of [FX,FEX,VX,OX].
Furthermore, the current code does this via a call to gen_compute_fprf,
which is awkward since these instructions do not actually set FPRF.

Change the code to use the newly added gen_set_cr1_from_fpscr
utility.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   20 +++-
 1 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 7775bf4..9653ba9 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2393,7 +2393,9 @@ static void gen_fabs(DisasContext *ctx)
 }
 tcg_gen_andi_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
  ~(1ULL  63));
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 /* fmr  - fmr. */
@@ -2405,7 +2407,9 @@ static void gen_fmr(DisasContext *ctx)
 return;
 }
 tcg_gen_mov_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)]);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 /* fnabs */
@@ -2418,7 +2422,9 @@ static void gen_fnabs(DisasContext *ctx)
 }
 tcg_gen_ori_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
 1ULL  63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 /* fneg */
@@ -2431,7 +2437,9 @@ static void gen_fneg(DisasContext *ctx)
 }
 tcg_gen_xori_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rB(ctx-opcode)],
  1ULL  63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 /* fcpsgn: PowerPC 2.05 specification */
@@ -2444,7 +2452,9 @@ static void gen_fcpsgn(DisasContext *ctx)
 }
 tcg_gen_deposit_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpr[rA(ctx-opcode)],
 cpu_fpr[rB(ctx-opcode)], 0, 63);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 static void gen_fmrgew(DisasContext *ctx)
-- 
1.7.1

[Qemu-devel] [PATCH 6/7] target-ppc: Eliminate set_fprf Argument From gen_compute_fprf

2014-11-03 Thread Tom Musta

The set_fprf argument to the gen_compute_fprf() utility is no longer
needed -- gen_compute_fprf() is now called only when FPRF is actually
computed and set.  Eliminate the obsolete argument.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   38 +++---
 1 files changed, 23 insertions(+), 15 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index d719cdf..c039494 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -257,16 +257,14 @@ static inline void gen_set_cr1_from_fpscr(void)
 tcg_temp_free_i32(t0);
 }
 
-static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf)
+static inline void gen_compute_fprf(TCGv_i64 arg)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
 
-if (set_fprf != 0) {
-/* This case might be optimized later */
-tcg_gen_movi_i32(t0, 1);
-gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-gen_helper_float_check_status(cpu_env);
-}
+/* This case might be optimized later */
+tcg_gen_movi_i32(t0, 1);
+gen_helper_compute_fprf(t0, cpu_env, arg, t0);
+gen_helper_float_check_status(cpu_env);
 
 tcg_temp_free_i32(t0);
 }
@@ -2108,7 +2106,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(); \
 } \
@@ -2135,7 +2135,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(); \
 } \
@@ -2161,7 +2163,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(); \
 } \
@@ -2182,7 +2186,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_reset_fpstatus(); \
 gen_helper_f##name(cpu_fpr[rD(ctx-opcode)], cpu_env, \
cpu_fpr[rB(ctx-opcode)]); \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (set_fprf) {   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)]);   \
+} \
 if (unlikely(Rc(ctx-opcode) != 0)) { \
 gen_set_cr1_from_fpscr(); \
 } \
@@ -2200,7 +2206,9 @@ static void gen_f##name(DisasContext *ctx)
\
 gen_reset_fpstatus

[Qemu-devel] [PATCH 5/7] target-ppc: Fully Migrate to gen_set_cr1_from_fpscr

2014-11-03 Thread Tom Musta

Eliminate the set_rc argument from the gen_compute_fprf utility and
the corresponding (and incorrect) implementation.  Replace it with
calls to the gen_set_cr1_from_fpscr() utility.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   55 ---
 1 files changed, 33 insertions(+), 22 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0247af5..d719cdf 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -257,7 +257,7 @@ static inline void gen_set_cr1_from_fpscr(void)
 tcg_temp_free_i32(t0);
 }
 
-static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
+static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
 
@@ -265,15 +265,7 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int 
set_fprf, int set_rc)
 /* This case might be optimized later */
 tcg_gen_movi_i32(t0, 1);
 gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-if (unlikely(set_rc)) {
-tcg_gen_mov_i32(cpu_crf[1], t0);
-}
 gen_helper_float_check_status(cpu_env);
-} else if (unlikely(set_rc)) {
-/* We always need to compute fpcc */
-tcg_gen_movi_i32(t0, 0);
-gen_helper_compute_fprf(t0, cpu_env, arg, t0);
-tcg_gen_mov_i32(cpu_crf[1], t0);
 }
 
 tcg_temp_free_i32(t0);
@@ -2116,8 +2108,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf,  \
- Rc(ctx-opcode) != 0);   \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(); \
+} \
 }
 
 #define GEN_FLOAT_ACB(name, op2, set_fprf, type)  \
@@ -2141,8 +2135,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(); \
+} \
 }
 #define GEN_FLOAT_AB(name, op2, inval, set_fprf, type)\
 _GEN_FLOAT_AB(name, name, 0x3F, op2, inval, 0, set_fprf, type);   \
@@ -2165,8 +2161,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_helper_frsp(cpu_fpr[rD(ctx-opcode)], cpu_env,\
 cpu_fpr[rD(ctx-opcode)]);\
 } \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0)) { \
+gen_set_cr1_from_fpscr(); \
+} \
 }
 #define GEN_FLOAT_AC(name, op2, inval, set_fprf, type)\
 _GEN_FLOAT_AC(name, name, 0x3F, op2, inval, 0, set_fprf, type);   \
@@ -2184,8 +2182,10 @@ static void gen_f##name(DisasContext *ctx)   
 \
 gen_reset_fpstatus(); \
 gen_helper_f##name(cpu_fpr[rD(ctx-opcode)], cpu_env, \
cpu_fpr[rB(ctx-opcode)]); \
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)],\
- set_fprf, Rc(ctx-opcode) != 0); \
+gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], set_fprf); \
+if (unlikely(Rc(ctx-opcode) != 0

[Qemu-devel] [PATCH 7/7] target-ppc: Eliminate set_fprf Argument From helper_compute_fprf

2014-11-03 Thread Tom Musta

The set_fprf argument to the helper_compute_fprf helper function
is no longer necessary -- the helper is only invoked when FPSCR[FPRF]
is going to be set.

Eliminate the unnecessary argument from the function signature and
its corresponding implementation.  Change the return value of the
helper to void.  Update the name of the local variable ret to
fprf, which now makes more sense.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/fpu_helper.c |   56 +-
 target-ppc/helper.h |2 +-
 target-ppc/translate.c  |8 +--
 3 files changed, 28 insertions(+), 38 deletions(-)

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 288401d..34ddda0 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -63,59 +63,55 @@ static inline int ppc_float64_get_unbiased_exp(float64 f)
 return ((f  52)  0x7FF) - 1023;
 }
 
-uint32_t helper_compute_fprf(CPUPPCState *env, uint64_t arg, uint32_t set_fprf)
+void helper_compute_fprf(CPUPPCState *env, uint64_t arg)
 {
 CPU_DoubleU farg;
 int isneg;
-int ret;
+int fprf;
 
 farg.ll = arg;
 isneg = float64_is_neg(farg.d);
 if (unlikely(float64_is_any_nan(farg.d))) {
 if (float64_is_signaling_nan(farg.d)) {
 /* Signaling NaN: flags are undefined */
-ret = 0x00;
+fprf = 0x00;
 } else {
 /* Quiet NaN */
-ret = 0x11;
+fprf = 0x11;
 }
 } else if (unlikely(float64_is_infinity(farg.d))) {
 /* +/- infinity */
 if (isneg) {
-ret = 0x09;
+fprf = 0x09;
 } else {
-ret = 0x05;
+fprf = 0x05;
 }
 } else {
 if (float64_is_zero(farg.d)) {
 /* +/- zero */
 if (isneg) {
-ret = 0x12;
+fprf = 0x12;
 } else {
-ret = 0x02;
+fprf = 0x02;
 }
 } else {
 if (isden(farg.d)) {
 /* Denormalized numbers */
-ret = 0x10;
+fprf = 0x10;
 } else {
 /* Normalized numbers */
-ret = 0x00;
+fprf = 0x00;
 }
 if (isneg) {
-ret |= 0x08;
+fprf |= 0x08;
 } else {
-ret |= 0x04;
+fprf |= 0x04;
 }
 }
 }
-if (set_fprf) {
-/* We update FPSCR_FPRF */
-env-fpscr = ~(0x1F  FPSCR_FPRF);
-env-fpscr |= ret  FPSCR_FPRF;
-}
-/* We just need fpcc to update Rc1 */
-return ret  0xF;
+/* We update FPSCR_FPRF */
+env-fpscr = ~(0x1F  FPSCR_FPRF);
+env-fpscr |= fprf  FPSCR_FPRF;
 }
 
 /* Floating-point invalid operations exception */
@@ -1859,7 +1855,7 @@ void helper_##name(CPUPPCState *env, uint32_t opcode) 
   \
 }\
  \
 if (sfprf) { \
-helper_compute_fprf(env, xt.fld, sfprf); \
+helper_compute_fprf(env, xt.fld);\
 }\
 }\
 putVSR(xT(opcode), xt, env);\
@@ -1914,7 +1910,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
   \
 }\
  \
 if (sfprf) { \
-helper_compute_fprf(env, xt.fld, sfprf); \
+helper_compute_fprf(env, xt.fld);\
 }\
 }\
  \
@@ -1968,7 +1964,7 @@ void helper_##op(CPUPPCState *env, uint32_t opcode)   
\
 } \
   \
 if (sfprf) {  \
-helper_compute_fprf(env, xt.fld, sfprf);  \
+helper_compute_fprf(env, xt.fld

[Qemu-devel] [PATCH 4/7] target-ppc: mffs. Should Set CR1 from FPSCR Bits

2014-11-03 Thread Tom Musta

Update the Move From FPSCR (mffs.) instruction to correctly
set CR[1] from FPSCR[FX,FEX,VX,OX].

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |4 +++-
 1 files changed, 3 insertions(+), 1 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 9653ba9..0247af5 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -2512,7 +2512,9 @@ static void gen_mffs(DisasContext *ctx)
 }
 gen_reset_fpstatus();
 tcg_gen_extu_tl_i64(cpu_fpr[rD(ctx-opcode)], cpu_fpscr);
-gen_compute_fprf(cpu_fpr[rD(ctx-opcode)], 0, Rc(ctx-opcode) != 0);
+if (unlikely(Rc(ctx-opcode))) {
+gen_set_cr1_from_fpscr();
+}
 }
 
 /* mtfsb0 */
-- 
1.7.1

[Qemu-devel] [PATCH] target-ppc: Fix vcmpbfp. Unordered Case

2014-10-31 Thread Tom Musta

Fix the implementation of Vector Compare Bounds Single Precision.
Specifically, fix the case where the operands are unordered -- since
the result is non-zero, the CR[6] field should be set to zero.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/int_helper.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index eb5c6d2..dae2fea 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -708,7 +708,7 @@ static inline void vcmpbfp_internal(CPUPPCState *env, 
ppc_avr_t *r,
 int le_rel = float32_compare_quiet(a-f[i], b-f[i], env-vec_status);
 if (le_rel == float_relation_unordered) {
 r-u32[i] = 0xc000;
-/* ALL_IN does not need to be updated here.  */
+all_in = 1;
 } else {
 float32 bneg = float32_chs(b-f[i]);
 int ge_rel = float32_compare_quiet(a-f[i], bneg, 
env-vec_status);
-- 
1.7.1

[Qemu-devel] [PATCH] target-ppc: Fix Altivec Round Opcodes

2014-10-31 Thread Tom Musta

Correct the opcodes for the vrfim, vrfin and vrfiz instructions.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   12 ++--
 1 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4a00935..c064cc9 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -7286,10 +7286,10 @@ GEN_VXFORM_NOA_ENV(vrefp, 5, 4);
 GEN_VXFORM_NOA_ENV(vrsqrtefp, 5, 5);
 GEN_VXFORM_NOA_ENV(vexptefp, 5, 6);
 GEN_VXFORM_NOA_ENV(vlogefp, 5, 7);
-GEN_VXFORM_NOA_ENV(vrfim, 5, 8);
-GEN_VXFORM_NOA_ENV(vrfin, 5, 9);
+GEN_VXFORM_NOA_ENV(vrfim, 5, 11);
+GEN_VXFORM_NOA_ENV(vrfin, 5, 8);
 GEN_VXFORM_NOA_ENV(vrfip, 5, 10);
-GEN_VXFORM_NOA_ENV(vrfiz, 5, 11);
+GEN_VXFORM_NOA_ENV(vrfiz, 5, 9);
 
 #define GEN_VXFORM_SIMM(name, opc2, opc3)   \
 static void glue(gen_, name)(DisasContext *ctx)
 \
@@ -10524,10 +10524,10 @@ GEN_VXFORM_NOA(vrefp, 5, 4),
 GEN_VXFORM_NOA(vrsqrtefp, 5, 5),
 GEN_VXFORM_NOA(vexptefp, 5, 6),
 GEN_VXFORM_NOA(vlogefp, 5, 7),
-GEN_VXFORM_NOA(vrfim, 5, 8),
-GEN_VXFORM_NOA(vrfin, 5, 9),
+GEN_VXFORM_NOA(vrfim, 5, 11),
+GEN_VXFORM_NOA(vrfin, 5, 8),
 GEN_VXFORM_NOA(vrfip, 5, 10),
-GEN_VXFORM_NOA(vrfiz, 5, 11),
+GEN_VXFORM_NOA(vrfiz, 5, 9),
 
 #undef GEN_VXFORM_UIMM
 #define GEN_VXFORM_UIMM(name, opc2, opc3)   \
-- 
1.7.1

[Qemu-devel] [PATCH] target-ppc: Fix Altivec Shifts

2014-10-29 Thread Tom Musta

Fix the implementation of the Altivec shift left and shift right
instructions (vsl, vsr) which erroneously inverts shift direction
on big endian hosts.

Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/int_helper.c |   13 ++---
 1 files changed, 2 insertions(+), 11 deletions(-)

diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index 713d777..eb5c6d2 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -1552,13 +1552,6 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, 
ppc_avr_t *b)
 }
 }
 
-#if defined(HOST_WORDS_BIGENDIAN)
-#define LEFT 0
-#define RIGHT 1
-#else
-#define LEFT 1
-#define RIGHT 0
-#endif
 /* The specification says that the results are undefined if all of the
  * shift counts are not identical.  We check to make sure that they are
  * to conform to what real hardware appears to do.  */
@@ -1588,11 +1581,9 @@ void helper_vlogefp(CPUPPCState *env, ppc_avr_t *r, 
ppc_avr_t *b)
 }   \
 }   \
 }
-VSHIFT(l, LEFT)
-VSHIFT(r, RIGHT)
+VSHIFT(l, 1)
+VSHIFT(r, 0)
 #undef VSHIFT
-#undef LEFT
-#undef RIGHT
 
 #define VSL(suffix, element, mask)  \
 void helper_vsl##suffix(ppc_avr_t *r, ppc_avr_t *a, ppc_avr_t *b)   \
-- 
1.7.1

Re: [Qemu-devel] [PATCH v2] linux-user: Let user specify random seed

2014-10-09 Thread Tom Musta

On 10/9/2014 3:36 AM, Magnus Reftel wrote:
 This patch introduces the -seed command line option and the
 QEMU_RAND_SEED environment variable for setting the random seed, which
 is used for the AT_RANDOM ELF aux entry.
 
 This is an updated version of the patch, addressing review comments
 from Eric Blake.
 

Magnus:

Possibly a dumb question:  In a regular environment, is there a way for a user 
to control the 16 bytes of random data pointed to by AT_RANDOM?  (I cannot find 
one).

If not, why is this capability needed in Linux user mode?

Re: [Qemu-devel] [PATCH 04/14] ppc: introduce ppc_get_cr and ppc_set_cr

2014-09-18 Thread Tom Musta

 ppc_cpu_gdb_write_register(CPUState *cs, uint8_t 
 *mem_buf, int n)
  ppc_store_msr(env, ldtul_p(mem_buf));
  break;
  case 66:
 -{
 -uint32_t cr = ldl_p(mem_buf);
 -int i;
 -for (i = 0; i  8; i++) {
 -env-crf[i] = (cr  (32 - ((i + 1) * 4)))  0xF;
 -}
 -break;
 -}
 +ppc_set_cr(env, ldl_p(mem_buf));
 +break;
  case 67:
  env-lr = ldtul_p(mem_buf);
  break;
 @@ -293,14 +273,8 @@ int ppc_cpu_gdb_write_register_apple(CPUState *cs, 
 uint8_t *mem_buf, int n)
  ppc_store_msr(env, ldq_p(mem_buf));
  break;
  case 66 + 32:
 -{
 -uint32_t cr = ldl_p(mem_buf);
 -int i;
 -for (i = 0; i  8; i++) {
 -env-crf[i] = (cr  (32 - ((i + 1) * 4)))  0xF;
 -}
 -break;
 -}
 +ppc_set_cr(env, ldl_p(mem_buf));
 +break;
  case 67 + 32:
  env-lr = ldq_p(mem_buf);
  break;
 diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
 index 9c23c6b..e541b9e 100644
 --- a/target-ppc/kvm.c
 +++ b/target-ppc/kvm.c
 @@ -831,10 +831,7 @@ int kvm_arch_put_registers(CPUState *cs, int level)
  for (i = 0;i  32; i++)
  regs.gpr[i] = env-gpr[i];
  
 -regs.cr = 0;
 -for (i = 0; i  8; i++) {
 -regs.cr |= (env-crf[i]  15)  (4 * (7 - i));
 -}
 +regs.cr = ppc_get_cr(env);
  
  ret = kvm_vcpu_ioctl(cs, KVM_SET_REGS, regs);
  if (ret  0)
 @@ -955,11 +952,7 @@ int kvm_arch_get_registers(CPUState *cs)
  if (ret  0)
  return ret;
  
 -cr = regs.cr;
 -for (i = 7; i = 0; i--) {
 -env-crf[i] = cr  15;
 -cr = 4;
 -}
 +ppc_set_cr(env, regs.cr);
  
  env-ctr = regs.ctr;
  env-lr = regs.lr;
 

One minor issue with this patch:

  CCppc64-softmmu/target-ppc/kvm.o
/bghome/tmusta/powerisa/qemu/qemu/target-ppc/kvm.c: In function 
'kvm_arch_get_registers':
/bghome/tmusta/powerisa/qemu/qemu/target-ppc/kvm.c:948: warning: unused 
variable 'cr'

which, of course, can be fixed like this:

 git diff
diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c
index e541b9e..74c1324 100644
--- a/target-ppc/kvm.c
+++ b/target-ppc/kvm.c
@@ -945,7 +945,6 @@ int kvm_arch_get_registers(CPUState *cs)
 CPUPPCState *env = cpu-env;
 struct kvm_regs regs;
 struct kvm_sregs sregs;
-uint32_t cr;
 int i, ret;

 ret = kvm_vcpu_ioctl(cs, KVM_GET_REGS, regs);


Otherwise ...
Reviewed-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 07/14] ppc: reorganize gen_compute_fprf

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
   v1-v2: fixed leak of temporaries
 
  target-ppc/translate.c | 25 -
  1 file changed, 12 insertions(+), 13 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index c28bddf..a8b6b7c 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -252,23 +252,22 @@ static inline void gen_reset_fpstatus(void)
  
  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
  {
 -TCGv_i32 t0 = tcg_temp_new_i32();
 +TCGv_i32 t0;
  
 -if (set_fprf != 0) {
 -/* This case might be optimized later */
 -tcg_gen_movi_i32(t0, 1);
 -gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 -if (unlikely(set_rc)) {
 -tcg_gen_mov_i32(cpu_crf[1], t0);
 -}
 -gen_helper_float_check_status(cpu_env);
 -} else if (unlikely(set_rc)) {
 -/* We always need to compute fpcc */
 -tcg_gen_movi_i32(t0, 0);
 -gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 +if (set_fprf == 0  !set_rc) {
 +return;
 +}
 +
 +t0 = tcg_temp_new_i32();
 +tcg_gen_movi_i32(t0, set_fprf != 0);
 +gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 +if (set_rc) {
  tcg_gen_mov_i32(cpu_crf[1], t0);
  }
  
 +if (set_fprf != 0) {
 +gen_helper_float_check_status(cpu_env);
 +}
  tcg_temp_free_i32(t0);
  }
  
 

Reviewed-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 08/14] ppc: introduce gen_op_mfcr/gen_op_mtcr

2014-09-18 Thread Tom Musta

);
 +tcg_temp_free_i32(t0);
  }
  } else {
  gen_helper_mfocrf(cpu_gpr[rD(ctx-opcode)], cpu_env);
 @@ -4262,8 +4288,7 @@ static void gen_mtcrf(DisasContext *ctx)
  TCGv_i32 temp = tcg_temp_new_i32();
  crn = ctz32 (crm);
  tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx-opcode)]);
 -tcg_gen_shri_i32(temp, temp, crn * 4);
 -tcg_gen_andi_i32(cpu_crf[7 - crn], temp, 0xf);
 +gen_op_mtcr((7 - crn) * 4, temp, crn * 4);
  tcg_temp_free_i32(temp);
  }
  } else {
 @@ -8188,13 +8213,13 @@ static void gen_set_cr1_from_fpscr(DisasContext *ctx)
  {
  TCGv_i32 tmp = tcg_temp_new_i32();
  tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
 -tcg_gen_shri_i32(cpu_crf[1], tmp, 28);
 +gen_op_mtcr(4, tmp, 28);
  tcg_temp_free_i32(tmp);
  }
  #else
  static void gen_set_cr1_from_fpscr(DisasContext *ctx)
  {
 -tcg_gen_shri_tl(cpu_crf[1], cpu_fpscr, 28);
 +gen_op_mtcr(4, cpu_fpscr, 28);
  }
  #endif
  
 

Reviewed-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 09/14] ppc: introduce ppc_get_crf and ppc_set_crf

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 These two functions will group together four CR bits into a single
 value, once we change the representation of condition registers.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  linux-user/main.c|  2 +-
  target-ppc/cpu.h | 10 ++
  target-ppc/excp_helper.c |  2 +-
  target-ppc/fpu_helper.c  |  6 --
  target-ppc/int_helper.c  | 14 +++---
  target-ppc/translate.c   | 13 +++--
  6 files changed, 30 insertions(+), 17 deletions(-)
 
 diff --git a/linux-user/main.c b/linux-user/main.c
 index 472a16d..152c031 100644
 --- a/linux-user/main.c
 +++ b/linux-user/main.c
 @@ -1550,7 +1550,7 @@ static int do_store_exclusive(CPUPPCState *env)
  }
  }
  }
 -env-crf[0] = (stored  1) | xer_so;
 +ppc_set_crf(env, 0, (stored  1) | xer_so);
  env-reserve_addr = (target_ulong)-1;
  }
  if (!segv) {
 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index 0c0196d..91eac17 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -1217,6 +1217,16 @@ static inline void ppc_set_cr(CPUPPCState *env, 
 uint32_t cr)
  }
  }
  
 +static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
 +{
 +return env-crf[i];
 +}
 +
 +static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
 +{
 +env-crf[i] = val;
 +}
 +
  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
  {
  uint64_t gprv;
 diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
 index 96ad9d7..08637c1 100644
 --- a/target-ppc/excp_helper.c
 +++ b/target-ppc/excp_helper.c
 @@ -504,7 +504,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
 excp_model, int excp)
   env-error_code);
  }
  #endif
 -msr |= env-crf[0]  28;
 +msr |= ppc_get_crf(env, 0)  28;
  msr |= env-error_code; /* key, D/I, S/L bits */
  /* Set way using a LRU mechanism */
  msr |= ((env-last_way + 1)  (env-nb_ways - 1))  17;
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index b4e6d72..8cf321b 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -1099,7 +1099,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x01  FPSCR_FPRF)  fpcc;
 -env-crf[crfD] = (1  fpcc);
 +ppc_set_crf(env, crfD, 1  fpcc);
 +
  if (unlikely(fpcc == CRF_SO
(float64_is_signaling_nan(farg1.d) ||
   float64_is_signaling_nan(farg2.d {
 @@ -1130,7 +1131,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x01  FPSCR_FPRF)  fpcc;
 -env-crf[crfD] = (1  fpcc);
 +ppc_set_crf(env, crfD, 1  fpcc);
 +
  if (unlikely(fpcc == CRF_SO)) {
  if (float64_is_signaling_nan(farg1.d) ||
  float64_is_signaling_nan(farg2.d)) {
 diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
 index 54e8998..b76a895 100644
 --- a/target-ppc/int_helper.c
 +++ b/target-ppc/int_helper.c
 @@ -294,7 +294,7 @@ void helper_mtocrf(CPUPPCState *env, target_ulong cr, 
 uint32_t mask)
  int i;
  for (i = ARRAY_SIZE(env-crf); --i = 0; ) {
  if (mask  1) {
 -env-crf[i] = cr  0x0F;
 +ppc_set_crf(env, i, cr  0x0F);
  }
  cr = 4;
  mask = 1;
 @@ -657,7 +657,7 @@ VCF(sx, int32_to_float32, s32)
  none |= result; \
  }   \
  if (record) {   \
 -env-crf[6] = ((all != 0)  3) | ((none == 0)  1);   \
 +ppc_set_crf(env, 6, ((all != 0)  3) | ((none == 0)  1)); \
  }   \
  }
  #define VCMP(suffix, compare, element)  \
 @@ -703,7 +703,7 @@ VCMP(gtsd, , s64)
  none |= result; \
  }   \
  if (record) {   \
 -env-crf[6] = ((all != 0)  3) | ((none == 0)  1);   \
 +ppc_set_crf(env, 6, ((all != 0)  3) | ((none == 0)  1)); \
  }   \
  }
  #define VCMPFP(suffix, compare, order)  \
 @@ -737,7 +737,7 @@ static inline void vcmpbfp_internal(CPUPPCState *env, 
 ppc_avr_t *r,
  }
  }
  if (record) {
 -env-crf[6] = (all_in == 0)  1;
 +ppc_set_crf(env, 6, (all_in == 0)  1);
  }
  }
  
 @@ -2558,7 +2558,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, 
 target_ulong high,
  for (mask = 0xFF00; mask != 0; mask = mask  8) {

Re: [Qemu-devel] [PATCH 10/14] ppc: use movcond for isel

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
   v1-v2: fixed TCG debugging failures
 
  target-ppc/translate.c | 26 +-
  1 file changed, 13 insertions(+), 13 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 9ff8763..0933c00 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -777,27 +777,27 @@ static void gen_cmpli(DisasContext *ctx)
  /* isel (PowerPC 2.03 specification) */
  static void gen_isel(DisasContext *ctx)
  {
 -int l1, l2;
  uint32_t bi = rC(ctx-opcode);
  uint32_t mask;
  TCGv_i32 t0;
 -
 -l1 = gen_new_label();
 -l2 = gen_new_label();
 +TCGv t1, true_op, zero;
  
  mask = 0x08  (bi  0x03);
  t0 = tcg_temp_new_i32();
  tcg_gen_andi_i32(t0, cpu_crf[bi  2], mask);
 -tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
 -if (rA(ctx-opcode) == 0)
 -tcg_gen_movi_tl(cpu_gpr[rD(ctx-opcode)], 0);
 -else
 -tcg_gen_mov_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rA(ctx-opcode)]);
 -tcg_gen_br(l2);
 -gen_set_label(l1);
 -tcg_gen_mov_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rB(ctx-opcode)]);
 -gen_set_label(l2);
 +t1 = tcg_temp_new();
 +tcg_gen_extu_i32_tl(t1, t0);
 +zero = tcg_const_tl(0);
 +if (rA(ctx-opcode) == 0) {
 +true_op = zero;
 +} else {
 +true_op = cpu_gpr[rA(ctx-opcode)];
 +}
 +tcg_gen_movcond_tl(TCG_COND_NE, cpu_gpr[rD(ctx-opcode)], t1, zero,
 +   true_op, cpu_gpr[rB(ctx-opcode)]);
 +tcg_temp_free(t1);
  tcg_temp_free_i32(t0);
 +tcg_temp_free(zero);
  }
  
  /* cmpb: PowerPC 2.05 specification */
 

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 11/14] ppc: store CR registers in 32 1-bit registers

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 This makes comparisons much smaller and faster.  The speedup is
 approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
 
 Note that CRF_* constants are flipped to match PowerPC's big
 bit-endianness.  Previously, the CR register was effectively stored
 in mixed endianness, so now there is less indirection going on.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
   v1-v2: fixed all issues reported by Tom, notably: 1) temporary
   leak in gen_op_mfcr; 2) missing set of cr[so] for gen_op_cmp32;
   3) i32 vs. tl typing issues; 4) creqv/nand/nor/orc extra 1 bits.
 
  linux-user/main.c   |   4 +-
  target-ppc/cpu.h|  41 +++---
  target-ppc/fpu_helper.c |  44 ++-
  target-ppc/helper.h |   6 -
  target-ppc/int_helper.c |   2 +-
  target-ppc/machine.c|   9 ++
  target-ppc/translate.c  | 344 
 ++--
  7 files changed, 236 insertions(+), 214 deletions(-)
 

Run checkpatch.pl.  In fairness, you are modifying code that didn't pass before 
... but still.

 diff --git a/linux-user/main.c b/linux-user/main.c
 index 152c031..b403f24 100644
 --- a/linux-user/main.c
 +++ b/linux-user/main.c
 @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
   * PPC ABI uses overflow flag in cr0 to signal an error
   * in syscalls.
   */
 -env-crf[0] = ~0x1;
 +env-cr[CRF_SO] = 0;
  ret = do_syscall(env, env-gpr[0], env-gpr[3], env-gpr[4],
   env-gpr[5], env-gpr[6], env-gpr[7],
   env-gpr[8], 0, 0);
 @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
  break;
  }
  if (ret  (target_ulong)(-515)) {
 -env-crf[0] |= 0x1;
 +env-cr[CRF_SO] = 1;
  ret = -ret;
  }
  env-gpr[3] = ret;
 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index 91eac17..41b8299 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -940,7 +940,7 @@ struct CPUPPCState {
  /* CTR */
  target_ulong ctr;
  /* condition register */
 -uint32_t crf[8];
 +uint32_t cr[32];
  #if defined(TARGET_PPC64)
  /* CFAR */
  target_ulong cfar;
 @@ -1059,6 +1059,9 @@ struct CPUPPCState {
  uint64_t dtl_addr, dtl_size;
  #endif /* TARGET_PPC64 */
  
 +/* condition register, for migration compatibility */
 +uint32_t crf[8];
 +
  int error_code;
  uint32_t pending_interrupts;
  #if !defined(CONFIG_USER_ONLY)
 @@ -1202,8 +1205,8 @@ static inline uint32_t ppc_get_cr(const CPUPPCState 
 *env)
  uint32_t cr = 0;
  int i;
  
 -for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
 -cr |= env-crf[i]  (32 - ((i + 1) * 4));
 +for (i = 0; i  ARRAY_SIZE(env-cr); i++) {
 +cr |= env-cr[i]  (31 - i);
  }
  return cr;
  }
 @@ -1212,19 +1215,27 @@ static inline void ppc_set_cr(CPUPPCState *env, 
 uint32_t cr)
  {
  int i;
  
 -for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
 -env-crf[i] = (cr  (32 - ((i + 1) * 4)))  0xF;
 +for (i = 0; i  ARRAY_SIZE(env-cr); i++) {
 +env-cr[i] = (cr  (31 - i))  1;
  }
  }
  
  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
  {
 -return env-crf[i];
 +uint32_t r;
 +r = env-cr[i * 4];
 +r = (r  1) | (env-cr[i * 4 + 1]);
 +r = (r  1) | (env-cr[i * 4 + 2]);
 +r = (r  1) | (env-cr[i * 4 + 3]);
 +return r;
  }
  
  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
  {
 -env-crf[i] = val;
 +env-cr[i * 4 + 0] = (val  0x08) != 0;
 +env-cr[i * 4 + 1] = (val  0x04) != 0;
 +env-cr[i * 4 + 2] = (val  0x02) != 0;
 +env-cr[i * 4 + 3] = (val  0x01) != 0;
  }
  
  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
 @@ -1271,14 +1282,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
  
  
 /*/
  /* CRF definitions */
 -#define CRF_LT3
 -#define CRF_GT2
 -#define CRF_EQ1
 -#define CRF_SO0
 -#define CRF_CH(1  CRF_LT)
 -#define CRF_CL(1  CRF_GT)
 -#define CRF_CH_OR_CL  (1  CRF_EQ)
 -#define CRF_CH_AND_CL (1  CRF_SO)
 +#define CRF_LT0
 +#define CRF_GT1
 +#define CRF_EQ2
 +#define CRF_SO3
 +#define CRF_CHCRF_LT
 +#define CRF_CLCRF_GT
 +#define CRF_CH_OR_CL  CRF_EQ
 +#define CRF_CH_AND_CL CRF_SO

This breaks what you did in patch 5, which used LE bit numbering to perform 
shifts.  And it breaks other code that uses the old LE convention.

Here is what I found:

diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
index 7894dc5..3f656e5 100644
--- a/target-ppc/fpu_helper.c
+++ b/target-ppc/fpu_helper.c
@@ -1043,7 +1043,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
 }
 }

-return (1  CRF_LT) |

Re: [Qemu-devel] [PATCH 13/14] ppc: inline ppc_set_crf when clearer

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 Do not go through the loop when we're setting the four CR fields to
 separate constants or conditions.  This is clearer than putting together
 4-bit value and passing it.

I guess clearer is in the eye of the beholder  :)

In general, replacing a single line of code with four is not a simplification 
(IMO).

That said, I was not able to spot or identify by testing any functional 
problems with this patch.

 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
   v1-v2: due to previous changes, ppc_get_crf never needs this
   treatment, so I adjusted the subject
 
  linux-user/main.c   |  5 -
  target-ppc/fpu_helper.c | 12 ++--
  target-ppc/int_helper.c | 27 +--
  3 files changed, 35 insertions(+), 9 deletions(-)
 
 diff --git a/linux-user/main.c b/linux-user/main.c
 index b403f24..5a0b31f 100644
 --- a/linux-user/main.c
 +++ b/linux-user/main.c
 @@ -1550,7 +1550,10 @@ static int do_store_exclusive(CPUPPCState *env)
  }
  }
  }
 -ppc_set_crf(env, 0, (stored  1) | xer_so);
 +env-cr[CRF_LT] = 0;
 +env-cr[CRF_GT] = 0;
 +env-cr[CRF_EQ] = stored;
 +env-cr[CRF_SO] = xer_so;
  env-reserve_addr = (target_ulong)-1;
  }
  if (!segv) {
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index 7894dc5..c86320f 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -1099,7 +1099,11 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x08  FPSCR_FPRF)  fpcc;
 -ppc_set_crf(env, crfD, 0x08  fpcc);
 +
 +env-cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
 +env-cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
 +env-cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
 +env-cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
  
  if (unlikely(fpcc == CRF_SO
(float64_is_signaling_nan(farg1.d) ||
 @@ -1131,7 +1135,11 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x08  FPSCR_FPRF)  fpcc;
 -ppc_set_crf(env, crfD, 0x08  fpcc);
 +
 +env-cr[crfD * 4 + CRF_LT] = (fpcc == CRF_LT);
 +env-cr[crfD * 4 + CRF_GT] = (fpcc == CRF_GT);
 +env-cr[crfD * 4 + CRF_EQ] = (fpcc == CRF_EQ);
 +env-cr[crfD * 4 + CRF_SO] = (fpcc == CRF_SO);
  
  if (unlikely(fpcc == CRF_SO)) {
  if (float64_is_signaling_nan(farg1.d) ||
 diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
 index 96f2e7d..be52437 100644
 --- a/target-ppc/int_helper.c
 +++ b/target-ppc/int_helper.c
 @@ -657,7 +657,10 @@ VCF(sx, int32_to_float32, s32)
  none |= result; \
  }   \
  if (record) {   \
 -ppc_set_crf(env, 6, ((all != 0)  3) | ((none == 0)  1)); \
 +env-cr[24 + CRF_LT] = (all != 0);  \
 +env-cr[24 + CRF_GT] = 0;   \
 +env-cr[24 + CRF_EQ] = (none == 0); \
 +env-cr[24 + CRF_SO] = 0;   \
  }   \
  }
  #define VCMP(suffix, compare, element)  \
 @@ -703,7 +706,10 @@ VCMP(gtsd, , s64)
  none |= result; \
  }   \
  if (record) {   \
 -ppc_set_crf(env, 6, ((all != 0)  3) | ((none == 0)  1)); \
 +env-cr[24 + CRF_LT] = (all != 0);  \
 +env-cr[24 + CRF_GT] = 0;   \
 +env-cr[24 + CRF_EQ] = (none == 0); \
 +env-cr[24 + CRF_SO] = 0;   \
  }   \
  }
  #define VCMPFP(suffix, compare, order)  \
 @@ -737,7 +743,10 @@ static inline void vcmpbfp_internal(CPUPPCState *env, 
 ppc_avr_t *r,
  }
  }
  if (record) {
 -ppc_set_crf(env, 6, (all_in == 0)  1);
 +env-cr[24 + CRF_LT] = 0;
 +env-cr[24 + CRF_GT] = 0;
 +env-cr[24 + CRF_EQ] = (all_in == 0);
 +env-cr[24 + CRF_SO] = 0;
  }
  }
  
 @@ -2558,7 +2567,9 @@ target_ulong helper_dlmzb(CPUPPCState *env, 
 target_ulong high,
  for (mask = 0xFF00; mask != 0; mask = mask  8) {
  if ((high  mask) == 0) {
  if (update_Rc) {
 -ppc_set_crf(env, 0, 0x4);
 +env-cr[CRF_LT] = 0;
 +env-cr[CRF_GT] = 1;
 +env-cr[CRF_EQ] = 0;

Re: [Qemu-devel] [PATCH v2 00/14] TCG ppc speedups

2014-09-18 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 Patches 1-3 speed up softmmu emulation by avoiding TLB flushes on changes
 to IR/DR.
 
 Patches 4-14 speed up emulation in general by rewriting the handling of
 condition registers.
 
 Paolo Bonzini (14):
   ppc: do not look at the MMU index to detect PR/HV mode
   softmmu: support up to 12 MMU modes
   target-ppc: use separate indices for various translation modes
   ppc: introduce ppc_get_cr and ppc_set_cr
   ppc: use CRF_* in fpu_helper.c
   ppc: introduce helpers for mfocrf/mtocrf
   ppc: reorganize gen_compute_fprf
   ppc: introduce gen_op_mfcr/gen_op_mtcr
   ppc: introduce ppc_get_crf and ppc_set_crf
   ppc: use movcond for isel
   ppc: store CR registers in 32 1-bit registers
   ppc: use movcond to implement evsel
   ppc: inline ppc_get_crf/ppc_set_crf when clearer
   ppc: dump all 32 CR bits
 
  include/exec/cpu_ldst.h  | 120 -
  linux-user/elfload.c |   4 +-
  linux-user/main.c|   9 +-
  linux-user/signal.c  |   8 +-
  monitor.c|   9 +-
  target-ppc/cpu.h |  66 -
  target-ppc/excp_helper.c |   5 +-
  target-ppc/fpu_helper.c  |  82 +++---
  target-ppc/gdbstub.c |  42 +--
  target-ppc/helper.h  |   9 +-
  target-ppc/helper_regs.h |  11 +-
  target-ppc/int_helper.c  |  46 +++-
  target-ppc/kvm.c |  11 +-
  target-ppc/machine.c |   9 +
  target-ppc/translate.c   | 686 
 ---
  15 files changed, 631 insertions(+), 486 deletions(-)
 


Paolo:  I spent some time reviewing and testing patches 4-14.  See my specific 
comments.

I also attempted to identify the speedup of just these patches.  My test was 
booting an Ubuntu 14.04 (PPC64LE) image to the login prompt, checking some of 
the timestamps along the way.  I was able to observe a speedup on a modest 
sized laptop (x86) host
-- about 2%.  I did not see any difference on a Power7 host.

Re: [Qemu-devel] [PATCH 03/14] target-ppc: use separate indices for various translation modes

2014-09-16 Thread Tom Musta

On 9/15/2014 10:03 AM, Paolo Bonzini wrote:
 PowerPC TCG flushes the TLB on every IR/DR change, which basically
 means on every user-kernel context switch.  Encode IR/DR in the
 MMU index.
 
 This brings the number of TLB flushes down from ~90 to ~5
 for starting up the Debian installer, which is in line with x86
 and gives a ~10% performance improvement.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/cpu.h |  7 ++-
  target-ppc/excp_helper.c |  3 ---
  target-ppc/helper_regs.h | 11 ++-
  3 files changed, 8 insertions(+), 13 deletions(-)
 
 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index b64c652..c29ce3b 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -922,7 +922,8 @@ struct ppc_segment_page_sizes {
  
  
 /*/
  /* The whole PowerPC CPU context */
 -#define NB_MMU_MODES 3
 +#define NB_MMU_MODES 12
 +#define MMU_USER_IDX 3  /* PR=IR=DR=1 */

This doesn't build for me:

  CCppc64-softmmu/tcg/tcg.o
In file included from /bghome/tmusta/powerisa/qemu/qemu/tcg/tcg.c:264:
/bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c: In function 
'tcg_out_tlb_read':
/bghome/tmusta/powerisa/qemu/qemu/tcg/ppc/tcg-target.c:1394: error: size of 
array 'qemu_build_bug_on__1396' is negative
make[1]: *** [tcg/tcg.o] Error 1
make: *** [subdir-ppc64-softmmu] Error 2

which correlates with this:

  1389  /* Compensate for very large offsets.  */
  1390  if (add_off = 0x8000) {
  1391  /* Most target env are smaller than 32k; none are larger than 
64k.
  1392 Simplify the logic here merely to offset by 0x7ff0, giving 
us a
  1393 range just shy of 64k.  Check this assumption.  */
  1394  QEMU_BUILD_BUG_ON(offsetof(CPUArchState,
  1395 tlb_table[NB_MMU_MODES - 1][1])
  1396 0x7ff0 + 0x7fff);
  1397  tcg_out32(s, ADDI | TAI(TCG_REG_TMP1, base, 0x7ff0));
  1398  base = TCG_REG_TMP1;
  1399  cmp_off -= 0x7ff0;
  1400  add_off -= 0x7ff0;
  1401  }




  
  #define PPC_CPU_OPCODES_LEN 0x40
  
 @@ -1231,10 +1232,6 @@ static inline CPUPPCState *cpu_init(const char 
 *cpu_model)
  #define cpu_list ppc_cpu_list
  
  /* MMU modes definitions */
 -#define MMU_MODE0_SUFFIX _user
 -#define MMU_MODE1_SUFFIX _kernel
 -#define MMU_MODE2_SUFFIX _hypv
 -#define MMU_USER_IDX 0
  static inline int cpu_mmu_index (CPUPPCState *env)
  {
  return env-mmu_idx;
 diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
 index 922e86d..96ad9d7 100644
 --- a/target-ppc/excp_helper.c
 +++ b/target-ppc/excp_helper.c
 @@ -623,9 +623,6 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
 excp_model, int excp)
  
  if (env-spr[SPR_LPCR]  LPCR_AIL) {
  new_msr |= (1  MSR_IR) | (1  MSR_DR);
 -} else if (msr  ((1  MSR_IR) | (1  MSR_DR))) {
 -/* If we disactivated any translation, flush TLBs */
 -tlb_flush(cs, 1);
  }
  
  #ifdef TARGET_PPC64
 diff --git a/target-ppc/helper_regs.h b/target-ppc/helper_regs.h
 index 271fddf..23b8ded 100644
 --- a/target-ppc/helper_regs.h
 +++ b/target-ppc/helper_regs.h
 @@ -41,12 +41,15 @@ static inline void hreg_swap_gpr_tgpr(CPUPPCState *env)
  
  static inline void hreg_compute_mem_idx(CPUPPCState *env)
  {
 +int high;
 +
  /* Precompute MMU index */
  if (msr_pr == 0  msr_hv != 0) {
 -env-mmu_idx = 2;
 +high = 2;
  } else {
 -env-mmu_idx = 1 - msr_pr;
 +high = 1 - msr_pr;
  }
 +env-mmu_idx = (high  2) | (msr_ir  1) | msr_dr;
  }
  
  static inline void hreg_compute_hflags(CPUPPCState *env)
 @@ -56,7 +59,7 @@ static inline void hreg_compute_hflags(CPUPPCState *env)
  /* We 'forget' FE0  FE1: we'll never generate imprecise exceptions */
  hflags_mask = (1  MSR_VR) | (1  MSR_AP) | (1  MSR_SA) |
  (1  MSR_PR) | (1  MSR_FP) | (1  MSR_SE) | (1  MSR_BE) |
 -(1  MSR_LE) | (1  MSR_VSX);
 +(1  MSR_LE) | (1  MSR_VSX) | (1  MSR_IR) | (1  MSR_DR);
  hflags_mask |= (1ULL  MSR_CM) | (1ULL  MSR_SF) | MSR_HVB;
  hreg_compute_mem_idx(env);
  env-hflags = env-msr  hflags_mask;
 @@ -82,8 +85,6 @@ static inline int hreg_store_msr(CPUPPCState *env, 
 target_ulong value,
  }
  if (((value  MSR_IR)  1) != msr_ir ||
  ((value  MSR_DR)  1) != msr_dr) {
 -/* Flush all tlb when changing translation mode */
 -tlb_flush(cs, 1);
  excp = POWERPC_EXCP_NONE;
  cs-interrupt_request |= CPU_INTERRUPT_EXITTB;
  }

Re: [Qemu-devel] [PATCH 1/2] target-ppc : Allow fc[tf]id[*] mnemonics for non TARGET_PPC64

2014-09-12 Thread Tom Musta

, PPC2_FP_CVT_ISA206),
  GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT),
  GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT),
 -#if defined(TARGET_PPC64)
 -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B),
 +GEN_HANDLER_E(fcfid, 0x3F, 0x0E, 0x1A, 0x001F, PPC_NONE, 
 PPC2_FP_CVT_S64),
  GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B),
 +GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F, PPC_NONE, 
 PPC2_FP_CVT_S64),
 +GEN_HANDLER_E(fctidz, 0x3F, 0x0F, 0x19, 0x001F, PPC_NONE, 
 PPC2_FP_CVT_S64),
  GEN_HANDLER_E(fctidu, 0x3F, 0x0E, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B),
  GEN_HANDLER_E(fctiduz, 0x3F, 0x0F, 0x1D, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 -#endif
  GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT),
  GEN_FLOAT_B(riz, 0x08, 0x0D, 1, PPC_FLOAT_EXT),
  GEN_FLOAT_B(rip, 0x08, 0x0E, 1, PPC_FLOAT_EXT),
 diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
 index 48177ed..ac4d12a 100644
 --- a/target-ppc/translate_init.c
 +++ b/target-ppc/translate_init.c
 @@ -5010,7 +5010,8 @@ POWERPC_FAMILY(e5500)(ObjectClass *oc, void *data)
 PPC_FLOAT_STFIWX | PPC_WAIT |
 PPC_MEM_TLBSYNC | PPC_TLBIVAX | PPC_MEM_SYNC |
 PPC_64B | PPC_POPCNTB | PPC_POPCNTWD;
 -pcc-insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206;
 +pcc-insns_flags2 = PPC2_BOOKE206 | PPC2_PRCNTL | PPC2_PERM_ISA206 | \
 +PPC2_FP_CVT_S64;
  pcc-msr_mask = (1ull  MSR_CM) |
  (1ull  MSR_GS) |
  (1ull  MSR_UCLE) |
 @@ -7906,6 +7907,7 @@ POWERPC_FAMILY(970)(ObjectClass *oc, void *data)
 PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B | PPC_ALTIVEC |
 PPC_SEGMENT_64B | PPC_SLBI;
 +pcc-insns_flags2 = PPC2_FP_CVT_S64;
  pcc-msr_mask = (1ull  MSR_SF) |
  (1ull  MSR_VR) |
  (1ull  MSR_POW) |
 @@ -7958,6 +7960,7 @@ POWERPC_FAMILY(POWER5P)(ObjectClass *oc, void *data)
 PPC_MEM_TLBIE | PPC_MEM_TLBSYNC |
 PPC_64B |
 PPC_SEGMENT_64B | PPC_SLBI;
 +pcc-insns_flags2 = PPC2_FP_CVT_S64;
  pcc-msr_mask = (1ull  MSR_SF) |
  (1ull  MSR_VR) |
  (1ull  MSR_POW) |
 @@ -8100,7 +8103,7 @@ POWERPC_FAMILY(POWER7)(ObjectClass *oc, void *data)
  pcc-insns_flags2 = PPC2_VSX | PPC2_DFP | PPC2_DBRX | PPC2_ISA205 |
  PPC2_PERM_ISA206 | PPC2_DIVE_ISA206 |
  PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
 -PPC2_FP_TST_ISA206;
 +PPC2_FP_TST_ISA206 | PPC2_FP_CVT_S64;
  pcc-msr_mask = (1ull  MSR_SF) |
  (1ull  MSR_VR) |
  (1ull  MSR_VSX) |
 @@ -8178,7 +8181,7 @@ POWERPC_FAMILY(POWER8)(ObjectClass *oc, void *data)
  PPC2_ATOMIC_ISA206 | PPC2_FP_CVT_ISA206 |
  PPC2_FP_TST_ISA206 | PPC2_BCTAR_ISA207 |
  PPC2_LSQ_ISA207 | PPC2_ALTIVEC_207 |
 -PPC2_ISA205 | PPC2_ISA207S;
 +PPC2_ISA205 | PPC2_ISA207S | PPC2_FP_CVT_S64;
  pcc-msr_mask = (1ull  MSR_SF) |
  (1ull  MSR_TM) |
  (1ull  MSR_VR) |
 

Other than the minor comments ...

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 0/2] Enabling floating point instruction to 440x5 CPUs

2014-09-12 Thread Tom Musta

On 9/11/2014 2:17 PM, Pierre Mallard wrote:
 This patch series enable floating point instruction in 440x5 CPUs
 which have the capabilities to have optional APU FPU in double precision mode.
 
 1) Allow fc[tf]id[*] mnemonics for non TARGET_PPC64 with a new insn2 flag
 2) Create a new 440x5 implementing floating point instructions
 
 Pierre Mallard (2):
   target-ppc : Allow fc[tf]id[*] mnemonics for non TARGET_PPC64
   target-ppc : Add new processor type 440x5wDFPU
 
  target-ppc/cpu-models.c |3 +++
  target-ppc/cpu.h|5 -
  target-ppc/fpu_helper.c |6 --
  target-ppc/helper.h |4 +---
  target-ppc/translate.c  |   18 +++--
  target-ppc/translate_init.c |   47 
 ---
  6 files changed, 59 insertions(+), 24 deletions(-)
 

NIT:  It is customary to version your patches so that we can all keep them 
straight, e.g. [V2 PATCH 0/2].  You can use the --subject-prefix option of 
git format-patch.  I will defer to Alex on whether he wants you to resubmit.

Re: [Qemu-devel] [PATCH 1/2] target-ppc : Allow fc[tf]id[*] mnemonics for non TARGET_PPC64

2014-09-12 Thread Tom Musta

On 9/12/2014 9:28 AM, Tom Musta wrote:
 On 9/11/2014 2:17 PM, Pierre Mallard wrote:
 This patch remove limitation for fc[tf]id[*] on 32 bits targets and
 add a new insn flag for signed integer 64 conversion PPC2_FP_CVT_S64
 ---
  target-ppc/cpu.h|5 -
  target-ppc/fpu_helper.c |6 --
  target-ppc/helper.h |4 +---
  target-ppc/translate.c  |   18 +++---
  target-ppc/translate_init.c |9 ++---
  5 files changed, 18 insertions(+), 24 deletions(-)

 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index b64c652..fa50c32 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -2008,13 +2008,16 @@ enum {
  PPC2_ALTIVEC_207   = 0x4000ULL,
  /* PowerISA 2.07 Book3s specification   
  */
  PPC2_ISA207S   = 0x8000ULL,
 +/* Double precision floating point conversion for signed integer 64 
  */
 +PPC2_FP_CVT_S64= 0x0001ULL,
  
  #define PPC_TCG_INSNS2 (PPC2_BOOKE206 | PPC2_VSX | PPC2_PRCNTL | PPC2_DBRX 
 | \
  PPC2_ISA205 | PPC2_VSX207 | PPC2_PERM_ISA206 | \
  PPC2_DIVE_ISA206 | PPC2_ATOMIC_ISA206 | \
  PPC2_FP_CVT_ISA206 | PPC2_FP_TST_ISA206 | \
  PPC2_BCTAR_ISA207 | PPC2_LSQ_ISA207 | \
 -PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP)
 +PPC2_ALTIVEC_207 | PPC2_ISA207S | PPC2_DFP | \
 +PPC2_FP_CVT_S64)
  };
  
  
 /*/
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index da93d12..7f74466 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -649,14 +649,10 @@ FPU_FCTI(fctiw, int32, 0x8000U)
  FPU_FCTI(fctiwz, int32_round_to_zero, 0x8000U)
  FPU_FCTI(fctiwu, uint32, 0xU)
  FPU_FCTI(fctiwuz, uint32_round_to_zero, 0xU)
 -#if defined(TARGET_PPC64)
  FPU_FCTI(fctid, int64, 0x8000ULL)
  FPU_FCTI(fctidz, int64_round_to_zero, 0x8000ULL)
  FPU_FCTI(fctidu, uint64, 0xULL)
  FPU_FCTI(fctiduz, uint64_round_to_zero, 0xULL)
 -#endif
 -
 -#if defined(TARGET_PPC64)
  
  #define FPU_FCFI(op, cvtr, is_single)  \
  uint64_t helper_##op(CPUPPCState *env, uint64_t arg)   \
 @@ -678,8 +674,6 @@ FPU_FCFI(fcfids, int64_to_float32, 1)
  FPU_FCFI(fcfidu, uint64_to_float64, 0)
  FPU_FCFI(fcfidus, uint64_to_float32, 1)
  
 -#endif
 -
  static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg,
int rounding_mode)
  {
 diff --git a/target-ppc/helper.h b/target-ppc/helper.h
 index 509eae5..52402ef 100644
 --- a/target-ppc/helper.h
 +++ b/target-ppc/helper.h
 @@ -67,16 +67,14 @@ DEF_HELPER_2(fctiw, i64, env, i64)
  DEF_HELPER_2(fctiwu, i64, env, i64)
  DEF_HELPER_2(fctiwz, i64, env, i64)
  DEF_HELPER_2(fctiwuz, i64, env, i64)
 -#if defined(TARGET_PPC64)
  DEF_HELPER_2(fcfid, i64, env, i64)
  DEF_HELPER_2(fcfidu, i64, env, i64)
  DEF_HELPER_2(fcfids, i64, env, i64)
  DEF_HELPER_2(fcfidus, i64, env, i64)
  DEF_HELPER_2(fctid, i64, env, i64)
 -DEF_HELPER_2(fctidu, i64, env, i64)
  DEF_HELPER_2(fctidz, i64, env, i64)
 +DEF_HELPER_2(fctidu, i64, env, i64)
 
 NIT:  I would not have re-arranged fctidu/fctidz like this since it only 
 makes the patch larger without actually accomplishing anything (unless, of 
 course, the point of your patch is to do clean up).  You seem to have done 
 this in other places as well.
 
  DEF_HELPER_2(fctiduz, i64, env, i64)
 -#endif
  DEF_HELPER_2(frsp, i64, env, i64)
  DEF_HELPER_2(frin, i64, env, i64)
  DEF_HELPER_2(friz, i64, env, i64)
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index c07bb01..1fe82ce 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -2246,9 +2246,8 @@ GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT);
  GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206);
  /* frsp */
  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT);
 -#if defined(TARGET_PPC64)
  /* fcfid */
 -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B);
 +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC2_FP_CVT_S64);
  /* fcfids */
  GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206);
  /* fcfidu */
 @@ -2256,14 +2255,13 @@ GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, 
 PPC2_FP_CVT_ISA206);
  /* fcfidus */
  GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
  /* fctid */
 -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B);
 +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64);
 +/* fctidz */
 +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64);
  /* fctidu */
  GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206);
 -/* fctidz */
 -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B);
  /* fctidu */
  GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206);
 -#endif
  
  /* frin */
  GEN_FLOAT_B(rin, 0x08, 0x0C, 1, PPC_FLOAT_EXT);
 @@ -10050,16 +10048,14 @@ GEN_HANDLER_E(fctiwu

Re: [Qemu-devel] [PATCH 2/2] target-ppc : Add new processor type 440x5wDFPU

2014-09-12 Thread Tom Musta

On 9/11/2014 2:17 PM, Pierre Mallard wrote:
 This patch add a new processor type 440x5wDFPU for Virtex 5 PPC440
 with an external APU FPU in double precision mode
 ---
  target-ppc/cpu-models.c |3 +++
  target-ppc/translate_init.c |   38 ++
  2 files changed, 41 insertions(+)
 
 diff --git a/target-ppc/cpu-models.c b/target-ppc/cpu-models.c
 index 52ac6ec..91e9fac 100644
 --- a/target-ppc/cpu-models.c
 +++ b/target-ppc/cpu-models.c
 @@ -309,6 +309,9 @@
  #endif
  POWERPC_DEF(440-Xilinx,CPU_POWERPC_440_XILINX, 440x5,
  PowerPC 440 Xilinx 5)
 +
 +POWERPC_DEF(440-Xilinx-w-dfpu,CPU_POWERPC_440_XILINX, 
 440x5wDFPU,
 +PowerPC 440 Xilinx 5 With a Double Prec. FPU)
  #if defined(TODO)
  POWERPC_DEF(440A5, CPU_POWERPC_440A5,  440x5,
  PowerPC 440 A5)
 diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
 index ac4d12a..7d7dce7 100644
 --- a/target-ppc/translate_init.c
 +++ b/target-ppc/translate_init.c
 @@ -3923,6 +3923,44 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data)
   POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK;
  }
  
 +POWERPC_FAMILY(440x5wDFPU)(ObjectClass *oc, void *data)
 +{
 +DeviceClass *dc = DEVICE_CLASS(oc);
 +PowerPCCPUClass *pcc = POWERPC_CPU_CLASS(oc);
 +
 +dc-desc = PowerPC 440x5 with double precision FPU;
 +pcc-init_proc = init_proc_440x5;
 +pcc-check_pow = check_pow_nocheck;
 +pcc-insns_flags = PPC_INSNS_BASE | PPC_STRING |
 +   PPC_FLOAT | PPC_FLOAT_FSQRT | 
 +   PPC_FLOAT_STFIWX |
 +   PPC_DCR | PPC_WRTEE | PPC_RFMCI |
 +   PPC_CACHE | PPC_CACHE_ICBI |
 +   PPC_CACHE_DCBZ | PPC_CACHE_DCBA |
 +   PPC_MEM_TLBSYNC | PPC_MFTB |
 +   PPC_BOOKE | PPC_4xx_COMMON | PPC_405_MAC |
 +   PPC_440_SPEC;
 +pcc-insns_flags2 = PPC2_FP_CVT_S64;
 +pcc-msr_mask = (1ull  MSR_POW) |
 +(1ull  MSR_CE) |
 +(1ull  MSR_EE) |
 +(1ull  MSR_PR) |
 +(1ull  MSR_FP) |
 +(1ull  MSR_ME) |
 +(1ull  MSR_FE0) |
 +(1ull  MSR_DWE) |
 +(1ull  MSR_DE) |
 +(1ull  MSR_FE1) |
 +(1ull  MSR_IR) |
 +(1ull  MSR_DR);
 +pcc-mmu_model = POWERPC_MMU_BOOKE;
 +pcc-excp_model = POWERPC_EXCP_BOOKE;
 +pcc-bus_model = PPC_FLAGS_INPUT_BookE;
 +pcc-bfd_mach = bfd_mach_ppc_403;
 +pcc-flags = POWERPC_FLAG_CE | POWERPC_FLAG_DWE |
 + POWERPC_FLAG_DE | POWERPC_FLAG_BUS_CLK;
 +}
 +
  static void init_proc_460 (CPUPPCState *env)
  {
  /* Time base */
 

I don't see any significant issues with this patch, but it does not pass the 
patch checker:

 ./scripts/checkpatch.pl  ../patches/pierre.mallard.fctid.v2/0002.patch
WARNING: line over 80 characters
#19: FILE: target-ppc/cpu-models.c:313:
+POWERPC_DEF(440-Xilinx-w-dfpu,CPU_POWERPC_440_XILINX, 
440x5wDFPU,

ERROR: trailing whitespace
#41: FILE: target-ppc/translate_init.c:3935:
+   PPC_FLOAT | PPC_FLOAT_FSQRT | $

ERROR: Missing Signed-off-by: line(s)

total: 2 errors, 1 warnings, 53 lines checked

../patches/pierre.mallard.fctid.v2/0002.patch has style problems, please 
review.  If any of these errors
are false positives report them to the maintainer, see
CHECKPATCH in MAINTAINERS.

Re: [Qemu-devel] [Qemu-ppc] [PATCH 0/3] Enabling floating point instruction to 440x5 CPUs

2014-09-11 Thread Tom Musta

On 9/10/2014 5:43 PM, Pierre Mallard wrote:
 On Wed, Sep 10, 2014 at 7:15 PM, Tom Musta tommu...@gmail.com 
 mailto:tommu...@gmail.com wrote:
 
 
 (1) Eliminate the TARGET_PPC64 checks for all six FP Doubleword Integer 
 Conversion instructions.
 
 
 There is also fcfids and fcfidus which leads to 8 instructions (fcfid, 
 fcfids, fcfidu, fcfidus and fctid, fctidz, fctidu, fctiduz), is this right ?

You are correct.

 
 
 (2) Defined a new flag for FP Signed Doubleword Conversion instructions 
 (PPC2_FP_CVT_S64).  Use this flag exclusively when defining the opcode 
 tables, e.g.
 
 +/* fctidz */
 +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64);
 
 I'm not sure I understood that one correctly. To have the flag checked,
 I have to make changes for each of the three instructions (fcfid,
 fctid, fctidz) at 2 places in translate.c:
 
 One at the gen_ function definition which is quite straight forward :
 GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B);
 becomes
 GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64);
 
 One in the Opcode Table which requires to use the GEN_HANDLER_E macro for the 
 second type to be taken in account :
 GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC2_FP_CVT_S64),
 becomes
 GEN_HANDLER_E(fctid, 0x3F, 0x0E, 0x19, 0x001F, PPC_NONE, PPC2_FP_CVT_S64)
 
 is this right ?

Yes.

  
 
 
 (3) You would have to add the flag to all existing 64-bit CPUs that 
 support floating point.  And of course, to your new 440-w-fpu CPU.
 
  
 Pierre

Re: [Qemu-devel] [Qemu-ppc] [PATCH 2/3] target-ppc : Add PPC_FLOAT_64 flag to instructions type

2014-09-10 Thread Tom Musta

On 9/10/2014 4:18 AM, Alexander Graf wrote:
 
 
 On 10.09.14 07:03, Pierre Mallard wrote:
 This patch declare a new floating point instruction flag PPC_FLOAT_64 to be 
 used
 by fcfid, fctid[z] operations. Note that due to limited number of bit, 
 FSEL and FRES points now to same value, and PPC_FLOAT_64 to former FSEL 
 value. 
 (There seems to be no case where FSEL and FRES are not used together at the 
 moment)

 Signed-off-by: Pierre Mallard mallard.pie...@gmail.com
 ---
  target-ppc/cpu.h|7 +--
  target-ppc/translate_init.c |2 +-
  2 files changed, 6 insertions(+), 3 deletions(-)

 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index b64c652..b5b3912 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -1868,9 +1868,12 @@ enum {
  PPC_FLOAT_FRES = 0x0008ULL,
  PPC_FLOAT_FRSQRTE  = 0x0010ULL,
  PPC_FLOAT_FRSQRTES = 0x0020ULL,
 -PPC_FLOAT_FSEL = 0x0040ULL,
 +PPC_FLOAT_FSEL = 0x0008ULL,
  PPC_FLOAT_STFIWX   = 0x0080ULL,
  
 +/* Use for PPC with double precision fpu */
 +PPC_FLOAT_64   = 0x0040ULL,
 
 Please keep the list sorted by the bit number. Also I think we're better
 off not having the same bit used for 2 enums. Just keep PPC_FLOAT_FRES
 and make FSEL depend on the FRES bit in translate.c
 

Alternatively, you could add the new flag to PPC2_xxx .

 +
  /* Vector/SIMD extensions   
  */
  /*   Altivec support
  */
  PPC_ALTIVEC= 0x0100ULL,
 @@ -1957,7 +1960,7 @@ enum {
  | PPC_STRING | PPC_FLOAT | PPC_FLOAT_EXT \
  | PPC_FLOAT_FSQRT | PPC_FLOAT_FRES \
  | PPC_FLOAT_FRSQRTE | PPC_FLOAT_FRSQRTES \
 -| PPC_FLOAT_FSEL | PPC_FLOAT_STFIWX \
 +| PPC_FLOAT_FSEL | PPC_FLOAT_STFIWX | PPC_FLOAT_64 \
  | PPC_ALTIVEC | PPC_SPE | PPC_SPE_SINGLE \
  | PPC_SPE_DOUBLE | PPC_MEM_TLBIA \
  | PPC_MEM_TLBIE | PPC_MEM_TLBSYNC \
 diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
 index b4dedce..073bef1 100644
 --- a/target-ppc/translate_init.c
 +++ b/target-ppc/translate_init.c
 @@ -3899,7 +3899,7 @@ POWERPC_FAMILY(440x5)(ObjectClass *oc, void *data)
  pcc-insns_flags = PPC_INSNS_BASE | PPC_STRING |
  #ifdef PPC440x5_HAVE_FPU
 PPC_FLOAT | PPC_FLOAT_FSQRT | 
 -   PPC_FLOAT_STFIWX |
 +   PPC_FLOAT_STFIWX | PPC_FLOAT_64 |
  #endif
 PPC_DCR | PPC_WRTEE | PPC_RFMCI |
 PPC_CACHE | PPC_CACHE_ICBI |

Re: [Qemu-devel] [Qemu-ppc] [PATCH 3/3] target-ppc : Add PPC_FLOAT_64 type to fctid, fctidz and fcfid and remove their TARGET_PPC64 restriction

2014-09-10 Thread Tom Musta

On 9/10/2014 4:19 AM, Alexander Graf wrote:
 
 
 On 10.09.14 07:03, Pierre Mallard wrote:
 Apply the new PPC_FLOAT_64 flag to fctid[z] and fcfid. 
 May also be applyed to fctidu[z] and fcfid[su][z], but since they are not 
 mentionned in xilinx documentation it might not be needed yet.

 Signed-off-by: Pierre Mallard mallard.pie...@gmail.com
 ---
  target-ppc/fpu_helper.c |7 +++
  target-ppc/helper.h |6 --
  target-ppc/translate.c  |   20 
  3 files changed, 19 insertions(+), 14 deletions(-)

 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index da93d12..4e0e9e2 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -649,15 +649,13 @@ FPU_FCTI(fctiw, int32, 0x8000U)
  FPU_FCTI(fctiwz, int32_round_to_zero, 0x8000U)
  FPU_FCTI(fctiwu, uint32, 0xU)
  FPU_FCTI(fctiwuz, uint32_round_to_zero, 0xU)
 -#if defined(TARGET_PPC64)
  FPU_FCTI(fctid, int64, 0x8000ULL)
  FPU_FCTI(fctidz, int64_round_to_zero, 0x8000ULL)
 +#if defined(TARGET_PPC64)
  FPU_FCTI(fctidu, uint64, 0xULL)
  FPU_FCTI(fctiduz, uint64_round_to_zero, 0xULL)
  #endif

So fctid[z] are being handled by this new flag but fctidu[z] are not?  Uggh.
  
 -#if defined(TARGET_PPC64)
 -
  #define FPU_FCFI(op, cvtr, is_single)  \
  uint64_t helper_##op(CPUPPCState *env, uint64_t arg)   \
  {  \
 @@ -674,10 +672,11 @@ uint64_t helper_##op(CPUPPCState *env, uint64_t arg)   
 \
  }
  
  FPU_FCFI(fcfid, int64_to_float64, 0)
 +
 +#if defined(TARGET_PPC64)
  FPU_FCFI(fcfids, int64_to_float32, 1)
  FPU_FCFI(fcfidu, uint64_to_float64, 0)
  FPU_FCFI(fcfidus, uint64_to_float32, 1)
 -
  #endif
  
  static inline uint64_t do_fri(CPUPPCState *env, uint64_t arg,
 diff --git a/target-ppc/helper.h b/target-ppc/helper.h
 index 509eae5..e51aa69 100644
 --- a/target-ppc/helper.h
 +++ b/target-ppc/helper.h
 @@ -67,14 +67,16 @@ DEF_HELPER_2(fctiw, i64, env, i64)
  DEF_HELPER_2(fctiwu, i64, env, i64)
  DEF_HELPER_2(fctiwz, i64, env, i64)
  DEF_HELPER_2(fctiwuz, i64, env, i64)
 -#if defined(TARGET_PPC64)
  DEF_HELPER_2(fcfid, i64, env, i64)
 +#if defined(TARGET_PPC64)
  DEF_HELPER_2(fcfidu, i64, env, i64)
  DEF_HELPER_2(fcfids, i64, env, i64)
  DEF_HELPER_2(fcfidus, i64, env, i64)
 +#endif
  DEF_HELPER_2(fctid, i64, env, i64)
 -DEF_HELPER_2(fctidu, i64, env, i64)
  DEF_HELPER_2(fctidz, i64, env, i64)
 +#if defined(TARGET_PPC64)
 +DEF_HELPER_2(fctidu, i64, env, i64)
  DEF_HELPER_2(fctiduz, i64, env, i64)
  #endif
  DEF_HELPER_2(frsp, i64, env, i64)
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index c07bb01..6af25fe 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -2246,21 +2246,23 @@ GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT);
  GEN_FLOAT_B(ctiwuz, 0x0F, 0x04, 0, PPC2_FP_CVT_ISA206);
  /* frsp */
  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT);
 -#if defined(TARGET_PPC64)
  /* fcfid */
 -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B);
 +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_FLOAT_64|PPC_64B);

Given the limited scope of the flag (see my previous comment), I don't think
PPC_FLOAT_64 is a very good name for this.  The semantics of this flag, as
derived from your implementation, are really limited to fcfid, fctid and fctidz.
 +#if defined(TARGET_PPC64)
  /* fcfids */
  GEN_FLOAT_B(cfids, 0x0E, 0x1A, 0, PPC2_FP_CVT_ISA206);
  /* fcfidu */
  GEN_FLOAT_B(cfidu, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
  /* fcfidus */
  GEN_FLOAT_B(cfidus, 0x0E, 0x1E, 0, PPC2_FP_CVT_ISA206);
 +#endif
  /* fctid */
 -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B);
 +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_FLOAT_64|PPC_64B);
 +/* fctidz */
 +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_FLOAT_64|PPC_64B);
 +#if defined(TARGET_PPC64)
  /* fctidu */
  GEN_FLOAT_B(ctidu, 0x0E, 0x1D, 0, PPC2_FP_CVT_ISA206);
 -/* fctidz */
 -GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_64B);
  /* fctidu */
  GEN_FLOAT_B(ctiduz, 0x0F, 0x1D, 0, PPC2_FP_CVT_ISA206);
  #endif
 @@ -10050,14 +10052,16 @@ GEN_HANDLER_E(fctiwu, 0x3F, 0x0E, 0x04, 0, 
 PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_FLOAT_B(ctiwz, 0x0F, 0x00, 0, PPC_FLOAT),
  GEN_HANDLER_E(fctiwuz, 0x3F, 0x0F, 0x04, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_FLOAT_B(rsp, 0x0C, 0x00, 1, PPC_FLOAT),
 +GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_FLOAT_64|PPC_64B),
  #if defined(TARGET_PPC64)
 -GEN_FLOAT_B(cfid, 0x0E, 0x1A, 1, PPC_64B),
  GEN_HANDLER_E(fcfids, 0x3B, 0x0E, 0x1A, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_HANDLER_E(fcfidu, 0x3F, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
  GEN_HANDLER_E(fcfidus, 0x3B, 0x0E, 0x1E, 0, PPC_NONE, PPC2_FP_CVT_ISA206),
 -GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_64B),
 +#endif
 +GEN_FLOAT_B(ctid, 0x0E, 0x19, 0, PPC_FLOAT_64|PPC_64B),
 +GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC_FLOAT_64|PPC_64B),
 
 I think we're better off with only a single bit. Just make all 64bit
 CPUs that have an FPU also set PPC_FLOAT_64 and only check for that.

Re: [Qemu-devel] [Qemu-ppc] [PATCH 0/3] Enabling floating point instruction to 440x5 CPUs

2014-09-10 Thread Tom Musta

On 9/10/2014 4:20 AM, Alexander Graf wrote:
 
 
 On 10.09.14 07:03, Pierre Mallard wrote:
 This patch series enable floating point instruction in 440x5 CPUs 
 which have the capabilities to have optional APU FPU.

 1) Add floating point standard insns flag to 440x5 in case there is an apu 
 fpu.
 2) Define a new floating point insns flag for operation 
 previously reserved to 64 bits proc (fcfid, fctid, fctidz)
 3) Apply this new flag to fcfid, fctid, fctidz and move TARGET_PPC64 
 restrictions
 
 I've looked through the patches mostly from a stylistic point of view.
 As for whether the changes are technically correct and fully adhere to
 the specs, I haven't verified anything and would leave that part to Tom :).
 

I went back to some old (paper) versions of the ISA circa 1998 and the Floating 
Convert To/From Doubleword instructions all have this clause:

"This instruction is defined only for 64-bit implementations.  Using it on a
32-bit implementation will cause the system illegal instruction error handler
to be invoked."

I believe this view of things was in play for the 60x and Power Mac era 32-bit
CPUs, which is consistent with the existing QEMU implementation.

The next revision of the spec that I have is Power ISA 2.03 (2006) and the 
clause is gone.  Furthermore, the instructions are *NOT* in the 64 category.

To complicate matters more, the unsigned integer versions were added in ISA 
2.06 (fcfidu, fctidu, fctiduz).  QEMU deals with these via the 
PPC2_FP_CVT_ISA206 flag.

My interpretation is that all of the fc[tf]id[*] instructions are a required 
part of any Power floating point implementation -- 32-bit or 64-bit is 
irrelevant.

Based on all of this, I think it would make sense to do the following in this 
patch series:

(1) Eliminate the TARGET_PPC64 checks for all six FP Doubleword Integer 
Conversion instructions.

(2) Define a new flag for FP Signed Doubleword Conversion instructions 
(PPC2_FP_CVT_S64).  Use this flag exclusively when defining the opcode tables, 
e.g.

+/* fctidz */
+GEN_FLOAT_B(ctidz, 0x0F, 0x19, 0, PPC2_FP_CVT_S64);


(3) You would have to add the flag to all existing 64-bit CPUs that support 
floating point.  And of course, to your new 440-w-fpu CPU.

[Qemu-devel] [PATCH] target-ppc: Implement IVOR[59] By Default for Book E

2014-09-05 Thread Tom Musta

Adjust the IVOR mask for generic Book E implementation to support bit 59.
This is consistent with the Power ISA.

Signed-off-by: Tom Musta tommu...@gmail.com
Reported-by: Pierre Mallard mallard.pie...@gmail.com
---
 target-ppc/translate_init.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-ppc/translate_init.c b/target-ppc/translate_init.c
index 48177ed..85a11ef 100644
--- a/target-ppc/translate_init.c
+++ b/target-ppc/translate_init.c
@@ -2786,7 +2786,7 @@ static void init_excp_BookE (CPUPPCState *env)
 env-excp_vectors[POWERPC_EXCP_DTLB] = 0x;
 env-excp_vectors[POWERPC_EXCP_ITLB] = 0x;
 env-excp_vectors[POWERPC_EXCP_DEBUG]= 0x;
-env-ivor_mask = 0xFFE0UL;
+env-ivor_mask = 0xFFF0UL;
 env-ivpr_mask = 0xUL;
 /* Hardware reset vector */
 env-hreset_vector = 0xFFFCUL;
-- 
1.7.1

Re: [Qemu-devel] [PATCH 15/17] ppc: store CR registers in 32 1-bit registers

2014-09-04 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 This makes comparisons much smaller and faster.  The speedup is
 approximately 10% on user-mode emulation on x86 host, 3-4% on PPC.
 
 Note that CRF_* constants are flipped to match PowerPC's big
 bit-endianness.  Previously, the CR register was effectively stored
 in mixed endianness, so now there is less indirection going on.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

There are some issues with this patch -- it doesn't compile due to some typing 
issues.  There are also some functional issues.  Some details are below.

(nit) Also, it doesn't pass checkpatch.pl.

 ---
  linux-user/main.c   |   4 +-
  target-ppc/cpu.h|  33 --
  target-ppc/fpu_helper.c |  39 ++
  target-ppc/helper.h |   6 -
  target-ppc/int_helper.c |   2 +-
  target-ppc/machine.c|   9 ++
  target-ppc/translate.c  | 307 
 +---
  7 files changed, 204 insertions(+), 196 deletions(-)
 
 diff --git a/linux-user/main.c b/linux-user/main.c
 index 152c031..b403f24 100644
 --- a/linux-user/main.c
 +++ b/linux-user/main.c
 @@ -1929,7 +1929,7 @@ void cpu_loop(CPUPPCState *env)
   * PPC ABI uses overflow flag in cr0 to signal an error
   * in syscalls.
   */
 -env-crf[0] = ~0x1;
 +env-cr[CRF_SO] = 0;
  ret = do_syscall(env, env-gpr[0], env-gpr[3], env-gpr[4],
   env-gpr[5], env-gpr[6], env-gpr[7],
   env-gpr[8], 0, 0);
 @@ -1939,7 +1939,7 @@ void cpu_loop(CPUPPCState *env)
  break;
  }
  if (ret  (target_ulong)(-515)) {
 -env-crf[0] |= 0x1;
 +env-cr[CRF_SO] = 1;
  ret = -ret;
  }
  env-gpr[3] = ret;
 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index 05c29b2..67510e8 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -939,7 +939,7 @@ struct CPUPPCState {
  /* CTR */
  target_ulong ctr;
  /* condition register */
 -uint32_t crf[8];
 +uint32_t cr[32];
  #if defined(TARGET_PPC64)
  /* CFAR */
  target_ulong cfar;
 @@ -1058,6 +1058,9 @@ struct CPUPPCState {
  uint64_t dtl_addr, dtl_size;
  #endif /* TARGET_PPC64 */
  
 +/* condition register, for migration compatibility */
 +uint32_t crf[8];
 +
  int error_code;
  uint32_t pending_interrupts;
  #if !defined(CONFIG_USER_ONLY)
 @@ -1200,12 +1203,20 @@ void store_fpscr(CPUPPCState *env, uint64_t arg, 
 uint32_t mask);
  
  static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
  {
 -return env-crf[i];
 +uint32_t r;
 +r = env-cr[i * 4];
 +r = (r  1) | (env-cr[i * 4 + 1]);
 +r = (r  1) | (env-cr[i * 4 + 2]);
 +r = (r  1) | (env-cr[i * 4 + 3]);
 +return r;
  }
  
  static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
  {
 -env-crf[i] = val;
 +env-cr[i * 4 + 0] = (val  0x08) != 0;
 +env-cr[i * 4 + 1] = (val  0x04) != 0;
 +env-cr[i * 4 + 2] = (val  0x02) != 0;
 +env-cr[i * 4 + 3] = (val  0x01) != 0;
  }
  
  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
 @@ -1256,14 +1267,14 @@ static inline int cpu_mmu_index (CPUPPCState *env)
  
  
 /*/
  /* CRF definitions */
 -#define CRF_LT3
 -#define CRF_GT2
 -#define CRF_EQ1
 -#define CRF_SO0
 -#define CRF_CH(1  CRF_LT)
 -#define CRF_CL(1  CRF_GT)
 -#define CRF_CH_OR_CL  (1  CRF_EQ)
 -#define CRF_CH_AND_CL (1  CRF_SO)
 +#define CRF_LT0
 +#define CRF_GT1
 +#define CRF_EQ2
 +#define CRF_SO3
 +#define CRF_CHCRF_LT
 +#define CRF_CLCRF_GT
 +#define CRF_CH_OR_CL  CRF_EQ
 +#define CRF_CH_AND_CL CRF_SO
  
  /* XER definitions */
  #define XER_SO  31
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index 1ccbcf3..9574ebe 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -1098,8 +1098,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  }
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
 -env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 -ppc_set_crf(env, crfD, 1  ret);
 +env-fpscr |= (0x08  FPSCR_FPRF)  ret;
 +ppc_set_crf(env, crfD, 0x08  ret);
  
  if (unlikely(ret == CRF_SO
(float64_is_signaling_nan(farg1.d) ||
 @@ -1130,8 +1130,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  }
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
 -env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 -ppc_set_crf(env, crfD, 1  ret);
 +env-fpscr |= (0x08  FPSCR_FPRF)  ret;
 +ppc_set_crf(env, crfD, 0x08  ret);
  
  if (unlikely(ret == CRF_SO)) {
  if (float64_is_signaling_nan(farg1.d) ||
 @@ -1403,7 +1403,7 @@ static inline uint32_t efscmplt(CPUPPCState *env, 
 uint32_t op1, uint32_t op2)

Re: [Qemu-devel] [PATCH 14/17] ppc: introduce ppc_get_crf and ppc_set_crf

2014-09-04 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 These two functions will group together four CR bits into a single
 value, once we change the representation of condition registers.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  linux-user/elfload.c |  2 +-
  linux-user/main.c|  2 +-
  linux-user/signal.c  |  4 ++--
  monitor.c|  2 +-
  target-ppc/cpu.h | 10 ++
  target-ppc/excp_helper.c |  2 +-
  target-ppc/fpu_helper.c  |  6 --
  target-ppc/gdbstub.c |  4 ++--
  target-ppc/int_helper.c  | 16 
  target-ppc/kvm.c |  4 ++--
  target-ppc/translate.c   | 13 +++--
  11 files changed, 39 insertions(+), 26 deletions(-)
 

The patch doesn't pass checkpatch.pl

 diff --git a/linux-user/elfload.c b/linux-user/elfload.c
 index bea803b..3769ae6 100644
 --- a/linux-user/elfload.c
 +++ b/linux-user/elfload.c
 @@ -858,7 +858,7 @@ static void elf_core_copy_regs(target_elf_gregset_t 
 *regs, const CPUPPCState *en
  (*regs)[37] = tswapreg(env-xer);
  
  for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
 -ccr |= env-crf[i]  (32 - ((i + 1) * 4));
 +ccr |= ppc_get_crf(env, i)  (32 - ((i + 1) * 4));
  }
  (*regs)[38] = tswapreg(ccr);
  }
 diff --git a/linux-user/main.c b/linux-user/main.c
 index 472a16d..152c031 100644
 --- a/linux-user/main.c
 +++ b/linux-user/main.c
 @@ -1550,7 +1550,7 @@ static int do_store_exclusive(CPUPPCState *env)
  }
  }
  }
 -env-crf[0] = (stored  1) | xer_so;
 +ppc_set_crf(env, 0, (stored  1) | xer_so);
  env-reserve_addr = (target_ulong)-1;
  }
  if (!segv) {
 diff --git a/linux-user/signal.c b/linux-user/signal.c
 index 26929c5..4f5d79f 100644
 --- a/linux-user/signal.c
 +++ b/linux-user/signal.c
 @@ -4512,7 +4512,7 @@ static void save_user_regs(CPUPPCState *env, struct 
 target_mcontext *frame,
  __put_user(env-xer, frame-mc_gregs[TARGET_PT_XER]);
  
  for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
 -ccr |= env-crf[i]  (32 - ((i + 1) * 4));
 +ccr |= ppc_get_crf(env, i)  (32 - ((i + 1) * 4));
  }
  __put_user(ccr, frame-mc_gregs[TARGET_PT_CCR]);
  
 @@ -4591,7 +4591,7 @@ static void restore_user_regs(CPUPPCState *env,
  __get_user(ccr, frame-mc_gregs[TARGET_PT_CCR]);
  
  for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
 -env-crf[i] = (ccr  (32 - ((i + 1) * 4)))  0xf;
 +ppc_set_crf(env, i, (ccr  (32 - ((i + 1) * 4)))  0xf);
  }
  
  if (!sig) {
 diff --git a/monitor.c b/monitor.c
 index ec73dd4..97d72f4 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -2968,7 +2968,7 @@ static target_long monitor_get_ccr (const struct 
 MonitorDef *md, int val)
  
  u = 0;
  for (i = 0; i  8; i++)

ARRAY_SIZE ?

 -u |= env-crf[i]  (32 - (4 * (i + 1)));
 +u |= ppc_get_crf(env, i)  (32 - (4 * (i + 1)));
  
  return u;
  }
 diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h
 index c1cb27f..05c29b2 100644
 --- a/target-ppc/cpu.h
 +++ b/target-ppc/cpu.h
 @@ -1198,6 +1198,16 @@ void ppc_tlb_invalidate_one (CPUPPCState *env, 
 target_ulong addr);
  
  void store_fpscr(CPUPPCState *env, uint64_t arg, uint32_t mask);
  
 +static inline uint32_t ppc_get_crf(const CPUPPCState *env, int i)
 +{
 +return env-crf[i];
 +}
 +
 +static inline void ppc_set_crf(CPUPPCState *env, int i, uint32_t val)
 +{
 +env-crf[i] = val;
 +}
 +
  static inline uint64_t ppc_dump_gpr(CPUPPCState *env, int gprn)
  {
  uint64_t gprv;
 diff --git a/target-ppc/excp_helper.c b/target-ppc/excp_helper.c
 index bf25d44..522fce4 100644
 --- a/target-ppc/excp_helper.c
 +++ b/target-ppc/excp_helper.c
 @@ -504,7 +504,7 @@ static inline void powerpc_excp(PowerPCCPU *cpu, int 
 excp_model, int excp)
   env-error_code);
  }
  #endif
 -msr |= env-crf[0]  28;
 +msr |= ppc_get_crf(env, 0)  28;
  msr |= env-error_code; /* key, D/I, S/L bits */
  /* Set way using a LRU mechanism */
  msr |= ((env-last_way + 1)  (env-nb_ways - 1))  17;
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index 0fe006a..1ccbcf3 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -1099,7 +1099,8 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 -env-crf[crfD] = (1  ret);
 +ppc_set_crf(env, crfD, 1  ret);
 +
  if (unlikely(ret == CRF_SO
(float64_is_signaling_nan(farg1.d) ||
   float64_is_signaling_nan(farg2.d {
 @@ -1130,7 +1131,8 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
  env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 -env-crf[crfD] = (1  ret);
 +ppc_set_crf(env, crfD, 1  ret);
 +
  if (unlikely(ret == CRF_SO)) {
  if

Re: [Qemu-devel] [Qemu-ppc] [PULL 00/52] ppc patch queue 2014-09-04

2014-09-04 Thread Tom Musta

On 9/4/2014 2:13 PM, Alexander Graf wrote:
 
 
 Am 04.09.2014 um 20:38 schrieb Peter Maydell peter.mayd...@linaro.org:

 On 4 September 2014 18:19, Alexander Graf ag...@suse.de wrote:
 Hi Peter,

 This is my current patch queue for ppc.  Please pull.

 Hi. I'm afraid this doesn't build (x86, 64bit, gcc
 4.8.2, non-debug build):

  CCppc64abi32-linux-user/linux-user/signal.o
 /home/petmay01/linaro/qemu-for-merges/linux-user/signal.c: In function
 ‘setup_frame’:
 /home/petmay01/linaro/qemu-for-merges/linux-user/signal.c:4698:5:
 error: right shift count = width of type [-Werror]
 __put_user(set-sig[0]  32, sc-_unused[3]);
 ^
 cc1: all warnings being treated as errors
 
 Ouch. I guess we're missing ppc64abi32 in travis then - it never complained 
 here ;).
 
 Will fix in a few hours and resend.
 
 
 Alex
 

 thanks
 -- PMM
 

I am thinking that the way to fix this is by amending patch 5/52 with this
change:

diff --git a/linux-user/signal.c b/linux-user/signal.c
index a227213..edecd76 100644
--- a/linux-user/signal.c
+++ b/linux-user/signal.c
@@ -4682,7 +4682,7 @@ static void setup_frame(int sig, struct target_sigaction 
*ka,

 __put_user(ka-_sa_handler, sc-handler);
 __put_user(set-sig[0], sc-oldmask);
-#if defined(TARGET_PPC64)
+#if TARGET_ABI_BITS == 64
 __put_user(set-sig[0]  32, sc-_unused[3]);
 #else
 __put_user(set-sig[1], sc-_unused[3]);

Re: [Qemu-devel] [PATCH 04/17] ppc: use ARRAY_SIZE in gdbstub.c

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Match the idiom used by linux-user/signal.c and
 linux-user/elfload.c.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/gdbstub.c | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/target-ppc/gdbstub.c b/target-ppc/gdbstub.c
 index 14675f4..bad49ae 100644
 --- a/target-ppc/gdbstub.c
 +++ b/target-ppc/gdbstub.c
 @@ -138,7 +138,7 @@ int ppc_cpu_gdb_read_register(CPUState *cs, uint8_t 
 *mem_buf, int n)
  {
  uint32_t cr = 0;
  int i;
 -for (i = 0; i  8; i++) {
 +for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
  cr |= env-crf[i]  (32 - ((i + 1) * 4));
  }
  gdb_get_reg32(mem_buf, cr);
 @@ -246,7 +246,7 @@ int ppc_cpu_gdb_write_register(CPUState *cs, uint8_t 
 *mem_buf, int n)
  {
  uint32_t cr = ldl_p(mem_buf);
  int i;
 -for (i = 0; i  8; i++) {
 +for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
  env-crf[i] = (cr  (32 - ((i + 1) * 4)))  0xF;
  }
  break;
 

Since the same code appears in 3 different places, would it be better to 
implement a reusable function in target-ppc/cpu.h?

I.e.:

static inline uint32_t ppc_get_cr(const CPUPPCState *env) {
uint32_t cr = 0;
for (i = 0; i  ARRAY_SIZE(env-crf); i++) {
cr |= ppc_get_crf(env, i)  (32 - ((i + 1) * 4));
}
return cr;
}

Re: [Qemu-devel] [PATCH 06/17] ppc: use CRF_* in int_helper.c

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/int_helper.c | 12 ++--
  1 file changed, 6 insertions(+), 6 deletions(-)
 
 diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
 index f6e8846..9c1c5cd 100644
 --- a/target-ppc/int_helper.c
 +++ b/target-ppc/int_helper.c
 @@ -2303,25 +2303,25 @@ uint32_t helper_bcdadd(ppc_avr_t *r,  ppc_avr_t *a, 
 ppc_avr_t *b, uint32_t ps)
  if (sgna == sgnb) {
  result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
  zero = bcd_add_mag(result, a, b, invalid, overflow);
 -cr = (sgna  0) ? 4 : 8;
 +cr = (sgna  0) ? 1  CRF_GT : 1  CRF_LT;
  } else if (bcd_cmp_mag(a, b)  0) {
  result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgna, ps);
  zero = bcd_sub_mag(result, a, b, invalid, overflow);
 -cr = (sgna  0) ? 4 : 8;
 +cr = (sgna  0) ? 1  CRF_GT : 1  CRF_LT;
  } else {
  result.u8[BCD_DIG_BYTE(0)] = bcd_preferred_sgn(sgnb, ps);
  zero = bcd_sub_mag(result, b, a, invalid, overflow);
 -cr = (sgnb  0) ? 4 : 8;
 +cr = (sgnb  0) ? 1  CRF_GT : 1  CRF_LT;
  }
  }
  
  if (unlikely(invalid)) {
  result.u64[HI_IDX] = result.u64[LO_IDX] = -1;
 -cr = 1;
 +cr = 1  CRF_SO;
  } else if (overflow) {
 -cr |= 1;
 +cr |= 1  CRF_SO;
  } else if (zero) {
 -cr = 2;
 +cr = 1  CRF_EQ;
  }
  
  *r = result;
 

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 07/17] ppc: fix result of DLMZB when no zero bytes are found

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 It must return 8 and place 8 in XER, but the current code uses
 i directly which is 9 at this point of the code.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/int_helper.c | 1 +
  1 file changed, 1 insertion(+)
 
 diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
 index 9c1c5cd..7955bf7 100644
 --- a/target-ppc/int_helper.c
 +++ b/target-ppc/int_helper.c
 @@ -2573,6 +2573,7 @@ target_ulong helper_dlmzb(CPUPPCState *env, 
 target_ulong high,
  }
  i++;
  }
 +i = 8;
  if (update_Rc) {
  env-crf[0] = 0x2;
  }
 

Reviewed-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 05/17] ppc: use CRF_* in fpu_helper.c

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/fpu_helper.c | 32 
  1 file changed, 16 insertions(+), 16 deletions(-)
 
 diff --git a/target-ppc/fpu_helper.c b/target-ppc/fpu_helper.c
 index da93d12..0fe006a 100644
 --- a/target-ppc/fpu_helper.c
 +++ b/target-ppc/fpu_helper.c
 @@ -1043,7 +1043,7 @@ uint32_t helper_ftdiv(uint64_t fra, uint64_t frb)
  }
  }
  
 -return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
 +return (1  CRF_LT) | (fg_flag  CRF_GT) | (fe_flag  CRF_EQ);
  }
  
  uint32_t helper_ftsqrt(uint64_t frb)
 @@ -1074,7 +1074,7 @@ uint32_t helper_ftsqrt(uint64_t frb)
  }
  }
  
 -return 0x8 | (fg_flag ? 4 : 0) | (fe_flag ? 2 : 0);
 +return (1  CRF_LT) | (fg_flag  CRF_GT) | (fe_flag  CRF_EQ);
  }
  
  void helper_fcmpu(CPUPPCState *env, uint64_t arg1, uint64_t arg2,
 @@ -1088,19 +1088,19 @@ void helper_fcmpu(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  if (unlikely(float64_is_any_nan(farg1.d) ||
   float64_is_any_nan(farg2.d))) {
 -ret = 0x01UL;
 +ret = CRF_SO;
  } else if (float64_lt(farg1.d, farg2.d, env-fp_status)) {
 -ret = 0x08UL;
 +ret = CRF_LT;
  } else if (!float64_le(farg1.d, farg2.d, env-fp_status)) {
 -ret = 0x04UL;
 +ret = CRF_GT;
  } else {
 -ret = 0x02UL;
 +ret = CRF_EQ;
  }
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
 -env-fpscr |= ret  FPSCR_FPRF;
 -env-crf[crfD] = ret;
 -if (unlikely(ret == 0x01UL
 +env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 +env-crf[crfD] = (1  ret);
 +if (unlikely(ret == CRF_SO
(float64_is_signaling_nan(farg1.d) ||
   float64_is_signaling_nan(farg2.d {
  /* sNaN comparison */
 @@ -1119,19 +1119,19 @@ void helper_fcmpo(CPUPPCState *env, uint64_t arg1, 
 uint64_t arg2,
  
  if (unlikely(float64_is_any_nan(farg1.d) ||
   float64_is_any_nan(farg2.d))) {
 -ret = 0x01UL;
 +ret = CRF_SO;
  } else if (float64_lt(farg1.d, farg2.d, env-fp_status)) {
 -ret = 0x08UL;
 +ret = CRF_LT;
  } else if (!float64_le(farg1.d, farg2.d, env-fp_status)) {
 -ret = 0x04UL;
 +ret = CRF_GT;
  } else {
 -ret = 0x02UL;
 +ret = CRF_EQ;
  }
  
  env-fpscr = ~(0x0F  FPSCR_FPRF);
 -env-fpscr |= ret  FPSCR_FPRF;
 -env-crf[crfD] = ret;
 -if (unlikely(ret == 0x01UL)) {
 +env-fpscr |= (0x01  FPSCR_FPRF)  ret;
 +env-crf[crfD] = (1  ret);
 +if (unlikely(ret == CRF_SO)) {
  if (float64_is_signaling_nan(farg1.d) ||
  float64_is_signaling_nan(farg2.d)) {
  /* sNaN comparison */
 

I like this patch.

Nit: for the fcmp* functions, ret is not a very good name for the variable.  
Since this is a cleanup patch, I would suggest renaming it to fpcc.

Other than that ...

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 08/17] ppc: introduce helpers for mfocrf/mtocrf

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/helper.h |  3 +++
  target-ppc/int_helper.c | 22 ++
  target-ppc/translate.c  | 31 ---
  3 files changed, 29 insertions(+), 27 deletions(-)
 
 diff --git a/target-ppc/helper.h b/target-ppc/helper.h
 index 509eae5..5342f13 100644
 --- a/target-ppc/helper.h
 +++ b/target-ppc/helper.h
 @@ -60,6 +60,9 @@ DEF_HELPER_2(fpscr_setbit, void, env, i32)
  DEF_HELPER_2(float64_to_float32, i32, env, i64)
  DEF_HELPER_2(float32_to_float64, i64, env, i32)
  
 +DEF_HELPER_1(mfocrf, tl, env)
 +DEF_HELPER_3(mtocrf, void, env, tl, i32)
 +
  DEF_HELPER_4(fcmpo, void, env, i64, i64, i32)
  DEF_HELPER_4(fcmpu, void, env, i64, i64, i32)
  
 diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
 index 7955bf7..5fa10c7 100644
 --- a/target-ppc/int_helper.c
 +++ b/target-ppc/int_helper.c
 @@ -306,6 +306,28 @@ target_ulong helper_popcntw(target_ulong val)
  }
  #endif
  
 +void helper_mtocrf(CPUPPCState *env, target_ulong cr, uint32_t mask)
 +{
 +int i;
 +for (i = 7; i = 0; i--) {
 +if (mask  1) {
 +env-crf[i] = cr  0x0F;
 +}
 +cr = 4;
 +mask = 1;
 +}
 +}

Use ARRAY_SIZE?

 +
 +target_ulong helper_mfocrf(CPUPPCState *env)
 +{
 +uint32_t cr = 0;
 +int i;
 +for (i = 0; i  8; i++) {
 +cr |= env-crf[i]  (32 - (i + 1) * 4);
 +}
 +return cr;
 +}
 +

Use ARRAY_SIZE?  Or better yet, reuse the utility that I recommended adding as 
part of patch 4.

  
 /*/
  /* PowerPC 601 specific instructions (POWER bridge) */
  target_ulong helper_div(CPUPPCState *env, target_ulong arg1, target_ulong 
 arg2)
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 5a8267a..0a85a23 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -4145,24 +4145,7 @@ static void gen_mfcr(DisasContext *ctx)
  cpu_gpr[rD(ctx-opcode)], crn * 4);
  }
  } else {
 -TCGv_i32 t0 = tcg_temp_new_i32();
 -tcg_gen_mov_i32(t0, cpu_crf[0]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[1]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[2]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[3]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[4]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[5]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[6]);
 -tcg_gen_shli_i32(t0, t0, 4);
 -tcg_gen_or_i32(t0, t0, cpu_crf[7]);
 -tcg_gen_extu_i32_tl(cpu_gpr[rD(ctx-opcode)], t0);
 -tcg_temp_free_i32(t0);
 +gen_helper_mfocrf(cpu_gpr[rD(ctx-opcode)], cpu_env);
  }
  }
  
 @@ -4257,15 +4240,9 @@ static void gen_mtcrf(DisasContext *ctx)
  tcg_temp_free_i32(temp);
  }
  } else {
 -TCGv_i32 temp = tcg_temp_new_i32();
 -tcg_gen_trunc_tl_i32(temp, cpu_gpr[rS(ctx-opcode)]);
 -for (crn = 0 ; crn  8 ; crn++) {
 -if (crm  (1  crn)) {
 -tcg_gen_shri_i32(cpu_crf[7 - crn], temp, crn * 4);
 -tcg_gen_andi_i32(cpu_crf[7 - crn], cpu_crf[7 - crn], 
 0xf);
 -}
 -}
 -tcg_temp_free_i32(temp);
 +TCGv_i32 t0 = tcg_const_i32(crm);
 +gen_helper_mtocrf(cpu_env, cpu_gpr[rS(ctx-opcode)], t0);
 +tcg_temp_free_i32(t0);
  }
  }

Re: [Qemu-devel] [PATCH 03/17] ppc: fix monitor access to CR

2014-09-03 Thread Tom Musta

On 8/28/2014 12:14 PM, Paolo Bonzini wrote:
 This was off-by-one.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  monitor.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/monitor.c b/monitor.c
 index 34cee74..ec73dd4 100644
 --- a/monitor.c
 +++ b/monitor.c
 @@ -2968,7 +2968,7 @@ static target_long monitor_get_ccr (const struct 
 MonitorDef *md, int val)
  
  u = 0;
  for (i = 0; i  8; i++)
 -u |= env-crf[i]  (32 - (4 * i));
 +u |= env-crf[i]  (32 - (4 * (i + 1)));
  
  return u;
  }
 

Reviewed-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 09/17] ppc: reorganize gen_compute_fprf

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/translate.c | 22 ++
  1 file changed, 10 insertions(+), 12 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 0a85a23..afbd336 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -253,21 +253,19 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int 
 set_fprf, int set_rc)
  {
  TCGv_i32 t0 = tcg_temp_new_i32();
  
 -if (set_fprf != 0) {
 -/* This case might be optimized later */
 -tcg_gen_movi_i32(t0, 1);
 -gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 -if (unlikely(set_rc)) {
 -tcg_gen_mov_i32(cpu_crf[1], t0);
 -}
 -gen_helper_float_check_status(cpu_env);
 -} else if (unlikely(set_rc)) {
 -/* We always need to compute fpcc */
 -tcg_gen_movi_i32(t0, 0);
 -gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 +if (set_fprf == 0  !set_rc) {
 +return;
 +}
 +
 +tcg_gen_movi_i32(t0, set_fprf != 0);
 +gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 +if (set_rc) {
  tcg_gen_mov_i32(cpu_crf[1], t0);
  }
  
 +if (set_fprf != 0) {
 +gen_helper_float_check_status(cpu_env);
 +}
  tcg_temp_free_i32(t0);
  }
  
 

This has a leak:

Opcode 3f 07 12 (fc00048e) leaked temporaries

I made this modification on top of your patch to fix it:

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 0911c18..ff9b966 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -251,12 +251,13 @@ static inline void gen_reset_fpstatus(void)

 static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
 {
-TCGv_i32 t0 = tcg_temp_new_i32();
+TCGv_i32 t0;

 if (set_fprf == 0  !set_rc) {
 return;
 }

+t0 = tcg_temp_new_i32();
 tcg_gen_movi_i32(t0, set_fprf != 0);
 gen_helper_compute_fprf(t0, cpu_env, arg, t0);
 if (set_rc) {

Re: [Qemu-devel] [PATCH 10/17] ppc: introduce gen_op_mfcr/gen_op_mtcr

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com

This patch does not compile for 64 bit targets when TCG debug is enabled -- 
there are several places in this patch that need to be more explicit about the 
i32-ness of variables.  There is also a leak of temporaries in mfcr.  Details 
are below.
 ---
  target-ppc/translate.c | 60 
 +++---
  1 file changed, 42 insertions(+), 18 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index afbd336..8def0ae 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -249,6 +249,21 @@ static inline void gen_reset_fpstatus(void)
  gen_helper_reset_fpstatus(cpu_env);
  }
  
 +static inline void gen_op_mfcr(TCGv dest, int first_cr, int shift)

  -- TCGv_i32 dest

 +{
 +tcg_gen_shli_i32(dest, cpu_crf[first_cr  2], shift);
 +}
 +
 +static inline void gen_op_mtcr(int first_cr, TCGv src, int shift)

 - TCGv_i32 src
 +{
 +if (shift) {
 +tcg_gen_shri_i32(cpu_crf[first_cr  2], src, shift);
 +tcg_gen_andi_i32(cpu_crf[first_cr  2], cpu_crf[first_cr  2], 
 0x0F);
 +} else {
 +tcg_gen_andi_i32(cpu_crf[first_cr  2], src, 0x0F);
 +}
 +}
 +
  static inline void gen_compute_fprf(TCGv_i64 arg, int set_fprf, int set_rc)
  {
  TCGv_i32 t0 = tcg_temp_new_i32();
 @@ -260,7 +275,7 @@ static inline void gen_compute_fprf(TCGv_i64 arg, int 
 set_fprf, int set_rc)
  tcg_gen_movi_i32(t0, set_fprf != 0);
  gen_helper_compute_fprf(t0, cpu_env, arg, t0);
  if (set_rc) {
 -tcg_gen_mov_i32(cpu_crf[1], t0);
 +gen_op_mtcr(4, t0, 0);
  }
  
  if (set_fprf != 0) {
 @@ -2428,6 +2443,7 @@ static void gen_fmrgow(DisasContext *ctx)
  static void gen_mcrfs(DisasContext *ctx)
  {
  TCGv tmp = tcg_temp_new();
 +TCGv_i32 tmp32 = tcg_temp_new_i32();
  int bfa;
  
  if (unlikely(!ctx-fpu_enabled)) {
 @@ -2436,10 +2452,11 @@ static void gen_mcrfs(DisasContext *ctx)
  }
  bfa = 4 * (7 - crfS(ctx-opcode));
  tcg_gen_shri_tl(tmp, cpu_fpscr, bfa);
 -tcg_gen_trunc_tl_i32(cpu_crf[crfD(ctx-opcode)], tmp);
 +tcg_gen_trunc_tl_i32(tmp32, tmp);
  tcg_temp_free(tmp);
 -tcg_gen_andi_i32(cpu_crf[crfD(ctx-opcode)], cpu_crf[crfD(ctx-opcode)], 
 0xf);
 +gen_op_mtcr(crfD(ctx-opcode)  2, tmp32, 0);
  tcg_gen_andi_tl(cpu_fpscr, cpu_fpscr, ~(0xF  bfa));
 +tcg_temp_free(tmp32);

  --  tcg_temp_free_i32(tmp32);

  }
  
  /* mffs */
 @@ -2474,8 +2491,10 @@ static void gen_mtfsb0(DisasContext *ctx)
  tcg_temp_free_i32(t0);
  }
  if (unlikely(Rc(ctx-opcode) != 0)) {
 -tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 -tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 +TCGv_i32 tmp32 = tcg_temp_new_i32();
 +tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
 +gen_op_mtcr(4, tmp32, FPSCR_OX);
 +tcg_temp_free_i32(tmp32);
  }
  }
  
 @@ -2500,8 +2519,10 @@ static void gen_mtfsb1(DisasContext *ctx)
  tcg_temp_free_i32(t0);
  }
  if (unlikely(Rc(ctx-opcode) != 0)) {
 -tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 -tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 +TCGv_i32 tmp32 = tcg_temp_new_i32();
 +tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
 +gen_op_mtcr(4, tmp32, FPSCR_OX);
 +tcg_temp_free_i32(tmp32);
  }
  /* We can raise a differed exception */
  gen_helper_float_check_status(cpu_env);
 @@ -2535,8 +2556,10 @@ static void gen_mtfsf(DisasContext *ctx)
  gen_helper_store_fpscr(cpu_env, cpu_fpr[rB(ctx-opcode)], t0);
  tcg_temp_free_i32(t0);
  if (unlikely(Rc(ctx-opcode) != 0)) {
 -tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 -tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 +TCGv_i32 tmp32 = tcg_temp_new_i32();
 +tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
 +gen_op_mtcr(4, tmp32, FPSCR_OX);
 +tcg_temp_free_i32(tmp32);
  }
  /* We can raise a differed exception */
  gen_helper_float_check_status(cpu_env);
 @@ -2569,8 +2592,10 @@ static void gen_mtfsfi(DisasContext *ctx)
  tcg_temp_free_i64(t0);
  tcg_temp_free_i32(t1);
  if (unlikely(Rc(ctx-opcode) != 0)) {
 -tcg_gen_trunc_tl_i32(cpu_crf[1], cpu_fpscr);
 -tcg_gen_shri_i32(cpu_crf[1], cpu_crf[1], FPSCR_OX);
 +TCGv_i32 tmp32 = tcg_temp_new_i32();
 +tcg_gen_trunc_tl_i32(tmp32, cpu_fpscr);
 +gen_op_mtcr(4, tmp32, FPSCR_OX);
 +tcg_temp_free_i32(tmp32);
  }
  /* We can raise a differed exception */
  gen_helper_float_check_status(cpu_env);
 @@ -4137,10 +4162,10 @@ static void gen_mfcr(DisasContext *ctx)
  if (likely(ctx-opcode  0x0010)) {
  crm = CRM(ctx-opcode);
  if (likely(crm  ((crm  (crm - 1)) == 0))) {
 +TCGv_i32 t0 = tcg_temp_new_i32();

Re: [Qemu-devel] [PATCH 12/17] ppc: use movcond for isel

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/translate.c | 23 +++
  1 file changed, 11 insertions(+), 12 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 67f13f7..48c7b66 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -789,27 +789,26 @@ static void gen_cmpli(DisasContext *ctx)
  /* isel (PowerPC 2.03 specification) */
  static void gen_isel(DisasContext *ctx)
  {
 -int l1, l2;
  uint32_t bi = rC(ctx-opcode);
  uint32_t mask;
  TCGv_i32 t0;
 -
 -l1 = gen_new_label();
 -l2 = gen_new_label();
 +TCGv t1, true_op, zero;
  
  mask = 1  (3 - (bi  0x03));
  t0 = tcg_temp_new_i32();

This leaks t0 (never freed).

  tcg_gen_andi_i32(t0, cpu_crf[bi  2], mask);
 -tcg_gen_brcondi_i32(TCG_COND_EQ, t0, 0, l1);
 +t1 = tcg_temp_new();
 +tcg_gen_extu_i32_tl(t1, t0);
 +zero = tcg_const_tl(0);
  if (rA(ctx-opcode) == 0)
 -tcg_gen_movi_tl(cpu_gpr[rD(ctx-opcode)], 0);
 +true_op = zero;
  else
 -tcg_gen_mov_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rA(ctx-opcode)]);
 -tcg_gen_br(l2);
 -gen_set_label(l1);
 -tcg_gen_mov_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rB(ctx-opcode)]);
 -gen_set_label(l2);
 -tcg_temp_free_i32(t0);
 +true_op = cpu_gpr[rA(ctx-opcode)];
 +
 +tcg_gen_movcond_tl(cpu_gpr[rD(ctx-opcode)], t1, zero,
 +   true_op, cpu_gpr[rB(ctx-opcode)], TCG_COND_NE);

This doesn't compile for me ... the order of the arguments does not match what 
is defined in tcg-op.h.

 +tcg_temp_free_i32(t1);

Just tcg_temp_free(t1);

 +tcg_temp_free(zero);
  }
  
  /* cmpb: PowerPC 2.05 specification */

Re: [Qemu-devel] [PATCH 11/17] ppc: rename gen_set_cr6_from_fpscr

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 It sets CR1, not CR6 (and the spec agrees).
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/translate.c | 14 +++---
  1 file changed, 7 insertions(+), 7 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 8def0ae..67f13f7 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -8179,7 +8179,7 @@ static inline TCGv_ptr gen_fprp_ptr(int reg)
  }
  
  #if defined(TARGET_PPC64)
 -static void gen_set_cr6_from_fpscr(DisasContext *ctx)
 +static void gen_set_cr1_from_fpscr(DisasContext *ctx)
  {
  TCGv_i32 tmp = tcg_temp_new_i32();
  tcg_gen_trunc_tl_i32(tmp, cpu_fpscr);
 @@ -8187,7 +8187,7 @@ static void gen_set_cr6_from_fpscr(DisasContext *ctx)
  tcg_temp_free_i32(tmp);
  }
  #else
 -static void gen_set_cr6_from_fpscr(DisasContext *ctx)
 +static void gen_set_cr1_from_fpscr(DisasContext *ctx)
  {
  gen_op_mtcr(4, cpu_fpscr, 28);
  }
 @@ -8207,7 +8207,7 @@ static void gen_##name(DisasContext *ctx)\
  rb = gen_fprp_ptr(rB(ctx-opcode));  \
  gen_helper_##name(cpu_env, rd, ra, rb);  \
  if (unlikely(Rc(ctx-opcode) != 0)) {\
 -gen_set_cr6_from_fpscr(ctx); \
 +gen_set_cr1_from_fpscr(ctx); \
  }\
  tcg_temp_free_ptr(rd);   \
  tcg_temp_free_ptr(ra);   \
 @@ -8265,7 +8265,7 @@ static void gen_##name(DisasContext *ctx) \
  u32_2 = tcg_const_i32(u32f2(ctx-opcode));\
  gen_helper_##name(cpu_env, rt, rb, u32_1, u32_2); \
  if (unlikely(Rc(ctx-opcode) != 0)) { \
 -gen_set_cr6_from_fpscr(ctx);  \
 +gen_set_cr1_from_fpscr(ctx);  \
  } \
  tcg_temp_free_ptr(rt);\
  tcg_temp_free_ptr(rb);\
 @@ -8289,7 +8289,7 @@ static void gen_##name(DisasContext *ctx)\
  i32 = tcg_const_i32(i32fld(ctx-opcode));\
  gen_helper_##name(cpu_env, rt, ra, rb, i32); \
  if (unlikely(Rc(ctx-opcode) != 0)) {\
 -gen_set_cr6_from_fpscr(ctx); \
 +gen_set_cr1_from_fpscr(ctx); \
  }\
  tcg_temp_free_ptr(rt);   \
  tcg_temp_free_ptr(rb);   \
 @@ -8310,7 +8310,7 @@ static void gen_##name(DisasContext *ctx)\
  rb = gen_fprp_ptr(rB(ctx-opcode));  \
  gen_helper_##name(cpu_env, rt, rb);  \
  if (unlikely(Rc(ctx-opcode) != 0)) {\
 -gen_set_cr6_from_fpscr(ctx); \
 +gen_set_cr1_from_fpscr(ctx); \
  }\
  tcg_temp_free_ptr(rt);   \
  tcg_temp_free_ptr(rb);   \
 @@ -8331,7 +8331,7 @@ static void gen_##name(DisasContext *ctx)  \
  i32 = tcg_const_i32(i32fld(ctx-opcode));  \
  gen_helper_##name(cpu_env, rt, rs, i32);   \
  if (unlikely(Rc(ctx-opcode) != 0)) {  \
 -gen_set_cr6_from_fpscr(ctx);   \
 +gen_set_cr1_from_fpscr(ctx);   \
  }  \
  tcg_temp_free_ptr(rt); \
  tcg_temp_free_ptr(rs); \
 

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [PATCH 13/17] ppc: compute mask from BI using right shift

2014-09-03 Thread Tom Musta

On 8/28/2014 12:15 PM, Paolo Bonzini wrote:
 This will match the code we use in fpu_helper.c when we flip
 CRF_* bit-endianness.
 
 Signed-off-by: Paolo Bonzini pbonz...@redhat.com
 ---
  target-ppc/translate.c | 6 +++---
  1 file changed, 3 insertions(+), 3 deletions(-)
 
 diff --git a/target-ppc/translate.c b/target-ppc/translate.c
 index 48c7b66..4ce7af4 100644
 --- a/target-ppc/translate.c
 +++ b/target-ppc/translate.c
 @@ -794,7 +794,7 @@ static void gen_isel(DisasContext *ctx)
  TCGv_i32 t0;
  TCGv t1, true_op, zero;
  
 -mask = 1  (3 - (bi  0x03));
 +mask = 0x08  (bi  0x03);
  t0 = tcg_temp_new_i32();
  tcg_gen_andi_i32(t0, cpu_crf[bi  2], mask);
  t1 = tcg_temp_new();
 @@ -3870,7 +3870,7 @@ static inline void gen_bcond(DisasContext *ctx, int 
 type)
  if ((bo  0x10) == 0) {
  /* Test CR */
  uint32_t bi = BI(ctx-opcode);
 -uint32_t mask = 1  (3 - (bi  0x03));
 +uint32_t mask = 0x08  (bi  0x03);
  TCGv_i32 temp = tcg_temp_new_i32();
  
  if (bo  0x8) {
 @@ -3952,7 +3952,7 @@ static void glue(gen_, name)(DisasContext *ctx)
  else 
  \
  tcg_gen_mov_i32(t1, cpu_crf[crbB(ctx-opcode)  2]);
  \
  tcg_op(t0, t0, t1);  
  \
 -bitmask = 1  (3 - (crbD(ctx-opcode)  0x03)); 
  \
 +bitmask = 0x08  (crbD(ctx-opcode)  0x03);
  \
  tcg_gen_andi_i32(t0, t0, bitmask);   
  \
  tcg_gen_andi_i32(t1, cpu_crf[crbD(ctx-opcode)  2], ~bitmask); 
  \
  tcg_gen_or_i32(cpu_crf[crbD(ctx-opcode)  2], t0, t1); 
  \
 

Reviewed-by: Tom Musta tommu...@gmail.com
Tested-by: Tom Musta tommu...@gmail.com

Re: [Qemu-devel] [Qemu-ppc] [PATCH 4/5] target-ppc: Handle ibm, nmi-register RTAS call

2014-08-28 Thread Tom Musta

On 8/28/2014 3:37 AM, Alexander Graf wrote:
 
 
 On 28.08.14 08:38, Aravinda Prasad wrote:


 On Wednesday 27 August 2014 04:07 PM, Alexander Graf wrote:


 On 25.08.14 15:45, Aravinda Prasad wrote:
 This patch adds FWNMI support in qemu for powerKVM
 guests by handling the ibm,nmi-register rtas call.
 Whenever OS issues ibm,nmi-register RTAS call, the
 machine check notification address is saved and the
 machine check interrupt vector 0x200 is patched to
 issue a private hcall.

 Signed-off-by: Aravinda Prasad aravi...@linux.vnet.ibm.com
 ---
  hw/ppc/spapr_rtas.c|   91 
 
  include/hw/ppc/spapr.h |8 
  2 files changed, 98 insertions(+), 1 deletion(-)

 diff --git a/hw/ppc/spapr_rtas.c b/hw/ppc/spapr_rtas.c
 index 02ddbf9..1135d2b 100644
 --- a/hw/ppc/spapr_rtas.c
 +++ b/hw/ppc/spapr_rtas.c
 @@ -277,6 +277,91 @@ static void rtas_ibm_set_system_parameter(PowerPCCPU 
 *cpu,
  rtas_st(rets, 0, ret);
  }
  
 +static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
 +  sPAPREnvironment *spapr,
 +  uint32_t token, uint32_t nargs,
 +  target_ulong args,
 +  uint32_t nret, target_ulong rets)
 +{
 +int i;
 +uint32_t branch_inst = 0x4802;
 +target_ulong guest_machine_check_addr;
 +PowerPCCPUClass *pcc = POWERPC_CPU_GET_CLASS(cpu);
 +/*
 + * Trampoline saves r3 in sprg2 and issues private hcall
 + * to request qemu to build error log. QEMU builds the
 + * error log, copies to rtas-blob and returns the address.
 + * The initial 16 bytes in rtas-blob consists of saved srr0
 + * and srr1 which we restore and pass on the actual error
 + * log address to OS handled mcachine check notification
 + * routine
 + */
 +uint32_t trampoline[] = {
 +0x7c7243a6,/* mtspr   SPRN_SPRG2,r3 */
 +0x3860,/* li  r3,0   */
 +/* 0xf004 is the KVMPPC_H_REPORT_ERR private HCALL */
 +0x6063f004,/* ori r3,r3,f004  */
 +/* Issue H_CALL */
 +0x4422,/*  sc  1 */

 So up to here we're saving r3 in SPRG2 (how do we know that we can
 clobber it?) and call our special hypercall.

 But what does all the cruft below here do?

 The saved r3 in SPRG2 is consumed in KVMPPC_H_REPORT_ERR hcall, hence we
 can clobber SPRG2 after hcall returns. I have included a comment in
 patch 3/5 while building error log. I think better I add one here as well.


 +0x7c9243a6,/* mtspr r4 sprg2 */

 Apart from the fact that your order is wrong, this destroys the value of
 r3 that we saved above again.

 SPRG2 is saved inside hcall and hence we don't need SPRG2 further after
 KVMPPC_H_REPORT_ERR hcall returns.


 +0xe883,/* ld r4, 0(r3) */
 +0x7c9a03a6,/* mtspr r4, srr0 */
 +0xe8830008,/* ld r4, 8(r3) */
 +0x7c9b03a6,/* mtspr r4, srr1 */

 Can't we just set srr0 and srr1 directly?

 I checked for instructions in ISA which set srr0/1 directly given an
 address, but could not find any such instructions.
 
 I mean from QEMU :).
 


 +0x38630010,/* addi r3,r3,16 */
 +0x7c9242a6,/* mfspr r4 sprg2 */


 +0x4802,/* Branch to address registered
 +* by OS. The branch address is
 +* patched below */
 +0x4800,/* b . */
 +};
 +int total_inst = sizeof(trampoline) / sizeof(uint32_t);
 +
 +/* Store the system reset and machine check address */
 +guest_machine_check_addr = rtas_ld(args, 1);
 +
 +/* Safety Check */
 +if (sizeof(trampoline) = MC_INTERRUPT_VECTOR_SIZE) {
 +fprintf(stderr, Unable to register ibm,nmi_register: 
 +Trampoline size exceeded\n);
 +rtas_st(rets, 0, RTAS_OUT_NOT_SUPPORTED);
 +return;
 +}
 +
 +/*
 + * Update the branch instruction in trampoline with the absolute
 + * machine check address requested by OS
 + */
 +branch_inst |= guest_machine_check_addr;

 Does this even work? You're creating a relative branch here.

 We do an absolute branch here. guest_machine_check_addr contains the
 physical address of machine check handler. We update the branch
 instruction operand to do an absolute branch to this physical address.
 
 Where? I don't see the absolute bit anywhere.
 

Alex:

It is here:

+static void rtas_ibm_nmi_register(PowerPCCPU *cpu,
+  sPAPREnvironment *spapr,
+  uint32_t token, uint32_t nargs,
+  target_ulong args,
+  uint32_t nret, target_ulong rets)
+{
+int i;
+uint32_t branch_inst = 0x4802;
 ^
 ^

Re: [Qemu-devel] [RFT/RFH PATCH 00/16] PPC speedup patches for TCG

2014-08-28 Thread Tom Musta

On 8/28/2014 12:14 PM, Paolo Bonzini wrote:
 Hi everyone,
 
 these patches provide a speedup around 20% when running PPC softmmu
 emulation on x86 machines (10% for user-mode emulation).  There are
 actually two separate speedups here:
 
 * avoiding TLB flushing on every kernel-user transition (patches 1-2)
 
 * rewriting CR handling to use 32 1-bit registers instead of 8
   4-bit registers (patches 3-16)
 
 They must not be too shoddy; they boot a Linux guest fine. :) And the
 speedup is very interesting of course.  The three problems with it are:
 
 * I don't have a good testsuite.  So floating-point, decimal and SPE
   are mostly untested
 
 * I don't have much time to work on them (they are about a year old and
   I have just rebased them).
 
 * Patch 15 is a monster and hard to review, but I have no idea how to
   split it.
 
 Please take a look and if you are interested help in any way you can. :)

Paolo:  I will carve out some time to help with both testing and review.

 

[ ... ]

Re: [Qemu-devel] [Qemu-trivial] [PATCH] libdecnumber: Fix warnings from smatch (missing static, boolean operations)

2014-08-25 Thread Tom Musta

On 8/24/2014 4:42 AM, Stefan Weil wrote:
 Am 24.08.2014 11:21, schrieb Michael Tokarev:
 Applied to -trivial, thank you!

 But I've a small concern - should we really do this on external sources,
 and diverge from upstream needlessly?

 Thanks,

 /mjt
 
 In general, I agree. In this case, the code was part of gcc, and newer 
 versions of gcc use GPL 3 which is incompatible with QEMU, so I assume that 
 the code in QEMU is no longer available from a maintained upstream.
 
 Stefan
 
 
 

Yes.  We had to effectively fork a copy of the code to deal with the license 
issues.

FWIW ... Alex has suggested a reformat of the libdecnumber code to make it 
compatible with QEMU formatting 
(http://lists.nongnu.org/archive/html/qemu-ppc/2014-05/msg00085.html).  This is 
on my todo list.  Obviously, such a reformat would make it even
harder to synchronize with upstream gcc.

[Qemu-devel] [PATCH 0/6] target-ppc: More Cleanup of FXU Instructions

2014-08-25 Thread Tom Musta

This series follows up my previous series of bug fixes to Power fixed point
instructions 
(http://lists.nongnu.org/archive/html/qemu-ppc/2014-08/msg00068.html).
Richard Henderson provided additional feedback after the patches had been taken
into Alex Graf's ppc-next tree.

Tom Musta (6):
  target-ppc: Special Case of rlwimi Should Use Deposit
  target-ppc: Optimize rlwinm MB=0 ME=31
  target-ppc: Optimize rlwnm MB=0 ME=31
  target-ppc: Clean Up mullw
  target-ppc: Clean up mullwo
  target-ppc: Implement mulldo with TCG

 target-ppc/helper.h |1 -
 target-ppc/int_helper.c |   27 
 target-ppc/translate.c  |  103 ---
 3 files changed, 62 insertions(+), 69 deletions(-)

[Qemu-devel] [PATCH 1/6] target-ppc: Special Case of rlwimi Should Use Deposit

2014-08-25 Thread Tom Musta

The special case of rlwimi where MB = ME and SH = 31-ME can be implemented
with a single TCG deposit operation.  This replaces the less general case
of SH = MB = 0 and ME = 31.

Signed-off-by: Tom Musta tommu...@gmail.com
Suggested-by: Richard Henderson r...@twiddle.net
---
 target-ppc/translate.c |9 +++--
 1 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 47dc903..095b83c 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1636,12 +1636,9 @@ static void gen_rlwimi(DisasContext *ctx)
 mb = MB(ctx-opcode);
 me = ME(ctx-opcode);
 sh = SH(ctx-opcode);
-if (likely(sh == 0  mb == 0  me == 31)) {
-#if defined(TARGET_PPC64)
-tcg_gen_mov_i64(cpu_gpr[rA(ctx-opcode)], cpu_gpr[rS(ctx-opcode)]);
-#else
-tcg_gen_ext32u_tl(cpu_gpr[rA(ctx-opcode)], cpu_gpr[rS(ctx-opcode)]);
-#endif
+if (likely(sh == (31-me)  mb = me)) {
+tcg_gen_deposit_tl(cpu_gpr[rA(ctx-opcode)], cpu_gpr[rA(ctx-opcode)],
+   cpu_gpr[rS(ctx-opcode)], sh, me - mb + 1);
 } else {
 target_ulong mask;
 TCGv t1;
-- 
1.7.1

[Qemu-devel] [PATCH 2/6] target-ppc: Optimize rlwinm MB=0 ME=31

2014-08-25 Thread Tom Musta

Optimize the special case of rlwinm where MB=0 and ME=31.  This can
be implemented as a 32-bit ROTL.

Signed-off-by: Tom Musta tommu...@gmail.com
Suggested-by: Richard Henderson r...@twiddle.net
---
 target-ppc/translate.c |6 ++
 1 files changed, 6 insertions(+), 0 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 095b83c..889e37d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1691,6 +1691,12 @@ static void gen_rlwinm(DisasContext *ctx)
 tcg_gen_shri_tl(t0, t0, mb);
 tcg_gen_ext32u_tl(cpu_gpr[rA(ctx-opcode)], t0);
 tcg_temp_free(t0);
+} else if (likely(mb == 0  me == 31)) {
+TCGv_i32 t0 = tcg_temp_new_i32();
+tcg_gen_trunc_tl_i32(t0, cpu_gpr[rS(ctx-opcode)]);
+tcg_gen_rotli_i32(t0, t0, sh);
+tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx-opcode)], t0);
+tcg_temp_free_i32(t0);
 } else {
 TCGv t0 = tcg_temp_new();
 #if defined(TARGET_PPC64)
-- 
1.7.1

[Qemu-devel] [PATCH 6/6] target-ppc: Implement mulldo with TCG

2014-08-25 Thread Tom Musta

Optimize mulldo by using the muls2_i64 operation rather than a helper.
Eliminate the obsolete helper code.

Signed-off-by: Tom Musta tommu...@gmail.com
Suggested-by: Richard Henderson r...@twiddle.net
---
 target-ppc/helper.h |1 -
 target-ppc/int_helper.c |   27 ---
 target-ppc/translate.c  |   16 ++--
 3 files changed, 14 insertions(+), 30 deletions(-)

diff --git a/target-ppc/helper.h b/target-ppc/helper.h
index 509eae5..0cfdc8a 100644
--- a/target-ppc/helper.h
+++ b/target-ppc/helper.h
@@ -28,7 +28,6 @@ DEF_HELPER_2(icbi, void, env, tl)
 DEF_HELPER_5(lscbx, tl, env, tl, i32, i32, i32)
 
 #if defined(TARGET_PPC64)
-DEF_HELPER_3(mulldo, i64, env, i64, i64)
 DEF_HELPER_4(divdeu, i64, env, i64, i64, i32)
 DEF_HELPER_4(divde, i64, env, i64, i64, i32)
 #endif
diff --git a/target-ppc/int_helper.c b/target-ppc/int_helper.c
index e5b103b..713d777 100644
--- a/target-ppc/int_helper.c
+++ b/target-ppc/int_helper.c
@@ -24,33 +24,6 @@
 #include helper_regs.h
 /*/
 /* Fixed point operations helpers */
-#if defined(TARGET_PPC64)
-
-uint64_t helper_mulldo(CPUPPCState *env, uint64_t arg1, uint64_t arg2)
-{
-int64_t th;
-uint64_t tl;
-
-muls64(tl, (uint64_t *)th, arg1, arg2);
-
-/* th should either contain all 1 bits or all 0 bits and should
- * match the sign bit of tl; otherwise we have overflowed. */
-
-if ((int64_t)tl  0) {
-if (likely(th == -1LL)) {
-env-ov = 0;
-} else {
-env-so = env-ov = 1;
-}
-} else if (likely(th == 0LL)) {
-env-ov = 0;
-} else {
-env-so = env-ov = 1;
-}
-
-return (int64_t)tl;
-}
-#endif
 
 target_ulong helper_divweu(CPUPPCState *env, target_ulong ra, target_ulong rb,
uint32_t oe)
diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 1062634..d03daea 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1215,8 +1215,20 @@ static void gen_mulld(DisasContext *ctx)
 /* mulldo  mulldo. */
 static void gen_mulldo(DisasContext *ctx)
 {
-gen_helper_mulldo(cpu_gpr[rD(ctx-opcode)], cpu_env,
-  cpu_gpr[rA(ctx-opcode)], cpu_gpr[rB(ctx-opcode)]);
+TCGv_i64 t0 = tcg_temp_new_i64();
+TCGv_i64 t1 = tcg_temp_new_i64();
+
+tcg_gen_muls2_i64(t0, t1, cpu_gpr[rA(ctx-opcode)],
+  cpu_gpr[rB(ctx-opcode)]);
+tcg_gen_mov_i64(cpu_gpr[rD(ctx-opcode)], t0);
+
+tcg_gen_sari_i64(t0, t0, 63);
+tcg_gen_setcond_i64(TCG_COND_NE, cpu_ov, t0, t1);
+tcg_gen_or_tl(cpu_so, cpu_so, cpu_ov);
+
+tcg_temp_free_i64(t0);
+tcg_temp_free_i64(t1);
+
 if (unlikely(Rc(ctx-opcode) != 0)) {
 gen_set_Rc0(ctx, cpu_gpr[rD(ctx-opcode)]);
 }
-- 
1.7.1

[Qemu-devel] [PATCH 3/6] target-ppc: Optimize rlwnm MB=0 ME=31

2014-08-25 Thread Tom Musta

Optimize the special case of rlwnm where MB=0 and ME=31.  This can
be implemented using a ROTL.

Suggested-by: Richard Henderson r...@twiddle.net
Signed-off-by: Tom Musta tommu...@gmail.com
---
 target-ppc/translate.c |   56 +--
 1 files changed, 34 insertions(+), 22 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 889e37d..57cb381 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1721,37 +1721,49 @@ static void gen_rlwinm(DisasContext *ctx)
 static void gen_rlwnm(DisasContext *ctx)
 {
 uint32_t mb, me;
-TCGv t0;
-#if defined(TARGET_PPC64)
-TCGv t1;
-#endif
-
 mb = MB(ctx-opcode);
 me = ME(ctx-opcode);
-t0 = tcg_temp_new();
-tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx-opcode)], 0x1f);
+
+if (likely(mb == 0  me == 31)) {
+TCGv_i32 t0, t1;
+t0 = tcg_temp_new_i32();
+t1 = tcg_temp_new_i32();
+tcg_gen_trunc_tl_i32(t0, cpu_gpr[rB(ctx-opcode)]);
+tcg_gen_trunc_tl_i32(t1, cpu_gpr[rS(ctx-opcode)]);
+tcg_gen_andi_i32(t0, t0, 0x1f);
+tcg_gen_rotl_i32(t1, t1, t0);
+tcg_gen_extu_i32_tl(cpu_gpr[rA(ctx-opcode)], t1);
+tcg_temp_free_i32(t0);
+tcg_temp_free_i32(t1);
+} else {
+TCGv t0;
 #if defined(TARGET_PPC64)
-t1 = tcg_temp_new_i64();
-tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx-opcode)],
-cpu_gpr[rS(ctx-opcode)], 32, 32);
-tcg_gen_rotl_i64(t0, t1, t0);
-tcg_temp_free_i64(t1);
-#else
-tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx-opcode)], t0);
+TCGv t1;
 #endif
-if (unlikely(mb != 0 || me != 31)) {
+
+t0 = tcg_temp_new();
+tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx-opcode)], 0x1f);
 #if defined(TARGET_PPC64)
-mb += 32;
-me += 32;
+t1 = tcg_temp_new_i64();
+tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx-opcode)],
+cpu_gpr[rS(ctx-opcode)], 32, 32);
+tcg_gen_rotl_i64(t0, t1, t0);
+tcg_temp_free_i64(t1);
+#else
+tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx-opcode)], t0);
 #endif
-tcg_gen_andi_tl(cpu_gpr[rA(ctx-opcode)], t0, MASK(mb, me));
-} else {
+if (unlikely(mb != 0 || me != 31)) {
 #if defined(TARGET_PPC64)
-tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+mb += 32;
+me += 32;
 #endif
-tcg_gen_mov_tl(cpu_gpr[rA(ctx-opcode)], t0);
+tcg_gen_andi_tl(cpu_gpr[rA(ctx-opcode)], t0, MASK(mb, me));
+} else {
+tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+tcg_gen_mov_tl(cpu_gpr[rA(ctx-opcode)], t0);
+}
+tcg_temp_free(t0);
 }
-tcg_temp_free(t0);
 if (unlikely(Rc(ctx-opcode) != 0))
 gen_set_Rc0(ctx, cpu_gpr[rA(ctx-opcode)]);
 }
-- 
1.7.1

[Qemu-devel] [PATCH 5/6] target-ppc: Clean up mullwo

2014-08-25 Thread Tom Musta

Simplify the implementation of mullwo.  For 64 bit CPUs, the result is
the concatenation of the upper and lower parts of the muls2_i32 operation,
which may be slightly better than deposit.  For 32 bit CPUs, the lower part
of the muls2_i32 operation is moved into the target GPR.

Signed-off-by: Tom Musta tommu...@gmail.com
Suggested-by: Richard Henderson r...@twiddle.net
---
 target-ppc/translate.c |   11 +++
 1 files changed, 3 insertions(+), 8 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index ced295f..1062634 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1150,19 +1150,14 @@ static void gen_mullwo(DisasContext *ctx)
 {
 TCGv_i32 t0 = tcg_temp_new_i32();
 TCGv_i32 t1 = tcg_temp_new_i32();
-#if defined(TARGET_PPC64)
-TCGv_i64 t2 = tcg_temp_new_i64();
-#endif
 
 tcg_gen_trunc_tl_i32(t0, cpu_gpr[rA(ctx-opcode)]);
 tcg_gen_trunc_tl_i32(t1, cpu_gpr[rB(ctx-opcode)]);
 tcg_gen_muls2_i32(t0, t1, t0, t1);
-tcg_gen_ext_i32_tl(cpu_gpr[rD(ctx-opcode)], t0);
 #if defined(TARGET_PPC64)
-tcg_gen_ext_i32_tl(t2, t1);
-tcg_gen_deposit_i64(cpu_gpr[rD(ctx-opcode)],
-cpu_gpr[rD(ctx-opcode)], t2, 32, 32);
-tcg_temp_free(t2);
+tcg_gen_concat_i32_i64(cpu_gpr[rD(ctx-opcode)], t0, t1);
+#else
+tcg_gen_mov_i32(cpu_gpr[rD(ctx-opcode)], t0);
 #endif
 
 tcg_gen_sari_i32(t0, t0, 31);
-- 
1.7.1

[Qemu-devel] [PATCH 4/6] target-ppc: Clean Up mullw

2014-08-25 Thread Tom Musta

Eliminate the unnecessary ext32s TCG operation and make the multiplication
operation explicitly 32 bit.

Signed-off-by: Tom Musta tommu...@gmail.com
Suggested-by: Richard Henderson r...@twiddle.net
---
 target-ppc/translate.c |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 57cb381..ced295f 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1138,9 +1138,8 @@ static void gen_mullw(DisasContext *ctx)
 tcg_temp_free(t0);
 tcg_temp_free(t1);
 #else
-tcg_gen_mul_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rA(ctx-opcode)],
-   cpu_gpr[rB(ctx-opcode)]);
-tcg_gen_ext32s_tl(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rD(ctx-opcode)]);
+tcg_gen_mul_i32(cpu_gpr[rD(ctx-opcode)], cpu_gpr[rA(ctx-opcode)],
+cpu_gpr[rB(ctx-opcode)]);
 #endif
 if (unlikely(Rc(ctx-opcode) != 0))
 gen_set_Rc0(ctx, cpu_gpr[rD(ctx-opcode)]);
-- 
1.7.1

Re: [Qemu-devel] [PATCH 3/8] target-ppc: Bug Fix: rlwimi

2014-08-18 Thread Tom Musta

On 8/15/2014 3:05 PM, Richard Henderson wrote:
 On 08/11/2014 09:23 AM, Tom Musta wrote:
 Also fix the special case of MB=31 and ME=0 to copy the entire contents
 of the source GPR.
 
 Err, that's not what you did.
 
  if (likely(sh == 0  mb == 0  me == 31)) {
 +#if defined(TARGET_PPC64)
 +tcg_gen_mov_i64(cpu_gpr[rA(ctx-opcode)], cpu_gpr[rS(ctx-opcode)]);
 +#else
  tcg_gen_ext32u_tl(cpu_gpr[rA(ctx-opcode)], 
 cpu_gpr[rS(ctx-opcode)]);
 +#endif
 
 This is the reverse condition.  Which, true enough, should not be implemented
 with ext32u for PPC64.  But a MOV isn't right either, it is
 
   deposit(ra, rs, 0, 32)
 
 Which does point out that we should probably implement anything MB <= ME and 
 SH
 == 31 - ME with the deposit opcode.
 
 
 r~
 

Richard:

Good catch.  I found a bug in my test generator ... rlwimi is unusual in that 
the
RA register is both a source and a target.  A fix is forthcoming.

Thanks also for your other comments.  Unlike this one, I believe they are 
optimizations.
I will investigate and potentially publish some additional changes.  Alex has 
already
taken this series into his ppc-next, so the new patches will be relative to 
these.

Re: [Qemu-devel] [V2 PATCH 09/12] linux-user: Minimum Sig Handler Stack Size for PPC64 ELF V2

2014-08-13 Thread Tom Musta

On 8/13/2014 7:31 AM, Riku Voipio wrote:
 On Tue, Aug 12, 2014 at 01:53:40PM -0500, Tom Musta wrote:
 The ELF V2 ABI for PPC64 defines MINSIGSTKSZ as 4096 bytes whereas it was
 2048 previously.
 
 fails to build - need to make get_ppc64_abi a properly exported function.
  

Riku:

Sorry about this.  There is already a patch in Alex's ppc-next tree (but not
yet pulled into mainline) that does this:

http://lists.nongnu.org/archive/html/qemu-devel/2014-06/msg07225.html

I will republish V3 with this patch included.

 Signed-off-by: Tom Musta tommu...@gmail.com
 ---
 V2: Define and use TARGET_MINSIGSTKSZ constants from the various
 linux-user/$ARCH/syscall.h files (per Peter Maydell's review).

 There is still a runtime check for PPC64 since the stack size changes
 in ELF V2.

  linux-user/aarch64/syscall.h|1 +
  linux-user/alpha/syscall.h  |1 +
  linux-user/arm/syscall.h|2 ++
  linux-user/cris/syscall.h   |1 +
  linux-user/i386/syscall.h   |1 +
  linux-user/m68k/syscall.h   |2 ++
  linux-user/microblaze/syscall.h |1 +
  linux-user/mips/syscall.h   |1 +
  linux-user/mips64/syscall.h |1 +
  linux-user/openrisc/syscall.h   |2 ++
  linux-user/ppc/syscall.h|2 ++
  linux-user/s390x/syscall.h  |1 +
  linux-user/sh4/syscall.h|2 ++
  linux-user/signal.c |   12 +++-
  linux-user/sparc/syscall.h  |1 +
  linux-user/sparc64/syscall.h|1 +
  linux-user/unicore32/syscall.h  |2 ++
  linux-user/x86_64/syscall.h |1 +
  18 files changed, 34 insertions(+), 1 deletions(-)

 diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
 index 18f44a8..d1f4823 100644
 --- a/linux-user/aarch64/syscall.h
 +++ b/linux-user/aarch64/syscall.h
 @@ -8,3 +8,4 @@ struct target_pt_regs {
  #define UNAME_MACHINE aarch64
  #define UNAME_MINIMUM_RELEASE 3.8.0
  #define TARGET_CLONE_BACKWARDS
 +#define TARGET_MINSIGSTKSZ   2048
 diff --git a/linux-user/alpha/syscall.h b/linux-user/alpha/syscall.h
 index ed13d9a..3adedeb 100644
 --- a/linux-user/alpha/syscall.h
 +++ b/linux-user/alpha/syscall.h
 @@ -252,3 +252,4 @@ struct target_pt_regs {
  #define TARGET_UAC_NOPRINT  1
  #define TARGET_UAC_NOFIX2
  #define TARGET_UAC_SIGBUS   4
 +#define TARGET_MINSIGSTKSZ  4096
 diff --git a/linux-user/arm/syscall.h b/linux-user/arm/syscall.h
 index e0d2cc3..cdadb0c 100644
 --- a/linux-user/arm/syscall.h
 +++ b/linux-user/arm/syscall.h
 @@ -44,3 +44,5 @@ struct target_pt_regs {
  #define UNAME_MINIMUM_RELEASE 2.6.32
  
  #define TARGET_CLONE_BACKWARDS
 +
 +#define TARGET_MINSIGSTKSZ 2048
 diff --git a/linux-user/cris/syscall.h b/linux-user/cris/syscall.h
 index f5783c0..a75bcc4 100644
 --- a/linux-user/cris/syscall.h
 +++ b/linux-user/cris/syscall.h
 @@ -39,5 +39,6 @@ struct target_pt_regs {
  };
  
  #define TARGET_CLONE_BACKWARDS2
 +#define TARGET_MINSIGSTKSZ 2048
  
  #endif
 diff --git a/linux-user/i386/syscall.h b/linux-user/i386/syscall.h
 index 9bfc1ad..acf6856 100644
 --- a/linux-user/i386/syscall.h
 +++ b/linux-user/i386/syscall.h
 @@ -147,3 +147,4 @@ struct target_vm86plus_struct {
  #define UNAME_MINIMUM_RELEASE 2.6.32
  
  #define TARGET_CLONE_BACKWARDS
 +#define TARGET_MINSIGSTKSZ 2048
 diff --git a/linux-user/m68k/syscall.h b/linux-user/m68k/syscall.h
 index 889eaf7..f8553f8 100644
 --- a/linux-user/m68k/syscall.h
 +++ b/linux-user/m68k/syscall.h
 @@ -18,4 +18,6 @@ struct target_pt_regs {
  #define UNAME_MACHINE m68k
  #define UNAME_MINIMUM_RELEASE 2.6.32
  
 +#define TARGET_MINSIGSTKSZ 2048
 +
  void do_m68k_simcall(CPUM68KState *, int);
 diff --git a/linux-user/microblaze/syscall.h 
 b/linux-user/microblaze/syscall.h
 index 5b5f6b4..2a5e160 100644
 --- a/linux-user/microblaze/syscall.h
 +++ b/linux-user/microblaze/syscall.h
 @@ -49,5 +49,6 @@ struct target_pt_regs {
  };
  
  #define TARGET_CLONE_BACKWARDS
 +#define TARGET_MINSIGSTKSZ  2048
  
  #endif
 diff --git a/linux-user/mips/syscall.h b/linux-user/mips/syscall.h
 index 5bc5696..0b4662c 100644
 --- a/linux-user/mips/syscall.h
 +++ b/linux-user/mips/syscall.h
 @@ -228,3 +228,4 @@ struct target_pt_regs {
  #define UNAME_MINIMUM_RELEASE 2.6.32
  
  #define TARGET_CLONE_BACKWARDS
 +#define TARGET_MINSIGSTKSZ 2048
 diff --git a/linux-user/mips64/syscall.h b/linux-user/mips64/syscall.h
 index a7f5a58..39b8bed 100644
 --- a/linux-user/mips64/syscall.h
 +++ b/linux-user/mips64/syscall.h
 @@ -225,3 +225,4 @@ struct target_pt_regs {
  #define UNAME_MINIMUM_RELEASE 2.6.32
  
  #define TARGET_CLONE_BACKWARDS
 +#define TARGET_MINSIGSTKSZ  2048
 diff --git a/linux-user/openrisc/syscall.h b/linux-user/openrisc/syscall.h
 index c3b36da..e5e6180 100644
 --- a/linux-user/openrisc/syscall.h
 +++ b/linux-user/openrisc/syscall.h
 @@ -23,3 +23,5 @@ struct target_pt_regs {
  
  #define UNAME_MACHINE openrisc
  #define UNAME_MINIMUM_RELEASE 2.6.32
 +
 +#define TARGET_MINSIGSTKSZ 2048
 diff --git

[Qemu-devel] [V3 PATCH 00/13] target-ppc: Linux-User Mode Bug Fixes for Power

2014-08-13 Thread Tom Musta

This series of patches is the result of executing the Linux Test Program
(LTP) System Call bucket (https://github.com/linux-test-project/ltp)
on the 64 bit big and little endian linux user mode targets for Power.

Some of the changes are not technically unique to Power, but are effectively
so.  For example, Power may be the only runtime that uses the ipc system call
as a hub for other system calls (semctl, semop, ...).

The series is dependent on my previous patch series that adds signal handler
support on PPC64 
(http://lists.nongnu.org/archive/html/qemu-ppc/2014-06/msg00802.html).
That series has gone into Alex's ppcnext branch for QEMU 2.2.

V2: Addressing review comments from Peter Maydell.

V3: Included "linux-user: Move get_ppc64_abi" so that this series applies 
cleanly 
to the current git master.

Tom Musta (13):
  linux-user: PPC64 semid_ds Doesnt Include _unused1 and _unused2
  linux-user: Dereference Pointer Argument to ipc/semctl Sys Call
  linux-user: Properly Handle semun Structure In Cross-Endian
Situations
  linux-user: Make ipc syscall's third argument an abi_long
  linux-user: Conditionally Pass Attribute Pointer to mq_open()
  linux-user: Detect Negative Message Sizes in msgsnd System Call
  linux-user: Handle NULL sched_param argument to sched_*
  linux-user: Detect fault in sched_rr_get_interval
  linux-user: Move get_ppc64_abi
  linux-user: Minimum Sig Handler Stack Size for PPC64 ELF V2
  linux-user: clock_nanosleep errno Handling on PPC
  linux-user: Support target-to-host translation of mlockall argument
  linux-user: writev Partial Writes

 linux-user/aarch64/syscall.h|3 +
 linux-user/alpha/syscall.h  |3 +
 linux-user/arm/syscall.h|4 ++
 linux-user/cris/syscall.h   |3 +
 linux-user/elfload.c|9 
 linux-user/i386/syscall.h   |3 +
 linux-user/m68k/syscall.h   |4 ++
 linux-user/microblaze/syscall.h |3 +
 linux-user/mips/syscall.h   |3 +
 linux-user/mips64/syscall.h |3 +
 linux-user/openrisc/syscall.h   |4 ++
 linux-user/ppc/syscall.h|4 ++
 linux-user/ppc/target_cpu.h |   10 
 linux-user/s390x/syscall.h  |3 +
 linux-user/sh4/syscall.h|4 ++
 linux-user/signal.c |   12 -
 linux-user/sparc/syscall.h  |3 +
 linux-user/sparc64/syscall.h|3 +
 linux-user/syscall.c|  100 +-
 linux-user/unicore32/syscall.h  |4 ++
 linux-user/x86_64/syscall.h |3 +
 21 files changed, 164 insertions(+), 24 deletions(-)

[Qemu-devel] [V3 PATCH 01/13] linux-user: PPC64 semid_ds Doesnt Include _unused1 and _unused2

2014-08-13 Thread Tom Musta

The 64 bit PowerPC platforms eliminate the _unused1 and _unused2
elements of the semid_ds structure from sys/sem.h.  So eliminate
these from the target_semid_ds structure.

Signed-off-by: Tom Musta tommu...@gmail.com
---

 linux-user/syscall.c |4 
 1 files changed, 4 insertions(+), 0 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a50229d..540001c 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2419,9 +2419,13 @@ struct target_semid_ds
 {
   struct target_ipc_perm sem_perm;
   abi_ulong sem_otime;
+#if !defined(TARGET_PPC64)
   abi_ulong __unused1;
+#endif
   abi_ulong sem_ctime;
+#if !defined(TARGET_PPC64)
   abi_ulong __unused2;
+#endif
   abi_ulong sem_nsems;
   abi_ulong __unused3;
   abi_ulong __unused4;
-- 
1.7.1

[Qemu-devel] [V3 PATCH 02/13] linux-user: Dereference Pointer Argument to ipc/semctl Sys Call

2014-08-13 Thread Tom Musta

When the ipc system call is used to wrap a semctl system call,
the ptr argument to ipc needs to be dereferenced prior to passing
it to the semctl handler.  This is because the fourth argument to
semctl is a union and not a pointer to a union.

Signed-off-by: Tom Musta tommu...@gmail.com
---
V2:  This is unchanged from V1.  I *did* review the QEMU, glibc and kernel code
looking for some problems but did not find anything.  I also did fairly 
comprehensive
testing of semctl on 4 targets (ppc-linux-user, ppc64-linux-user, 
ppc64le-linux-user,
x86_64-linux-user) on 3 different host platforms (x86-64 Ubuntu, PPC64 RHEL 6 
(BE) and
PPC64 Ubuntu 14.04 (LE)); this provided a broad coverage of co-endian and cross 
endian
situations.

 linux-user/syscall.c |   10 --
 1 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 540001c..229c482 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -3135,9 +3135,15 @@ static abi_long do_ipc(unsigned int call, int first,
 ret = get_errno(semget(first, second, third));
 break;
 
-case IPCOP_semctl:
-ret = do_semctl(first, second, third, (union target_semun)(abi_ulong) 
ptr);
+case IPCOP_semctl: {
+/* The semun argument to semctl is passed by value, so dereference the
+ * ptr argument. */
+abi_ulong atptr;
+get_user_ual(atptr, (abi_ulong)ptr);
+ret = do_semctl(first, second, third,
+(union target_semun)(abi_ulong) atptr);
 break;
+}
 
 case IPCOP_msgget:
 ret = get_errno(msgget(first, second));
-- 
1.7.1

[Qemu-devel] [V3 PATCH 12/13] linux-user: Support target-to-host translation of mlockall argument

2014-08-13 Thread Tom Musta

The argument to the mlockall system call is not necessarily the same on
all platforms and thus may require translation prior to passing to the
host.

For example, PowerPC 64 bit platforms define values for MCL_CURRENT
(0x2000) and MCL_FUTURE (0x4000) which are different from Intel platforms
(0x1 and 0x2, respectively).

Signed-off-by: Tom Musta tommu...@gmail.com
---
V2: Per Peter Maydell's review, added a complete set of TARGET_MCL_*
macros in the various linux-user/$ARCH/syscall.h files.

 linux-user/aarch64/syscall.h|2 ++
 linux-user/alpha/syscall.h  |2 ++
 linux-user/arm/syscall.h|2 ++
 linux-user/cris/syscall.h   |2 ++
 linux-user/i386/syscall.h   |2 ++
 linux-user/m68k/syscall.h   |2 ++
 linux-user/microblaze/syscall.h |2 ++
 linux-user/mips/syscall.h   |2 ++
 linux-user/mips64/syscall.h |2 ++
 linux-user/openrisc/syscall.h   |2 ++
 linux-user/ppc/syscall.h|2 ++
 linux-user/s390x/syscall.h  |2 ++
 linux-user/sh4/syscall.h|2 ++
 linux-user/sparc/syscall.h  |2 ++
 linux-user/sparc64/syscall.h|2 ++
 linux-user/syscall.c|   17 -
 linux-user/unicore32/syscall.h  |2 ++
 linux-user/x86_64/syscall.h |2 ++
 18 files changed, 50 insertions(+), 1 deletions(-)

diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
index d1f4823..dc72a15 100644
--- a/linux-user/aarch64/syscall.h
+++ b/linux-user/aarch64/syscall.h
@@ -9,3 +9,5 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 3.8.0
 #define TARGET_CLONE_BACKWARDS
 #define TARGET_MINSIGSTKSZ   2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/alpha/syscall.h b/linux-user/alpha/syscall.h
index 3adedeb..245cff2 100644
--- a/linux-user/alpha/syscall.h
+++ b/linux-user/alpha/syscall.h
@@ -253,3 +253,5 @@ struct target_pt_regs {
 #define TARGET_UAC_NOFIX   2
 #define TARGET_UAC_SIGBUS  4
 #define TARGET_MINSIGSTKSZ  4096
+#define TARGET_MLOCKALL_MCL_CURRENT 0x2000
+#define TARGET_MLOCKALL_MCL_FUTURE  0x4000
diff --git a/linux-user/arm/syscall.h b/linux-user/arm/syscall.h
index cdadb0c..3844a96 100644
--- a/linux-user/arm/syscall.h
+++ b/linux-user/arm/syscall.h
@@ -46,3 +46,5 @@ struct target_pt_regs {
 #define TARGET_CLONE_BACKWARDS
 
 #define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/cris/syscall.h b/linux-user/cris/syscall.h
index a75bcc4..2957b0d 100644
--- a/linux-user/cris/syscall.h
+++ b/linux-user/cris/syscall.h
@@ -40,5 +40,7 @@ struct target_pt_regs {
 
 #define TARGET_CLONE_BACKWARDS2
 #define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
 
 #endif
diff --git a/linux-user/i386/syscall.h b/linux-user/i386/syscall.h
index acf6856..906aaac 100644
--- a/linux-user/i386/syscall.h
+++ b/linux-user/i386/syscall.h
@@ -148,3 +148,5 @@ struct target_vm86plus_struct {
 
 #define TARGET_CLONE_BACKWARDS
 #define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/m68k/syscall.h b/linux-user/m68k/syscall.h
index f8553f8..9218493 100644
--- a/linux-user/m68k/syscall.h
+++ b/linux-user/m68k/syscall.h
@@ -19,5 +19,7 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
 
 void do_m68k_simcall(CPUM68KState *, int);
diff --git a/linux-user/microblaze/syscall.h b/linux-user/microblaze/syscall.h
index 2a5e160..3c1ed27 100644
--- a/linux-user/microblaze/syscall.h
+++ b/linux-user/microblaze/syscall.h
@@ -50,5 +50,7 @@ struct target_pt_regs {
 
 #define TARGET_CLONE_BACKWARDS
 #define TARGET_MINSIGSTKSZ  2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
 
 #endif
diff --git a/linux-user/mips/syscall.h b/linux-user/mips/syscall.h
index 0b4662c..35ca23b 100644
--- a/linux-user/mips/syscall.h
+++ b/linux-user/mips/syscall.h
@@ -229,3 +229,5 @@ struct target_pt_regs {
 
 #define TARGET_CLONE_BACKWARDS
 #define TARGET_MINSIGSTKSZ 2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/mips64/syscall.h b/linux-user/mips64/syscall.h
index 39b8bed..6733107 100644
--- a/linux-user/mips64/syscall.h
+++ b/linux-user/mips64/syscall.h
@@ -226,3 +226,5 @@ struct target_pt_regs {
 
 #define TARGET_CLONE_BACKWARDS
 #define TARGET_MINSIGSTKSZ  2048
+#define TARGET_MLOCKALL_MCL_CURRENT 1
+#define TARGET_MLOCKALL_MCL_FUTURE  2
diff --git a/linux-user/openrisc/syscall.h b/linux-user/openrisc/syscall.h
index e5e6180..8ac0365 100644
--- a/linux-user/openrisc/syscall.h
+++ b/linux-user/openrisc/syscall.h
@@ -25,3 +25,5 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32

[Qemu-devel] [V3 PATCH 06/13] linux-user: Detect Negative Message Sizes in msgsnd System Call

2014-08-13 Thread Tom Musta

The msgsnd system call takes an argument that describes the message
size (msgsz) and is of type size_t.  The system call should set
errno to EINVAL in the event that a negative message size is passed.

Signed-off-by: Tom Musta tommu...@gmail.com
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---

 linux-user/syscall.c |6 +-
 1 files changed, 5 insertions(+), 1 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index 04f4820..79fb3cb 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -2874,12 +2874,16 @@ struct target_msgbuf {
 };
 
 static inline abi_long do_msgsnd(int msqid, abi_long msgp,
- unsigned int msgsz, int msgflg)
+ ssize_t msgsz, int msgflg)
 {
 struct target_msgbuf *target_mb;
 struct msgbuf *host_mb;
 abi_long ret = 0;
 
+if (msgsz  0) {
+return -TARGET_EINVAL;
+}
+
 if (!lock_user_struct(VERIFY_READ, target_mb, msgp, 0))
 return -TARGET_EFAULT;
 host_mb = malloc(msgsz+sizeof(long));
-- 
1.7.1

[Qemu-devel] [V3 PATCH 11/13] linux-user: clock_nanosleep errno Handling on PPC

2014-08-13 Thread Tom Musta

The clock_nanosleep syscall is unusual in that it returns positive
numbers in error handling situations, versus returning -1 and setting
errno, or returning a negative errno value.  On POWER, the kernel will
set the SO bit of CR0 to indicate failure in a syscall.  QEMU has
generic handling to do this for syscalls with standard return values.

Add special case code for clock_nanosleep to handle CR0 properly.

Signed-off-by: Tom Musta tommu...@gmail.com
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
V2: Eliminated redundant #if defined condition per Peter Maydell's
review.

 linux-user/syscall.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index a20c2f7..fc828ae 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -8999,6 +8999,14 @@ abi_long do_syscall(void *cpu_env, int num, abi_long 
arg1,
 ret = get_errno(clock_nanosleep(arg1, arg2, ts, arg4 ? ts : NULL));
 if (arg4)
 host_to_target_timespec(arg4, ts);
+
+#if defined(TARGET_PPC)
+/* clock_nanosleep is odd in that it returns positive errno values.
+ * On PPC, CR0 bit 3 should be set in such a situation. */
+if (ret) {
+((CPUPPCState *)cpu_env)-crf[0] |= 1;
+}
+#endif
 break;
 }
 #endif
-- 
1.7.1

[Qemu-devel] [V3 PATCH 09/13] linux-user: Move get_ppc64_abi

2014-08-13 Thread Tom Musta

The get_ppc64_abi routine is used to determine the ELF ABI (i.e. V1 or V2). This
routine is currently implemented in the linux-user/elfload.c file but
is useful in other scenarios.  Move the routine to a more generally
available location (linux-user/ppc/target_cpu.h).

Signed-off-by: Tom Musta tommu...@gmail.com
---
V3: new patch

 linux-user/elfload.c|9 -
 linux-user/ppc/target_cpu.h |   10 ++
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/linux-user/elfload.c b/linux-user/elfload.c
index 60777fe..bea803b 100644
--- a/linux-user/elfload.c
+++ b/linux-user/elfload.c
@@ -824,8 +824,6 @@ static uint32_t get_elf_hwcap2(void)
 NEW_AUX_ENT(AT_IGNOREPPC, AT_IGNOREPPC);\
 } while (0)
 
-static inline uint32_t get_ppc64_abi(struct image_info *infop);
-
 static inline void init_thread(struct target_pt_regs *_regs, struct image_info 
*infop)
 {
 _regs-gpr[1] = infop-start_stack;
@@ -1205,13 +1203,6 @@ static inline void init_thread(struct target_pt_regs 
*regs, struct image_info *i
 
 #include elf.h
 
-#ifdef TARGET_PPC
-static inline uint32_t get_ppc64_abi(struct image_info *infop)
-{
-  return infop-elf_flags  EF_PPC64_ABI;
-}
-#endif
-
 struct exec
 {
 unsigned int a_info;   /* Use macros N_MAGIC, etc for access */
diff --git a/linux-user/ppc/target_cpu.h b/linux-user/ppc/target_cpu.h
index 9cc0c3b..26f4ba2 100644
--- a/linux-user/ppc/target_cpu.h
+++ b/linux-user/ppc/target_cpu.h
@@ -38,4 +38,14 @@ static inline void cpu_set_tls(CPUPPCState *env, 
target_ulong newtls)
 #endif
 }
 
+#ifndef EF_PPC64_ABI
+#define EF_PPC64_ABI   0x3
+#endif
+
+static inline uint32_t get_ppc64_abi(struct image_info *infop)
+{
+  return infop-elf_flags  EF_PPC64_ABI;
+}
+
+
 #endif
-- 
1.7.1

[Qemu-devel] [V3 PATCH 13/13] linux-user: writev Partial Writes

2014-08-13 Thread Tom Musta

Although not technically required by POSIX, the writev system call will
typically write out its buffers individually.  That is, if the first buffer
is written successfully, but the second buffer pointer is invalid, then
the first chunk will be written and its size is returned.

Signed-off-by: Tom Musta tommu...@gmail.com
Reviewed-by: Peter Maydell peter.mayd...@linaro.org
---
V2: Use bool instead of int for bad_address per Peter Maydell's review.

 linux-user/syscall.c |   16 ++--
 1 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/linux-user/syscall.c b/linux-user/syscall.c
index edc48e1..fb54f0e 100644
--- a/linux-user/syscall.c
+++ b/linux-user/syscall.c
@@ -1798,6 +1798,7 @@ static struct iovec *lock_iovec(int type, abi_ulong 
target_addr,
 abi_ulong total_len, max_len;
 int i;
 int err = 0;
+bool bad_address = false;
 
 if (count == 0) {
 errno = 0;
@@ -1838,9 +1839,20 @@ static struct iovec *lock_iovec(int type, abi_ulong 
target_addr,
 vec[i].iov_base = 0;
 } else {
 vec[i].iov_base = lock_user(type, base, len, copy);
+/* If the first buffer pointer is bad, this is a fault.  But
+ * subsequent bad buffers will result in a partial write; this
+ * is realized by filling the vector with null pointers and
+ * zero lengths. */
 if (!vec[i].iov_base) {
-err = EFAULT;
-goto fail;
+if (i == 0) {
+err = EFAULT;
+goto fail;
+} else {
+bad_address = true;
+}
+}
+if (bad_address) {
+len = 0;
 }
 if (len  max_len - total_len) {
 len = max_len - total_len;
-- 
1.7.1

[Qemu-devel] [V3 PATCH 10/13] linux-user: Minimum Sig Handler Stack Size for PPC64 ELF V2

2014-08-13 Thread Tom Musta

The ELF V2 ABI for PPC64 defines MINSIGSTKSZ as 4096 bytes whereas it was
2048 previously.

Signed-off-by: Tom Musta tommu...@gmail.com
---
V2: Define and use TARGET_MINSIGSTKSZ constants from the various
linux-user/$ARCH/syscall.h files (per Peter Maydell's review).

 linux-user/aarch64/syscall.h|1 +
 linux-user/alpha/syscall.h  |1 +
 linux-user/arm/syscall.h|2 ++
 linux-user/cris/syscall.h   |1 +
 linux-user/i386/syscall.h   |1 +
 linux-user/m68k/syscall.h   |2 ++
 linux-user/microblaze/syscall.h |1 +
 linux-user/mips/syscall.h   |1 +
 linux-user/mips64/syscall.h |1 +
 linux-user/openrisc/syscall.h   |2 ++
 linux-user/ppc/syscall.h|2 ++
 linux-user/s390x/syscall.h  |1 +
 linux-user/sh4/syscall.h|2 ++
 linux-user/signal.c |   12 +++-
 linux-user/sparc/syscall.h  |1 +
 linux-user/sparc64/syscall.h|1 +
 linux-user/unicore32/syscall.h  |2 ++
 linux-user/x86_64/syscall.h |1 +
 18 files changed, 34 insertions(+), 1 deletions(-)

diff --git a/linux-user/aarch64/syscall.h b/linux-user/aarch64/syscall.h
index 18f44a8..d1f4823 100644
--- a/linux-user/aarch64/syscall.h
+++ b/linux-user/aarch64/syscall.h
@@ -8,3 +8,4 @@ struct target_pt_regs {
 #define UNAME_MACHINE aarch64
 #define UNAME_MINIMUM_RELEASE 3.8.0
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ   2048
diff --git a/linux-user/alpha/syscall.h b/linux-user/alpha/syscall.h
index ed13d9a..3adedeb 100644
--- a/linux-user/alpha/syscall.h
+++ b/linux-user/alpha/syscall.h
@@ -252,3 +252,4 @@ struct target_pt_regs {
 #define TARGET_UAC_NOPRINT 1
 #define TARGET_UAC_NOFIX   2
 #define TARGET_UAC_SIGBUS  4
+#define TARGET_MINSIGSTKSZ  4096
diff --git a/linux-user/arm/syscall.h b/linux-user/arm/syscall.h
index e0d2cc3..cdadb0c 100644
--- a/linux-user/arm/syscall.h
+++ b/linux-user/arm/syscall.h
@@ -44,3 +44,5 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS
+
+#define TARGET_MINSIGSTKSZ 2048
diff --git a/linux-user/cris/syscall.h b/linux-user/cris/syscall.h
index f5783c0..a75bcc4 100644
--- a/linux-user/cris/syscall.h
+++ b/linux-user/cris/syscall.h
@@ -39,5 +39,6 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS2
+#define TARGET_MINSIGSTKSZ 2048
 
 #endif
diff --git a/linux-user/i386/syscall.h b/linux-user/i386/syscall.h
index 9bfc1ad..acf6856 100644
--- a/linux-user/i386/syscall.h
+++ b/linux-user/i386/syscall.h
@@ -147,3 +147,4 @@ struct target_vm86plus_struct {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ 2048
diff --git a/linux-user/m68k/syscall.h b/linux-user/m68k/syscall.h
index 889eaf7..f8553f8 100644
--- a/linux-user/m68k/syscall.h
+++ b/linux-user/m68k/syscall.h
@@ -18,4 +18,6 @@ struct target_pt_regs {
 #define UNAME_MACHINE m68k
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
+#define TARGET_MINSIGSTKSZ 2048
+
 void do_m68k_simcall(CPUM68KState *, int);
diff --git a/linux-user/microblaze/syscall.h b/linux-user/microblaze/syscall.h
index 5b5f6b4..2a5e160 100644
--- a/linux-user/microblaze/syscall.h
+++ b/linux-user/microblaze/syscall.h
@@ -49,5 +49,6 @@ struct target_pt_regs {
 };
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ  2048
 
 #endif
diff --git a/linux-user/mips/syscall.h b/linux-user/mips/syscall.h
index 5bc5696..0b4662c 100644
--- a/linux-user/mips/syscall.h
+++ b/linux-user/mips/syscall.h
@@ -228,3 +228,4 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ 2048
diff --git a/linux-user/mips64/syscall.h b/linux-user/mips64/syscall.h
index a7f5a58..39b8bed 100644
--- a/linux-user/mips64/syscall.h
+++ b/linux-user/mips64/syscall.h
@@ -225,3 +225,4 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS
+#define TARGET_MINSIGSTKSZ  2048
diff --git a/linux-user/openrisc/syscall.h b/linux-user/openrisc/syscall.h
index c3b36da..e5e6180 100644
--- a/linux-user/openrisc/syscall.h
+++ b/linux-user/openrisc/syscall.h
@@ -23,3 +23,5 @@ struct target_pt_regs {
 
 #define UNAME_MACHINE openrisc
 #define UNAME_MINIMUM_RELEASE 2.6.32
+
+#define TARGET_MINSIGSTKSZ 2048
diff --git a/linux-user/ppc/syscall.h b/linux-user/ppc/syscall.h
index db92bbe..5311cc6 100644
--- a/linux-user/ppc/syscall.h
+++ b/linux-user/ppc/syscall.h
@@ -69,3 +69,5 @@ struct target_revectored_struct {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS
+
+#define TARGET_MINSIGSTKSZ 2048
diff --git a/linux-user/s390x/syscall.h b/linux-user/s390x/syscall.h
index aaad512..b11a3b2 100644
--- a/linux-user/s390x/syscall.h
+++ b/linux-user/s390x/syscall.h
@@ -24,3 +24,4 @@ struct target_pt_regs {
 #define UNAME_MINIMUM_RELEASE 2.6.32
 
 #define TARGET_CLONE_BACKWARDS2
+#define TARGET_MINSIGSTKSZ 2048

Re: [Qemu-devel] [PATCH 0/8] target-ppc: Bug Fixes for 64 Bit FXU Instructions

2014-08-12 Thread Tom Musta

On 8/11/2014 10:06 PM, David Gibson wrote:
> On Mon, Aug 11, 2014 at 02:23:21PM -0500, Tom Musta wrote:
>> These patches fix assorted bugs in the emulation of Power Fixed Point Unit
>> instructions.
>>
>> All instructions have been thoroughly tested by running millions of random
>> patterns through actual hardware and comparing the results against QEMU.
>> The bugs all appear to be limited to 64 bit implementations.
>
> I think understanding these fixes would be easier if each commit
> message included some example inputs for which the existing code
> generates the wrong results.
>

Thanks, David, for the feedback.  I will add some sample data patterns.

[Qemu-devel] [V2 PATCH 0/8] target-ppc: Bug Fixes for 64 Bit FXU Instructions

2014-08-12 Thread Tom Musta

These patches fix assorted bugs in the emulation of Power Fixed Point Unit
instructions.

All instructions have been thoroughly tested by running millions of random
patterns through actual hardware and comparing the results against QEMU.
The bugs all appear to be limited to 64 bit implementations.

V2: Added example data patterns to commit messages (no functional change from 
V1).

Tom Musta (8):
  target-ppc: Bug Fix: rlwinm
  target-ppc: Bug Fix: rlwnm
  target-ppc: Bug Fix: rlwimi
  target-ppc: Bug Fix: mullw
  target-ppc: Bug Fix: mullwo
  target-ppc: Bug Fix: mulldo OV Detection
  target-ppc: Bug Fix: srawi
  target-ppc: Bug Fix: srad

 target-ppc/int_helper.c |   16 ++--
 target-ppc/translate.c  |   60 +++---
 2 files changed, 53 insertions(+), 23 deletions(-)

[Qemu-devel] [V2 PATCH 7/8] target-ppc: Bug Fix: srawi

2014-08-12 Thread Tom Musta

For 64 bit implementations, the special case of a shift by zero
should result in the sign extension of the least significant 32 bits
of the source GPR (not a direct copy of the 64 bit source GPR).

Example:

R3 = A6212433228F41DC
srawi 3,3,0
R3 expected : 228F41DC
R3 actual   : A6212433228F41DC (without this patch)

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index 4904665..61fa42d 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1941,7 +1941,7 @@ static void gen_srawi(DisasContext *ctx)
 TCGv dst = cpu_gpr[rA(ctx->opcode)];
 TCGv src = cpu_gpr[rS(ctx->opcode)];
 if (sh == 0) {
-tcg_gen_mov_tl(dst, src);
+tcg_gen_ext32s_tl(dst, src);
 tcg_gen_movi_tl(cpu_ca, 0);
 } else {
 TCGv t0;
-- 
1.7.1

[Qemu-devel] [V2 PATCH 2/8] target-ppc: Bug Fix: rlwnm

2014-08-12 Thread Tom Musta

The rlwnm specification includes the ROTL32 operation, which is defined
to be a left rotation of two copies of the least significant 32 bits of
the source GPR.

The current implementation is incorrect on 64-bit implementations in that
it rotates a single copy of the least significant 32 bits, padding with
zeroes in the most significant bits.

Fix the code to properly implement this ROTL32 operation.

Example:

R3 = 0002
R4 = 7FFF
rlwnm 3,3,4,31,16
R3 expected : 00010001
R3 actual   : 0001 (without this patch)

Signed-off-by: Tom Musta <tommu...@gmail.com>
---
 target-ppc/translate.c |   18 +-
 1 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/target-ppc/translate.c b/target-ppc/translate.c
index a27d063..48f13a9 100644
--- a/target-ppc/translate.c
+++ b/target-ppc/translate.c
@@ -1695,7 +1695,7 @@ static void gen_rlwnm(DisasContext *ctx)
 uint32_t mb, me;
 TCGv t0;
 #if defined(TARGET_PPC64)
-TCGv_i32 t1, t2;
+TCGv t1;
 #endif
 
 mb = MB(ctx->opcode);
@@ -1703,14 +1703,11 @@ static void gen_rlwnm(DisasContext *ctx)
 t0 = tcg_temp_new();
 tcg_gen_andi_tl(t0, cpu_gpr[rB(ctx->opcode)], 0x1f);
 #if defined(TARGET_PPC64)
-t1 = tcg_temp_new_i32();
-t2 = tcg_temp_new_i32();
-tcg_gen_trunc_i64_i32(t1, cpu_gpr[rS(ctx->opcode)]);
-tcg_gen_trunc_i64_i32(t2, t0);
-tcg_gen_rotl_i32(t1, t1, t2);
-tcg_gen_extu_i32_i64(t0, t1);
-tcg_temp_free_i32(t1);
-tcg_temp_free_i32(t2);
+t1 = tcg_temp_new_i64();
+tcg_gen_deposit_i64(t1, cpu_gpr[rS(ctx->opcode)],
+cpu_gpr[rS(ctx->opcode)], 32, 32);
+tcg_gen_rotl_i64(t0, t1, t0);
+tcg_temp_free_i64(t1);
 #else
 tcg_gen_rotl_i32(t0, cpu_gpr[rS(ctx->opcode)], t0);
 #endif
@@ -1721,6 +1718,9 @@ static void gen_rlwnm(DisasContext *ctx)
 #endif
 tcg_gen_andi_tl(cpu_gpr[rA(ctx->opcode)], t0, MASK(mb, me));
 } else {
+#if defined(TARGET_PPC64)
+tcg_gen_andi_tl(t0, t0, MASK(32, 63));
+#endif
 tcg_gen_mov_tl(cpu_gpr[rA(ctx->opcode)], t0);
 }
 tcg_temp_free(t0);
-- 
1.7.1

1 2 3 4 5 6 7 8 9 >

1 - 100 of 869 matches

Mail list logo