Re: [Qemu-devel] [PATCH v2 08/27] linux-user/sh4: Notice gUSA regions during signal delivery

2017-07-15 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> We translate gUSA regions atomically in a parallel context.
> But in a serial context a gUSA region may be interrupted.
> In that case, restart the region as the kernel would.
> 
> Signed-off-by: Richard Henderson 
> ---
>  linux-user/signal.c | 23 +++
>  1 file changed, 23 insertions(+)

With my limited knowledge of linux-user:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 06/27] target/sh4: Handle user-space atomics

2017-07-15 Thread Aurelien Jarno
.  While we can handle
> +   any sequence via cpu_exec_step_atomic, we can recognize the "normal"
> +   sequences and transform them into atomic operations as seen by the host.
> +*/
> +static int decode_gusa(DisasContext *ctx, CPUSH4State *env, int *pmax_insns)
> +{
> +uint32_t pc = ctx->pc;
> +uint32_t pc_end = ctx->tb->cs_base;
> +int backup = sextract32(ctx->tbflags, GUSA_SHIFT, 8);
> +int max_insns = (pc_end - pc) / 2;
> +
> +if (pc != pc_end + backup || max_insns < 2) {
> +/* This is a malformed gUSA region.  Don't do anything special,
> +   since the interpreter is likely to get confused.  */
> +ctx->envflags &= ~GUSA_MASK;
> +return 0;
> +}
>
> +if (ctx->tbflags & GUSA_EXCLUSIVE) {
> +/* Regardless of single-stepping or the end of the page,
> +   we must complete execution of the gUSA region while
> +   holding the exclusive lock.  */
> +*pmax_insns = max_insns;
> +return 0;
>  }

What are the consequence of not stopping the translation when crossing
the page in user mode? If it doesn't have any, we should probably change
the code to never stop when crossing pages.

> +qemu_log_mask(LOG_UNIMP, "Unrecognized gUSA sequence %08x-%08x\n",
> +  pc, pc_end);
> +
> +/* Restart with the EXCLUSIVE bit set, within a TB run via
> +   cpu_exec_step_atomic holding the exclusive lock.  */
> +tcg_gen_insn_start(pc, ctx->envflags);
> +ctx->envflags |= GUSA_EXCLUSIVE;
> +gen_save_cpu_state(ctx, false);
> +gen_helper_exclusive(cpu_env);
> +ctx->bstate = BS_EXCP;
> +
> +/* We're not executing an instruction, but we must report one for the
> +   purposes of accounting within the TB.  We might as well report the
> +   entire region consumed via ctx->pc so that it's immediately available
> +   in the disassembly dump.  */
> +ctx->pc = pc_end;
> +return 1;
>  }
> +#endif
>  
>  void gen_intermediate_code(CPUSH4State * env, struct TranslationBlock *tb)
>  {
> @@ -1869,6 +1978,12 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  gen_tb_start(tb);
>  num_insns = 0;
>  
> +#ifdef CONFIG_USER_ONLY
> +if (ctx.tbflags & GUSA_MASK) {
> +num_insns = decode_gusa(&ctx, env, &max_insns);
> +}
> +#endif
> +
>  while (ctx.bstate == BS_NONE
> && num_insns < max_insns
> && !tcg_op_buf_full()) {
> @@ -1899,6 +2014,12 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  if (tb->cflags & CF_LAST_IO) {
>  gen_io_end();
>  }
> +
> +if (ctx.tbflags & GUSA_EXCLUSIVE) {
> +/* Ending the region of exclusivity.  Clear the bits.  */
> +ctx.envflags &= ~GUSA_MASK;
> +}
> +

IIUC this assumes the number of instructions in the sequence is always
executed. I guess this is not correct if the TCG op buffer is full. Some
non-privileged instructions might also stop the translation, but they
are all FPU instructions, so I guess the section is simply not valid in
that case.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 1/8] target/s390x: Implement CSST

2017-07-15 Thread Aurelien Jarno
On 2017-07-14 14:22, Richard Henderson wrote:
> On 07/14/2017 11:01 AM, Aurelien Jarno wrote:
> > > +if (parallel_cpus) {
> > > +int mask = 0;
> > > +#if !defined(CONFIG_ATOMIC64)
> > > +mask = -8;
> > > +#elif !defined(CONFIG_ATOMIC128)
> > > +mask = -16;
> > > +#endif
> > > +if (((4 << fc) | (1 << sc)) & mask) {
> > > +cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> > > +}
> > > +}
> > 
> > This doesn't look correct. For a 16-byte store, ie sc = 4, and with
> > ATOMIC128 support, ie mask = -16, the condition is true.
> 
> That's WITHOUT atomic128 support that mask = -16.
> If we have atomic128, then mask = 0.

Oh, right, it all looks correct then.

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-15 Thread Aurelien Jarno
On 2017-07-14 14:23, Richard Henderson wrote:
> On 07/14/2017 11:08 AM, Aurelien Jarno wrote:
> > On 2017-07-11 17:18, Thomas Huth wrote:
> > > On 10.07.2017 22:45, Richard Henderson wrote:
> > > > Signed-off-by: Richard Henderson 
> > > > ---
> > > >   target/s390x/helper.h  |   6 +
> > > >   target/s390x/mem_helper.c  | 310 
> > > > +
> > > >   target/s390x/translate.c   |  43 +++
> > > >   target/s390x/insn-data.def |  13 ++
> > > >   4 files changed, 372 insertions(+)
> > > > 
> > > > diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> > > > index 23e8d1d..2793cf3 100644
> > > > --- a/target/s390x/helper.h
> > > > +++ b/target/s390x/helper.h
> > > > @@ -107,6 +107,12 @@ DEF_HELPER_2(stfle, i32, env, i64)
> > > >   DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
> > > >   DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
> > > >   DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
> > > > +DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
> > > > +DEF_HELPER_4(cu14, i32, env, i32, i32, i32)
> > > > +DEF_HELPER_4(cu21, i32, env, i32, i32, i32)
> > > > +DEF_HELPER_4(cu24, i32, env, i32, i32, i32)
> > > > +DEF_HELPER_4(cu41, i32, env, i32, i32, i32)
> > > > +DEF_HELPER_4(cu42, i32, env, i32, i32, i32)
> > > >   #ifndef CONFIG_USER_ONLY
> > > >   DEF_HELPER_3(servc, i32, env, i64, i64)
> > > > diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> > > > index 513b402..0b18560 100644
> > > > --- a/target/s390x/mem_helper.c
> > > > +++ b/target/s390x/mem_helper.c
> > > [...]
> > > > +static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
> > > > +   uint32_t r2, uint32_t m3, 
> > > > uintptr_t ra,
> > > > +   decode_unicode_fn decode,
> > > > +   encode_unicode_fn encode)
> > > > +{
> > > > +uint64_t dst = get_address(env, r1);
> > > > +uint64_t dlen = get_length(env, r1 + 1);
> > > > +uint64_t src = get_address(env, r2);
> > > > +uint64_t slen = get_length(env, r2 + 1);
> > > > +bool enh_check = m3 & 1;
> > > > +int cc, i;
> > > 
> > > According to the PoP:
> > > 
> > > "The R1 and R2 fields [...] must designate an even-
> > >   numbered register; otherwise, a specification excep-
> > >   tion is recognized."
> > > 
> > > I think you should add such a check for even-numbered registers here.
> > 
> > Actually it should not be done here, but at translation time in
> > translate.c.
> > 
> > There are a few places where the register number is checked to be even
> > and later loaded into a temp. I guess that can be replaced by generators
> > instead?
> 
> Yes, that's done in a v3.5 patch that's a part of this thread.

Sorry I have some backlogs in all those mails, and I haven't seen it.
This is correct now.

That said I still wonder if we can get generators like that:

| static void in1_r1n(DisasContext *s, DisasFields *f, DisasOps *o)
| {   
| int r1 = get_field(s->fields, r1);
| o->in1 = tcg_const_i32(r1);
|
| #define SPEC_in1_r1n 0

and

| static void in1_r1n_even(DisasContext *s, DisasFields *f, DisasOps *o)
| {   
| int r1 = get_field(s->fields, r1);
| o->in1 = tcg_const_i32(r1);
|
| #define SPEC_in1_r1n_even SPEC_r1_even


-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3.5 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-15 Thread Aurelien Jarno
On 2017-07-11 08:23, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
> v3.5: Added even register checks in the translator [thuth].
> ---
>  target/s390x/helper.h  |   6 +
>  target/s390x/mem_helper.c  | 310 
> +
>  target/s390x/translate.c   |  51 
>  target/s390x/insn-data.def |  13 ++
>  4 files changed, 380 insertions(+)
> 

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-14 Thread Aurelien Jarno
On 2017-07-11 17:18, Thomas Huth wrote:
> On 10.07.2017 22:45, Richard Henderson wrote:
> > Signed-off-by: Richard Henderson 
> > ---
> >  target/s390x/helper.h  |   6 +
> >  target/s390x/mem_helper.c  | 310 
> > +
> >  target/s390x/translate.c   |  43 +++
> >  target/s390x/insn-data.def |  13 ++
> >  4 files changed, 372 insertions(+)
> > 
> > diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> > index 23e8d1d..2793cf3 100644
> > --- a/target/s390x/helper.h
> > +++ b/target/s390x/helper.h
> > @@ -107,6 +107,12 @@ DEF_HELPER_2(stfle, i32, env, i64)
> >  DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
> >  DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
> >  DEF_HELPER_4(mvcos, i32, env, i64, i64, i64)
> > +DEF_HELPER_4(cu12, i32, env, i32, i32, i32)
> > +DEF_HELPER_4(cu14, i32, env, i32, i32, i32)
> > +DEF_HELPER_4(cu21, i32, env, i32, i32, i32)
> > +DEF_HELPER_4(cu24, i32, env, i32, i32, i32)
> > +DEF_HELPER_4(cu41, i32, env, i32, i32, i32)
> > +DEF_HELPER_4(cu42, i32, env, i32, i32, i32)
> >  
> >  #ifndef CONFIG_USER_ONLY
> >  DEF_HELPER_3(servc, i32, env, i64, i64)
> > diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> > index 513b402..0b18560 100644
> > --- a/target/s390x/mem_helper.c
> > +++ b/target/s390x/mem_helper.c
> [...]
> > +static inline uint32_t convert_unicode(CPUS390XState *env, uint32_t r1,
> > +   uint32_t r2, uint32_t m3, uintptr_t 
> > ra,
> > +   decode_unicode_fn decode,
> > +   encode_unicode_fn encode)
> > +{
> > +uint64_t dst = get_address(env, r1);
> > +uint64_t dlen = get_length(env, r1 + 1);
> > +uint64_t src = get_address(env, r2);
> > +uint64_t slen = get_length(env, r2 + 1);
> > +bool enh_check = m3 & 1;
> > +int cc, i;
> 
> According to the PoP:
> 
> "The R1 and R2 fields [...] must designate an even-
>  numbered register; otherwise, a specification excep-
>  tion is recognized."
> 
> I think you should add such a check for even-numbered registers here.

Actually it should not be done here, but at translation time in
translate.c.

There are a few places where the register number is checked to be even
and later loaded into a temp. I guess that can be replaced by generators
instead?

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-14 Thread Aurelien Jarno
On 2017-07-10 10:45, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |   6 +
>  target/s390x/mem_helper.c  | 310 
> +
>  target/s390x/translate.c   |  43 +++
>  target/s390x/insn-data.def |  13 ++
>  4 files changed, 372 insertions(+)
> 

Besides the check for even r1 and r3, this now looks good.

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 1/8] target/s390x: Implement CSST

2017-07-14 Thread Aurelien Jarno
On 2017-07-10 10:45, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |   1 +
>  target/s390x/cpu_models.c  |   2 +
>  target/s390x/mem_helper.c  | 189 
> +
>  target/s390x/translate.c   |  13 +++-
>  target/s390x/insn-data.def |   2 +
>  5 files changed, 206 insertions(+), 1 deletion(-)
> 
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index ede8471..513b402 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -1353,6 +1353,195 @@ void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
>  env->regs[r1 + 1] = int128_getlo(oldv);
>  }
>  
> +uint32_t HELPER(csst)(CPUS390XState *env, uint32_t r3, uint64_t a1, uint64_t 
> a2)
> +{
> +#if !defined(CONFIG_USER_ONLY) || defined(CONFIG_ATOMIC128)
> +uint32_t mem_idx = cpu_mmu_index(env, false);
> +#endif
> +uintptr_t ra = GETPC();
> +uint32_t fc = extract32(env->regs[0], 0, 8);
> +uint32_t sc = extract32(env->regs[0], 8, 8);
> +uint64_t pl = get_address(env, 1) & -16;
> +uint64_t svh, svl;
> +uint32_t cc;
> +
> +/* Sanity check the function code and storage characteristic.  */
> +if (fc > 1 || sc > 3) {
> +if (!s390_has_feat(S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2)) {
> +goto spec_exception;
> +}
> +if (fc > 2 || sc > 4 || (fc == 2 && (r3 & 1))) {
> +goto spec_exception;
> +}
> +}
> +
> +/* Sanity check the alignments.  */
> +if (extract32(a1, 0, 4 << fc) || extract32(a2, 0, 1 << sc)) {
> +goto spec_exception;
> +}
> +
> +/* Sanity check writability of the store address.  */
> +#ifndef CONFIG_USER_ONLY
> +probe_write(env, a2, mem_idx, ra);
> +#endif
> +
> +/* Note that the compare-and-swap is atomic, and the store is atomic, but
> +   the complete operation is not.  Therefore we do not need to assert 
> serial
> +   context in order to implement this.  That said, restart early if we 
> can't
> +   support either operation that is supposed to be atomic.  */
> +if (parallel_cpus) {
> +int mask = 0;
> +#if !defined(CONFIG_ATOMIC64)
> +mask = -8;
> +#elif !defined(CONFIG_ATOMIC128)
> +mask = -16;
> +#endif
> +if (((4 << fc) | (1 << sc)) & mask) {
> +cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
> +}
> +}

This doesn't look correct. For a 16-byte store, ie sc = 4, and with
ATOMIC128 support, ie mask = -16, the condition is true.

> +/* All loads happen before all stores.  For simplicity, load the entire
> +   store value area from the parameter list.  */
> +svh = cpu_ldq_data_ra(env, pl + 16, ra);
> +svl = cpu_ldq_data_ra(env, pl + 24, ra);
> +
> +switch (fc) {
> +case 0:
> +{
> +uint32_t nv = cpu_ldl_data_ra(env, pl, ra);
> +uint32_t cv = env->regs[r3];
> +uint32_t ov;
> +
> +if (parallel_cpus) {
> +#ifdef CONFIG_USER_ONLY
> +uint32_t *haddr = g2h(a1);
> +ov = atomic_cmpxchg__nocheck(haddr, cv, nv);
> +#else
> +TCGMemOpIdx oi = make_memop_idx(MO_TEUL | MO_ALIGN, mem_idx);
> +ov = helper_atomic_cmpxchgl_be_mmu(env, a1, cv, nv, oi, ra);
> +#endif

Not a problem with the patch itself, but how complicated would it be to
make helper_atomic_cmpxchgl_be_mmu (just like the o version) available
also in user mode? That would make less #ifdef in the backends.


The remaining of the patch looks all good to me.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 4/8] target/s390x: Implement SRSTU

2017-07-07 Thread Aurelien Jarno
On 2017-07-01 13:25, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |  1 +
>  target/s390x/mem_helper.c  | 41 +
>  target/s390x/translate.c   | 13 +
>  target/s390x/insn-data.def |  2 ++
>  4 files changed, 57 insertions(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 3/8] target/s390x: Tidy SRST

2017-07-07 Thread Aurelien Jarno
On 2017-07-01 13:25, Richard Henderson wrote:
> Since we require all registers saved on input, read R0 from ENV instead
> of passing it manually.  Recognize the specification exception when R0
> contains incorrect data.  Keep high bits of result registers unmodified
> when in 31 or 24-bit mode.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |  2 +-
>  target/s390x/mem_helper.c  | 25 ++---
>  target/s390x/translate.c   |  9 +++--
>  target/s390x/insn-data.def |  2 +-
>  4 files changed, 23 insertions(+), 15 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 26/27] target/sh4: Implement fsrra

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/helper.h|  1 +
>  target/sh4/op_helper.c | 16 
>  target/sh4/translate.c |  2 ++
>  3 files changed, 19 insertions(+)
>
> diff --git a/target/sh4/helper.h b/target/sh4/helper.h
> index 6c6fa04..ea92dc0 100644
> --- a/target/sh4/helper.h
> +++ b/target/sh4/helper.h
> @@ -37,6 +37,7 @@ DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, 
> f32)
>  DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
>  DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32)
>  DEF_HELPER_FLAGS_2(fsqrt_DT, TCG_CALL_NO_WG, f64, env, f64)
> +DEF_HELPER_FLAGS_2(fsrra_FT, TCG_CALL_NO_WG, i32, env, i32)

That should be f32 instead of i32

>  DEF_HELPER_FLAGS_2(ftrc_FT, TCG_CALL_NO_WG, i32, env, f32)
>  DEF_HELPER_FLAGS_2(ftrc_DT, TCG_CALL_NO_WG, i32, env, f64)
>  DEF_HELPER_3(fipr, void, env, i32, i32)
> diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
> index 8513f38..d798f23 100644
> --- a/target/sh4/op_helper.c
> +++ b/target/sh4/op_helper.c
> @@ -406,6 +406,22 @@ float64 helper_fsqrt_DT(CPUSH4State *env, float64 t0)
>  return t0;
>  }
>  
> +float32 helper_fsrra_FT(CPUSH4State *env, float32 t0)
> +{
> +set_float_exception_flags(0, &env->fp_status);
> +/* "Approximate" 1/sqrt(x) via actual computation.  */
> +t0 = float32_sqrt(t0, &env->fp_status);
> +t0 = float32_div(float32_one, t0, &env->fp_status);
> +/* Since this is supposed to be an approximation, an imprecision
> +   exception is required.  One supposes this also follows the usual
> +   IEEE rule that other exceptions take precidence.  */
> +if (get_float_exception_flags(&env->fp_status) == 0) {
> +set_float_exception_flags(float_flag_inexact, &env->fp_status);
> +}
> +update_fpscr(env, GETPC());
> +return t0;
> +}
> +
>  float32 helper_fsub_FT(CPUSH4State *env, float32 t0, float32 t1)
>  {
>  set_float_exception_flags(0, &env->fp_status);
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 2b62e39..5fae872 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1753,6 +1753,8 @@ static void _decode_opc(DisasContext * ctx)
>   return;
>  case 0xf07d: /* fsrra FRn */
>   CHECK_FPU_ENABLED
> +    CHECK_FPSCR_PR_0
> +gen_helper_fsrra_FT(FREG(B11_8), cpu_env, FREG(B11_8));
>   break;
>  case 0xf08d: /* fldi0 FRn - FPSCR: R[PR] */
>   CHECK_FPU_ENABLED

Otherwise it looks fine.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 25/27] target/sh4: Add missing FPSCR.PR == 0 checks

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Both frchg and fschg require PR == 0, otherwise undefined_operation.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 2 ++
>  1 file changed, 2 insertions(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 24/27] target/sh4: Implement fpchg

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 5 +
>  1 file changed, 5 insertions(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 23/27] target/sh4: Introduce CHECK_SH4A

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 64 
> +++---
>  1 file changed, 29 insertions(+), 35 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 22/27] target/sh4: Introduce CHECK_FPSCR_PR_*

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 57 
> +++---
>  1 file changed, 31 insertions(+), 26 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 20/27] target/sh4: Unify code for CHECK_FPU_ENABLED

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> We do not need to emit N copies of raising an exception.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 24 ++--
>  1 file changed, 14 insertions(+), 10 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 21/27] target/sh4: Tidy misc illegal insn checks

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> Now that we have a do_illegal label, use goto in order
> to self-document the forcing of the exception.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 22 +-
>  1 file changed, 13 insertions(+), 9 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 19/27] target/sh4: Unify code for CHECK_PRIVILEGED

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> We do not need to emit N copies of raising an exception.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 14 --
>  1 file changed, 4 insertions(+), 10 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 18/27] target/sh4: Unify code for CHECK_NOT_DELAY_SLOT

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> We do not need to emit N copies of raising an exception.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 11 +--
>  1 file changed, 5 insertions(+), 6 deletions(-)
> 

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 17/27] target/sh4: Simplify 64-bit fp reg-reg move

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> We do not need to form full 64-bit quantities in order to perform
> the move.  This reduces code expansion on 64-bit hosts.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
> 

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 16/27] target/sh4: Load/store Dr as 64-bit quantities

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:21, Richard Henderson wrote:
> This enforces proper alignment and makes the register update
> more natural.  Note that there is a more serious bug fix for
> fmov {DX}Rn,@(R0,Rn) to use a store instead of a load.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 74 
> --
>  1 file changed, 35 insertions(+), 39 deletions(-)
> 
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 616e615..fcdabe8 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1044,18 +1038,20 @@ static void _decode_opc(DisasContext * ctx)
>   return;
>  case 0xf00b: /* fmov {F,D,X}Rm,@-Rn - FPSCR: Nothing */
>   CHECK_FPU_ENABLED
> -TCGv addr = tcg_temp_new_i32();
> -tcg_gen_subi_i32(addr, REG(B11_8), 4);
> -if (ctx->tbflags & FPSCR_SZ) {
> - int fr = XHACK(B7_4);
> -tcg_gen_qemu_st_i32(FREG(fr + 1), addr, ctx->memidx, MO_TEUL);
> - tcg_gen_subi_i32(addr, addr, 4);
> -tcg_gen_qemu_st_i32(FREG(fr), addr, ctx->memidx, MO_TEUL);
> - } else {
> -tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
> - }
> -tcg_gen_mov_i32(REG(B11_8), addr);
> -tcg_temp_free(addr);
> +{
> +TCGv addr = tcg_temp_new_i32();
> +if (ctx->tbflags & FPSCR_SZ) {
> +TCGv_i64 fp = tcg_temp_new_i64();
> +gen_load_fpr64(ctx, fp, XHACK(B7_4));
> +tcg_gen_qemu_st_i64(fp, addr, ctx->memidx, MO_TEQ);

addr is used without before being written. The following line is mising
before the load:

tcg_gen_subi_i32(addr, REG(B11_8), 8);

> +tcg_temp_free_i64(fp);
> +} else {
> +tcg_gen_subi_i32(addr, REG(B11_8), 4);
> +tcg_gen_qemu_st_i32(FREG(B7_4), addr, ctx->memidx, MO_TEUL);
> +}
> +tcg_gen_mov_i32(REG(B11_8), addr);
> +    tcg_temp_free(addr);
> +}
>   return;
>  case 0xf006: /* fmov @(R0,Rm),{F,D,X}Rm - FPSCR: Nothing */
>   CHECK_FPU_ENABLED

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 15/27] target/sh4: Merge DREG into fpr64 routines

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Also add a debugging assert that we did signal illegal opc
> for odd double-precision registers.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 26 +++---
>  1 file changed, 15 insertions(+), 11 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 14/27] target/sh4: Eliminate unused XREG macro

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 1 -
>  1 file changed, 1 deletion(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 13/27] target/sh4: Hoist fp register bank selection

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Compute which register bank to use once at the start of translation.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 8 +---
>  1 file changed, 5 insertions(+), 3 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 12/27] target/sh4: Pass DisasContext to fpr64 routines

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 26 +-
>  1 file changed, 13 insertions(+), 13 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 11/27] target/sh4: Unify cpu_fregs into FREG

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> We were treating FREG as an index and REG as a TCGv.
> Making FREG return a TCGv is both less confusing and
> a step toward cleaner banking of cpu_fregs.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 125 
> -
>  1 file changed, 52 insertions(+), 73 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 10/27] target/sh4: Hoist register bank selection

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Compute which register bank to use once at the start of translation.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 21 +++--
>  1 file changed, 11 insertions(+), 10 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 04/27] target/sh4: Keep env->flags clean

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> If we mask off any out-of-band bits before we assign to the
> variable, then we don't need to clean it up when reading.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/cpu.h | 2 +-
>  target/sh4/cpu.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 03/27] target/sh4: Introduce TB_FLAG_ENVFLAGS_MASK

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> We'll be putting more things into this bitmask soon.
> Let's have a name that covers all possible uses.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/cpu.h   | 4 +++-
>  target/sh4/translate.c | 4 ++--
>  2 files changed, 5 insertions(+), 3 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 05/27] target/sh4: Adjust TB_FLAG_PENDING_MOVCA

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> Don't leave an unused bit after DELAY_SLOT_MASK.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/cpu.h | 6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 02/27] target/sh4: Consolidate end-of-TB tests

2017-07-07 Thread Aurelien Jarno
On 2017-07-06 16:20, Richard Henderson wrote:
> We can fold 3 different tests within the decode loop
> into a more accurate computation of max_insns to start.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 29 +
>  1 file changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 82d4d69..663b5c0 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1848,7 +1848,6 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  ctx.features = env->features;
>  ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
>  
> -num_insns = 0;
>  max_insns = tb->cflags & CF_COUNT_MASK;
>  if (max_insns == 0) {
>  max_insns = CF_COUNT_MASK;
> @@ -1856,9 +1855,23 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  if (max_insns > TCG_MAX_INSNS) {
>  max_insns = TCG_MAX_INSNS;
>  }
> +/* Since the ISA is fixed-width, we can bound by the number
> +   of instructions remaining on the page.  */
> +num_insns = (TARGET_PAGE_SIZE - (ctx.pc & (TARGET_PAGE_SIZE - 1))) / 2;

This could be written as num_insn = -(ctx.pc | TARGET_PAGE_MASK) / 2;

> +if (max_insns > num_insns) {
> +max_insns = num_insns;
> +}

You are following the existing pattern, so I can really blame you, but
maybe it's the moment to change the existing code into something like:

max_insns = MIN(max_insn, ...)


> +/* Single stepping means just that.  */
> +if (ctx.singlestep_enabled || singlestep) {
> +max_insns = 1;
> +}
>  
>  gen_tb_start(tb);
> -while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
> +num_insns = 0;
> +
> +while (ctx.bstate == BS_NONE
> +   && num_insns < max_insns
> +   && !tcg_op_buf_full()) {
>  tcg_gen_insn_start(ctx.pc, ctx.envflags);
>  num_insns++;
>  
> @@ -1882,18 +1895,10 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  ctx.opcode = cpu_lduw_code(env, ctx.pc);
>   decode_opc(&ctx);
>   ctx.pc += 2;
> - if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
> - break;
> -if (cs->singlestep_enabled) {
> - break;
> -}
> -if (num_insns >= max_insns)
> -break;
> -if (singlestep)
> -break;
>  }
> -if (tb->cflags & CF_LAST_IO)
> +    if (tb->cflags & CF_LAST_IO) {
>  gen_io_end();
> +}
>  if (cs->singlestep_enabled) {
>  gen_save_cpu_state(&ctx, true);
>  gen_helper_debug(cpu_env);

Besides the minor nitpicks above:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 03/11] target/sh4: Handle user-space atomics

2017-07-06 Thread Aurelien Jarno
On 2017-07-05 14:23, Richard Henderson wrote:
> For uniprocessors, SH4 uses optimistic restartable atomic sequences.
> Upon an interrupt, a real kernel would simply notice magic values in
> the registers and reset the PC to the start of the sequence.
> 
> For QEMU, we cannot do this in quite the same way.  Instead, we notice
> the normal start of such a sequence (mov #-x,r15), and start a new TB
> that can be executed under cpu_exec_step_atomic.
> 
> Reported-by: Bruno Haible  
> LP: https://bugs.launchpad.net/bugs/1701971
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/cpu.h   |  21 ++--
>  target/sh4/helper.h|   1 +
>  target/sh4/op_helper.c |   6 +++
>  target/sh4/translate.c | 137 
> +++--
>  4 files changed, 147 insertions(+), 18 deletions(-)

I haven't reviewed this patch in details, but note that it breaks
booting a system under qemu-system.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 02/11] target/sh4: Consolidate end-of-TB tests

2017-07-06 Thread Aurelien Jarno
On 2017-07-05 14:23, Richard Henderson wrote:
> We can fold 3 different tests within the decode loop
> into a more accurate computation of max_insns to start.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/sh4/translate.c | 29 +
>  1 file changed, 17 insertions(+), 12 deletions(-)
> 
> diff --git a/target/sh4/translate.c b/target/sh4/translate.c
> index 6b247fa..e1661e9 100644
> --- a/target/sh4/translate.c
> +++ b/target/sh4/translate.c
> @@ -1856,7 +1856,6 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  ctx.features = env->features;
>  ctx.has_movcal = (ctx.tbflags & TB_FLAG_PENDING_MOVCA);
>  
> -num_insns = 0;
>  max_insns = tb->cflags & CF_COUNT_MASK;
>  if (max_insns == 0) {
>  max_insns = CF_COUNT_MASK;
> @@ -1864,9 +1863,23 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  if (max_insns > TCG_MAX_INSNS) {
>  max_insns = TCG_MAX_INSNS;
>  }
> +/* Since the ISA is fixed-width, we can bound by the number
> +   of instructions remaining on the page.  */
> +num_insns = (TARGET_PAGE_SIZE - (ctx.pc & (TARGET_PAGE_SIZE - 1))) / 2;

This could be written as num_insn = -(ctx.pc | TARGET_PAGE_SIZE) / 2;

> +if (max_insns > num_insns) {
> +max_insns = num_insns;
> +}

You are following the existing pattern, so I can really blame you, but
maybe it's the moment to change the existing code into something like:

max_insns = MIN(max_insn, ...)

> +/* Single stepping means just that.  */
> +if (ctx.singlestep_enabled || singlestep) {
> +max_insns = 1;
> +}
>  
>  gen_tb_start(tb);
> -while (ctx.bstate == BS_NONE && !tcg_op_buf_full()) {
> +num_insns = 0;
> +
> +while (ctx.bstate == BS_NONE
> +   && num_insns < max_insns
> +   && !tcg_op_buf_full()) {
>  tcg_gen_insn_start(ctx.pc, ctx.envflags);
>  num_insns++;
>  
> @@ -1890,18 +1903,10 @@ void gen_intermediate_code(CPUSH4State * env, struct 
> TranslationBlock *tb)
>  ctx.opcode = cpu_lduw_code(env, ctx.pc);
>   decode_opc(&ctx);
>   ctx.pc += 2;
> - if ((ctx.pc & (TARGET_PAGE_SIZE - 1)) == 0)
> - break;
> -if (cs->singlestep_enabled) {
> - break;
> -}
> -if (num_insns >= max_insns)
> -break;
> -if (singlestep)
> -break;
>  }
> -if (tb->cflags & CF_LAST_IO)
> +    if (tb->cflags & CF_LAST_IO) {
>  gen_io_end();
> +}
>  if (cs->singlestep_enabled) {
>  gen_save_cpu_state(&ctx, true);
>  gen_helper_debug(cpu_env);

Besides the minor nitpicks above:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 00/11] target/sh4 improvments

2017-07-06 Thread Aurelien Jarno
On 2017-07-05 14:23, Richard Henderson wrote:
> This fixes two problems with atomic operations on sh4,
> including an attempt at supporting the user-space atomics
> technique used by most sh-linux-user binaries.
> 
> This is good enough to run the one occurrence in linux-user-test-0.3.
> I'm still downloading enough of a cross environment to be able to run
> more recent sh4 binaries.  Including the one in the LP bug report.
> 
> Thoughts and more extensive testing appreciated.

Thanks for you work. For the next version, could you please base it on
this patchset:

http://lists.nongnu.org/archive/html/qemu-devel/2017-07/msg00095.html

I'll try to review it over the next days, but unfortunately I'll have
little time before Monday.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] trigonometric functions in softfloat

2017-07-05 Thread Aurelien Jarno
Hi,

On 2017-07-05 10:25, Laurent Vivier wrote:
> Hi,
> 
> Thomas has pointed out that WinUAE[1] has an updated softfloat library
> implementing missing operations for 680x0.
> 
> Do you think these changes can be merged in QEMU?

From the licensing point of view, they seems to use the SoftFloat-2a
license, so that should be fine.

Personally I am fine adding general trigonometric functions to
softfloat, that might help other targets like x86 which uses  the math
functions from libm and thus have different behaviour/precision
depending on the host.

The question is more how m68k specific are those trigonometric
functions. If we can have a way to implement them as generic
trigonometric functions reusable by other targets, I am all for it. If
not that code would probably be better in target/m68k.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH v2 1/5] target/sh4: do not check for PR bit for fabs instruction

2017-07-02 Thread Aurelien Jarno
The SH4 manual is not fully clear about that, but real hardware do not
check for the PR bit, which allows to select between single or double
precision, for the fabs instruction. This is probably what is meant by
"Same operation is performed regardless of precision."

Remove the check, and at the same time use a TCG instruction instead of
a helper to clear one bit.

LP: https://bugs.launchpad.net/qemu/+bug/1701821
Reported-by: Bruno Haible 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.h|  2 --
 target/sh4/op_helper.c | 10 --
 target/sh4/translate.c | 15 +++
 3 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index dce859caea..f715224822 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -16,8 +16,6 @@ DEF_HELPER_3(macw, void, env, i32, i32)
 
 DEF_HELPER_2(ld_fpscr, void, env, i32)
 
-DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32)
-DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_FLAGS_3(fadd_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 528a40ac1d..5e3a3ba68c 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -252,16 +252,6 @@ static void update_fpscr(CPUSH4State *env, uintptr_t 
retaddr)
 }
 }
 
-float32 helper_fabs_FT(float32 t0)
-{
-return float32_abs(t0);
-}
-
-float64 helper_fabs_DT(float64 t0)
-{
-return float64_abs(t0);
-}
-
 float32 helper_fadd_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
 set_float_exception_flags(0, &env->fp_status);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 4f20537ef8..7c40945908 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1695,19 +1695,10 @@ static void _decode_opc(DisasContext * ctx)
gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
}
return;
-case 0xf05d: /* fabs FRn/DRn */
+case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
CHECK_FPU_ENABLED
-if (ctx->tbflags & FPSCR_PR) {
-   if (ctx->opcode & 0x0100)
-   break; /* illegal instruction */
-   TCGv_i64 fp = tcg_temp_new_i64();
-   gen_load_fpr64(fp, DREG(B11_8));
-   gen_helper_fabs_DT(fp, fp);
-   gen_store_fpr64(fp, DREG(B11_8));
-   tcg_temp_free_i64(fp);
-   } else {
-   gen_helper_fabs_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
-   }
+tcg_gen_andi_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)],
+ 0x7fff);
return;
 case 0xf06d: /* fsqrt FRn */
CHECK_FPU_ENABLED
-- 
2.11.0




[Qemu-devel] [PATCH v2 2/5] target/sh4: fix FPU unorderered compare

2017-07-02 Thread Aurelien Jarno
In case of unordered compare, the fcmp instructions should either
trigger and invalid exception (if enabled) or set T=0. The existing code
left it unchanged.

LP: https://bugs.launchpad.net/qemu/+bug/1701821
Reported-by: Bruno Haible 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/op_helper.c | 28 
 1 file changed, 8 insertions(+), 20 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 5e3a3ba68c..f228daf125 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -274,11 +274,8 @@ void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, 
float32 t1)
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float32_compare(t0, t1, &env->fp_status);
-if (unlikely(relation == float_relation_unordered)) {
-update_fpscr(env, GETPC());
-} else {
-env->sr_t = (relation == float_relation_equal);
-}
+update_fpscr(env, GETPC());
+env->sr_t = (relation == float_relation_equal);
 }
 
 void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
@@ -287,11 +284,8 @@ void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, 
float64 t1)
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float64_compare(t0, t1, &env->fp_status);
-if (unlikely(relation == float_relation_unordered)) {
-update_fpscr(env, GETPC());
-} else {
-env->sr_t = (relation == float_relation_equal);
-}
+update_fpscr(env, GETPC());
+env->sr_t = (relation == float_relation_equal);
 }
 
 void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
@@ -300,11 +294,8 @@ void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, 
float32 t1)
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float32_compare(t0, t1, &env->fp_status);
-if (unlikely(relation == float_relation_unordered)) {
-update_fpscr(env, GETPC());
-} else {
-env->sr_t = (relation == float_relation_greater);
-}
+update_fpscr(env, GETPC());
+env->sr_t = (relation == float_relation_greater);
 }
 
 void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
@@ -313,11 +304,8 @@ void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, 
float64 t1)
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float64_compare(t0, t1, &env->fp_status);
-if (unlikely(relation == float_relation_unordered)) {
-update_fpscr(env, GETPC());
-} else {
-env->sr_t = (relation == float_relation_greater);
-}
+update_fpscr(env, GETPC());
+env->sr_t = (relation == float_relation_greater);
 }
 
 float64 helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0)
-- 
2.11.0




[Qemu-devel] [PATCH v2 5/5] target/sh4: return result of fcmp using TCG

2017-07-02 Thread Aurelien Jarno
Since that the T bit of the SR register is mapped using a TGC global,
it's better to return the value through TCG than writing it directly. It
allows to declare the helpers with the flag TCG_CALL_NO_WG.

Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.h|  8 
 target/sh4/op_helper.c | 16 
 target/sh4/translate.c | 10 ++
 3 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index d2398922dd..767a6d5209 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -21,10 +21,10 @@ DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, 
f64)
 DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32)
 DEF_HELPER_FLAGS_2(fcnvds_DT_FT, TCG_CALL_NO_WG, f32, env, f64)
 
-DEF_HELPER_3(fcmp_eq_FT, void, env, f32, f32)
-DEF_HELPER_3(fcmp_eq_DT, void, env, f64, f64)
-DEF_HELPER_3(fcmp_gt_FT, void, env, f32, f32)
-DEF_HELPER_3(fcmp_gt_DT, void, env, f64, f64)
+DEF_HELPER_FLAGS_3(fcmp_eq_FT, TCG_CALL_NO_WG, i32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fcmp_eq_DT, TCG_CALL_NO_WG, i32, env, f64, f64)
+DEF_HELPER_FLAGS_3(fcmp_gt_FT, TCG_CALL_NO_WG, i32, env, f32, f32)
+DEF_HELPER_FLAGS_3(fcmp_gt_DT, TCG_CALL_NO_WG, i32, env, f64, f64)
 DEF_HELPER_FLAGS_3(fdiv_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fdiv_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_2(float_FT, TCG_CALL_NO_WG, f32, env, i32)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 64206cf803..c3d19b1f61 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -268,44 +268,44 @@ float64 helper_fadd_DT(CPUSH4State *env, float64 t0, 
float64 t1)
 return t0;
 }
 
-void helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
+uint32_t helper_fcmp_eq_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
 int relation;
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float32_compare(t0, t1, &env->fp_status);
 update_fpscr(env, GETPC());
-env->sr_t = (relation == float_relation_equal);
+return relation == float_relation_equal;
 }
 
-void helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
+uint32_t helper_fcmp_eq_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
 int relation;
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float64_compare(t0, t1, &env->fp_status);
 update_fpscr(env, GETPC());
-env->sr_t = (relation == float_relation_equal);
+return relation == float_relation_equal;
 }
 
-void helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
+uint32_t helper_fcmp_gt_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
 int relation;
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float32_compare(t0, t1, &env->fp_status);
 update_fpscr(env, GETPC());
-env->sr_t = (relation == float_relation_greater);
+return relation == float_relation_greater;
 }
 
-void helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
+uint32_t helper_fcmp_gt_DT(CPUSH4State *env, float64 t0, float64 t1)
 {
 int relation;
 
 set_float_exception_flags(0, &env->fp_status);
 relation = float64_compare(t0, t1, &env->fp_status);
 update_fpscr(env, GETPC());
-env->sr_t = (relation == float_relation_greater);
+return relation == float_relation_greater;
 }
 
 float64 helper_fcnvsd_FT_DT(CPUSH4State *env, float32 t0)
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 8098228c51..87b04f0d39 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1077,10 +1077,10 @@ static void _decode_opc(DisasContext * ctx)
 gen_helper_fdiv_DT(fp0, cpu_env, fp0, fp1);
 break;
 case 0xf004:   /* fcmp/eq Rm,Rn */
-gen_helper_fcmp_eq_DT(cpu_env, fp0, fp1);
+gen_helper_fcmp_eq_DT(cpu_sr_t, cpu_env, fp0, fp1);
 return;
 case 0xf005:   /* fcmp/gt Rm,Rn */
-gen_helper_fcmp_gt_DT(cpu_env, fp0, fp1);
+gen_helper_fcmp_gt_DT(cpu_sr_t, cpu_env, fp0, fp1);
 return;
 }
gen_store_fpr64(fp0, DREG(B11_8));
@@ -1109,11 +1109,13 @@ static void _decode_opc(DisasContext * ctx)
cpu_fregs[FREG(B7_4)]);
 break;
 case 0xf004:   /* fcmp/eq Rm,Rn */
-gen_helper_fcmp_eq_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+gen_helper_fcmp_eq_FT(cpu_sr_t, cpu_env,
+  cpu_fregs[FREG(B11_8)],
   cpu_fregs[FREG(B7_4)]);
 return;
 case 0xf005:   /* fcmp/gt Rm,Rn */
-gen_helper_fcmp_gt_FT(cpu_env, cpu_fregs[FREG(B11_8)],
+gen_helper_fcmp_gt_FT(cpu

[Qemu-devel] [PATCH v2 0/5] target/sh4: misc FPU fixes and optimizations

2017-07-02 Thread Aurelien Jarno
This patchset should fix the bug#1701821 reported by Bruno Haible,
which makes the gnulib testsuite to fail for single precision libm
tests or for tests relying on unordered comparisons.

It also fixes an inversion of cause and flag bits in the FPSCR register,
which is unrelated with the reported bug. It also improves a bit the fneg
and fcmp instructions.

Aurelien Jarno (5):
  target/sh4: do not check for PR bit for fabs instruction
  target/sh4: fix FPU unorderered compare
  target/sh4: fix FPSCR cause vs flag inversion
  target/sh4: do not use a helper to implement fneg
  target/sh4: return result of fcmp using TCG

 target/sh4/helper.h| 11 +++-
 target/sh4/op_helper.c | 71 --
 target/sh4/translate.c | 30 -
 3 files changed, 37 insertions(+), 75 deletions(-)

-- 
2.11.0




[Qemu-devel] [PATCH v2 3/5] target/sh4: fix FPSCR cause vs flag inversion

2017-07-02 Thread Aurelien Jarno
The floating-point status/control register contains cause and flag
bits. The cause bits are set to 0 before executing the instruction,
while the flag bits hold the status of the exception generated after
the field was last cleared.

Signed-off-by: Aurelien Jarno 
---
 target/sh4/op_helper.c | 20 ++--
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index f228daf125..f2e39c5ca6 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -219,29 +219,29 @@ static void update_fpscr(CPUSH4State *env, uintptr_t 
retaddr)
 
 xcpt = get_float_exception_flags(&env->fp_status);
 
-/* Clear the flag entries */
-env->fpscr &= ~FPSCR_FLAG_MASK;
+/* Clear the cause entries */
+env->fpscr &= ~FPSCR_CAUSE_MASK;
 
 if (unlikely(xcpt)) {
 if (xcpt & float_flag_invalid) {
-env->fpscr |= FPSCR_FLAG_V;
+env->fpscr |= FPSCR_CAUSE_V;
 }
 if (xcpt & float_flag_divbyzero) {
-env->fpscr |= FPSCR_FLAG_Z;
+env->fpscr |= FPSCR_CAUSE_Z;
 }
 if (xcpt & float_flag_overflow) {
-env->fpscr |= FPSCR_FLAG_O;
+env->fpscr |= FPSCR_CAUSE_O;
 }
 if (xcpt & float_flag_underflow) {
-env->fpscr |= FPSCR_FLAG_U;
+env->fpscr |= FPSCR_CAUSE_U;
 }
 if (xcpt & float_flag_inexact) {
-env->fpscr |= FPSCR_FLAG_I;
+env->fpscr |= FPSCR_CAUSE_I;
 }
 
-/* Accumulate in cause entries */
-env->fpscr |= (env->fpscr & FPSCR_FLAG_MASK)
-  << (FPSCR_CAUSE_SHIFT - FPSCR_FLAG_SHIFT);
+/* Accumulate in flag entries */
+env->fpscr |= (env->fpscr & FPSCR_CAUSE_MASK)
+  >> (FPSCR_CAUSE_SHIFT - FPSCR_FLAG_SHIFT);
 
 /* Generate an exception if enabled */
 cause = (env->fpscr & FPSCR_CAUSE_MASK) >> FPSCR_CAUSE_SHIFT;
-- 
2.11.0




[Qemu-devel] [PATCH v2 4/5] target/sh4: do not use a helper to implement fneg

2017-07-02 Thread Aurelien Jarno
There is no need to use a helper to flip one bit, just use a TCG xor
instruction instead.

Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.h| 1 -
 target/sh4/op_helper.c | 5 -
 target/sh4/translate.c | 5 ++---
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index f715224822..d2398922dd 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -32,7 +32,6 @@ DEF_HELPER_FLAGS_2(float_DT, TCG_CALL_NO_WG, f64, env, i32)
 DEF_HELPER_FLAGS_4(fmac_FT, TCG_CALL_NO_WG, f32, env, f32, f32, f32)
 DEF_HELPER_FLAGS_3(fmul_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fmul_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index f2e39c5ca6..64206cf803 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -384,11 +384,6 @@ float64 helper_fmul_DT(CPUSH4State *env, float64 t0, 
float64 t1)
 return t0;
 }
 
-float32 helper_fneg_T(float32 t0)
-{
-return float32_chs(t0);
-}
-
 float32 helper_fsqrt_FT(CPUSH4State *env, float32 t0)
 {
 set_float_exception_flags(0, &env->fp_status);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 7c40945908..8098228c51 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1691,9 +1691,8 @@ static void _decode_opc(DisasContext * ctx)
return;
 case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
CHECK_FPU_ENABLED
-   {
-   gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
-   }
+tcg_gen_xori_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)],
+ 0x8000);
return;
 case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
CHECK_FPU_ENABLED
-- 
2.11.0




[Qemu-devel] [PATCH 0/2] target/sh4: fix fabs and optimize fneg

2017-07-02 Thread Aurelien Jarno
This patchset should fix the bug #1701821 reported by Bruno Haible,
which makes the gnulib testsuite to fail for single precision libm
tests.

Aurelien Jarno (2):
  target/sh4: do not check for PR bit for fabs instruction
  target/sh4: do not use a helper to implement fneg

 target/sh4/helper.h|  3 ---
 target/sh4/op_helper.c | 15 ---
 target/sh4/translate.c | 20 +---
 3 files changed, 5 insertions(+), 33 deletions(-)

-- 
2.11.0




[Qemu-devel] [PATCH 1/2] target/sh4: do not check for PR bit for fabs instruction

2017-07-02 Thread Aurelien Jarno
The SH4 manual is not fully clear about that, but real hardware do not
check for the PR bit, which allows to select between single or double
precision, for the fabs instruction. This is probably what is meant by
"Same operation is performed regardless of precision."

Remove the check, and at the same time use a TCG instruction instead of
a helper to clear one bit.

LP: https://bugs.launchpad.net/qemu/+bug/1701821
Reported-by: Bruno Haible 
Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.h|  2 --
 target/sh4/op_helper.c | 10 --
 target/sh4/translate.c | 15 +++
 3 files changed, 3 insertions(+), 24 deletions(-)

diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index dce859caea..f715224822 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -16,8 +16,6 @@ DEF_HELPER_3(macw, void, env, i32, i32)
 
 DEF_HELPER_2(ld_fpscr, void, env, i32)
 
-DEF_HELPER_FLAGS_1(fabs_FT, TCG_CALL_NO_RWG_SE, f32, f32)
-DEF_HELPER_FLAGS_1(fabs_DT, TCG_CALL_NO_RWG_SE, f64, f64)
 DEF_HELPER_FLAGS_3(fadd_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fadd_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_2(fcnvsd_FT_DT, TCG_CALL_NO_WG, f64, env, f32)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 528a40ac1d..5e3a3ba68c 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -252,16 +252,6 @@ static void update_fpscr(CPUSH4State *env, uintptr_t 
retaddr)
 }
 }
 
-float32 helper_fabs_FT(float32 t0)
-{
-return float32_abs(t0);
-}
-
-float64 helper_fabs_DT(float64 t0)
-{
-return float64_abs(t0);
-}
-
 float32 helper_fadd_FT(CPUSH4State *env, float32 t0, float32 t1)
 {
 set_float_exception_flags(0, &env->fp_status);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 4f20537ef8..7c40945908 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1695,19 +1695,10 @@ static void _decode_opc(DisasContext * ctx)
gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
}
return;
-case 0xf05d: /* fabs FRn/DRn */
+case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
CHECK_FPU_ENABLED
-if (ctx->tbflags & FPSCR_PR) {
-   if (ctx->opcode & 0x0100)
-   break; /* illegal instruction */
-   TCGv_i64 fp = tcg_temp_new_i64();
-   gen_load_fpr64(fp, DREG(B11_8));
-   gen_helper_fabs_DT(fp, fp);
-   gen_store_fpr64(fp, DREG(B11_8));
-   tcg_temp_free_i64(fp);
-   } else {
-   gen_helper_fabs_FT(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
-   }
+tcg_gen_andi_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)],
+ 0x7fff);
return;
 case 0xf06d: /* fsqrt FRn */
CHECK_FPU_ENABLED
-- 
2.11.0




[Qemu-devel] [PATCH 2/2] target/sh4: do not use a helper to implement fneg

2017-07-02 Thread Aurelien Jarno
There is no need to use a helper to flip one bit, just use a TCG xor
instruction instead.

Signed-off-by: Aurelien Jarno 
---
 target/sh4/helper.h| 1 -
 target/sh4/op_helper.c | 5 -
 target/sh4/translate.c | 5 ++---
 3 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/target/sh4/helper.h b/target/sh4/helper.h
index f715224822..d2398922dd 100644
--- a/target/sh4/helper.h
+++ b/target/sh4/helper.h
@@ -32,7 +32,6 @@ DEF_HELPER_FLAGS_2(float_DT, TCG_CALL_NO_WG, f64, env, i32)
 DEF_HELPER_FLAGS_4(fmac_FT, TCG_CALL_NO_WG, f32, env, f32, f32, f32)
 DEF_HELPER_FLAGS_3(fmul_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fmul_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
-DEF_HELPER_FLAGS_1(fneg_T, TCG_CALL_NO_RWG_SE, f32, f32)
 DEF_HELPER_FLAGS_3(fsub_FT, TCG_CALL_NO_WG, f32, env, f32, f32)
 DEF_HELPER_FLAGS_3(fsub_DT, TCG_CALL_NO_WG, f64, env, f64, f64)
 DEF_HELPER_FLAGS_2(fsqrt_FT, TCG_CALL_NO_WG, f32, env, f32)
diff --git a/target/sh4/op_helper.c b/target/sh4/op_helper.c
index 5e3a3ba68c..d561141301 100644
--- a/target/sh4/op_helper.c
+++ b/target/sh4/op_helper.c
@@ -396,11 +396,6 @@ float64 helper_fmul_DT(CPUSH4State *env, float64 t0, 
float64 t1)
 return t0;
 }
 
-float32 helper_fneg_T(float32 t0)
-{
-return float32_chs(t0);
-}
-
 float32 helper_fsqrt_FT(CPUSH4State *env, float32 t0)
 {
 set_float_exception_flags(0, &env->fp_status);
diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 7c40945908..8098228c51 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -1691,9 +1691,8 @@ static void _decode_opc(DisasContext * ctx)
return;
 case 0xf04d: /* fneg FRn/DRn - FPSCR: Nothing */
CHECK_FPU_ENABLED
-   {
-   gen_helper_fneg_T(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)]);
-   }
+tcg_gen_xori_i32(cpu_fregs[FREG(B11_8)], cpu_fregs[FREG(B11_8)],
+ 0x8000);
return;
 case 0xf05d: /* fabs FRn/DRn - FPCSR: Nothing */
CHECK_FPU_ENABLED
-- 
2.11.0




Re: [Qemu-devel] [PATCH v2 8/8] target/s390x: Fix risbg handling

2017-07-02 Thread Aurelien Jarno
On 2017-07-01 13:26, Richard Henderson wrote:
> The rotation is to the left, but extract shifts to the right.
> The computation of the extract parameters needs adjusting.
> 
> For the entry condition, simplify
> 
>   64 - rot + len <= 64
>   -rot + len <= 0
>   len <= rot
> 
> Reported-by: David Hildenbrand 
> Suggested-by: Aurelien Jarno 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/translate.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index 1f0c401..89b2ea5 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -3472,8 +3472,8 @@ static ExitStatus op_risbg(DisasContext *s, DisasOps *o)
>  }
>  
>  /* In some cases we can implement this with extract.  */
> -if (imask == 0 && pos == 0 && len > 0 && rot + len <= 64) {
> -tcg_gen_extract_i64(o->out, o->in2, rot, len);
> +if (imask == 0 && pos == 0 && len > 0 && len <= rot) {
> +tcg_gen_extract_i64(o->out, o->in2, 64 - rot, len);
>  return NO_EXIT;
>  }

Reviewed-by: Aurelien Jarno 


-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 2/8] target/s390x: Implement CONVERT UNICODE insns

2017-07-02 Thread Aurelien Jarno
On 2017-07-01 13:25, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |   6 +
>  target/s390x/mem_helper.c  | 310 
> +
>  target/s390x/translate.c   |  44 +++
>  target/s390x/insn-data.def |  13 ++
>  4 files changed, 373 insertions(+)
> 

...

> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index e739525..9301daa 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -2122,6 +2122,49 @@ static ExitStatus op_ct(DisasContext *s, DisasOps *o)
>  return NO_EXIT;
>  }
>  
> +static ExitStatus op_cuXX(DisasContext *s, DisasOps *o)
> +{
> +int m3 = get_field(s->fields, m3);
> +TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
> +TCGv_i32 r2 = tcg_const_i32(get_field(s->fields, r2));
> +TCGv_i32 chk;
> +
> +if (!s390_has_feat(s->insn->fac == S390_FEAT_EXTENDED_TRANSLATION_3
> +   ? S390_FEAT_ETF3_ENH : S390_FEAT_ETF2_ENH)) {
> +m3 = 0;
> +}

This doesn't look correct to me. The well-formedness checking is part of
ETF3_ENH facility, for both convert unicode instructions that are part
of the Z architecture (CU12 and CU21) and for the ones added by the ETF3
facility (CU14 and CU24).

The rest of the patch now looks fine.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 4/7] softfloat: define floatx80_round()

2017-06-27 Thread Aurelien Jarno
On 2017-06-27 21:12, Laurent Vivier wrote:
> Add a function to round a floatx80 to the defined precision
> (floatx80_rounding_precision)
> 
> Signed-off-by: Laurent Vivier 
> Reviewed-by: Richard Henderson 
> ---
>  fpu/softfloat.c | 15 +++
>  include/fpu/softfloat.h |  1 +
>  2 files changed, 16 insertions(+)
> 
> diff --git a/fpu/softfloat.c b/fpu/softfloat.c
> index 7af14e2..e9bf359 100644
> --- a/fpu/softfloat.c
> +++ b/fpu/softfloat.c
> @@ -5086,6 +5086,21 @@ float128 floatx80_to_float128(floatx80 a, float_status 
> *status)
>  }
>  
>  
> /*
> +| Rounds the extended double-precision floating-point value `a'

Maybe it is worth mentioning the precision to which the value is rounded
(floatx80_rounding_precision) ?

Otherwise looks all fine to me.

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v1] target-s390x: fix risbg handling

2017-06-25 Thread Aurelien Jarno
On 2017-06-23 01:12, David Hildenbrand wrote:
> If we have for example: r3 contains 0x
> ec 33 3f bf 61 55   risbg   %r3,%r3,63,191,97
> 
> We want to rotate 33 to the left and only keep MSB bit 63 of that. So the
> result is then exactly 1 (we're reading the sign of the 32 bit value).
> 
> Current code assumes that we can do that via an extract, which is not
> true (at least not that easy) and produces a 0.

I think the mistake there is that the rotation is done to the left,
while in extract the "shift" is done to the right. The following patch
should be enough:

--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3441,8 +3441,8 @@ static ExitStatus op_risbg(DisasContext *s, DisasOps *o)
 }
 
 /* In some cases we can implement this with extract.  */
-if (imask == 0 && pos == 0 && len > 0 && rot + len <= 64) {
-tcg_gen_extract_i64(o->out, o->in2, rot, len);
+if (imask == 0 && pos == 0 && len > 0 && rot - len >= 0) {
+tcg_gen_extract_i64(o->out, o->in2, 64 - rot, len);
 return NO_EXIT;


> Let's just get rid of this special handling.
> 
> Signed-off-by: David Hildenbrand 
> ---
> 
> This effectively allows to start a linux kernel, compiled for z10 using
> the qemu model under tcg (with other patches currently on the list):
> 
> qemu-system-s390x ... -cpu qemu,mvcos=on,stfle=on,ldisp=on,ldisphp=on, \
>eimm=on,stckf=on,csst=on,csst2=on,ginste=on, \
>exrl=on ...
> 
> I found this by compiling the kvm-unit-tests for z10 and noticing
> elementary selftests failing. The kernel would trigger weird
> BUG_ONs very early while starting up, which basically gave not really
> many hints of what was actually going wrong.
> 
>  target/s390x/translate.c | 6 --
>  1 file changed, 6 deletions(-)

But the patch is also correct.

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] MIPS little endian - Bug when decoding physaddr

2017-06-23 Thread Aurelien Jarno
On 2017-06-23 15:14, Vinicius Maciel wrote:
> Hi everyone,
> 
> I'm having a problem similar to the reported in this email, but now I'm
> trying to emulate a MIPS 24KEc,
> ralink RT5350F.
> 
> Assembly Code:
> 0x802006a0 : lui t5,0xb011
> 0x802006a4 : ori t5,t5,0x168
> 0x802006a8 : li t6,23
> 0x802006ac : nop
> 0x802006b0 : sw t6,0(t5) <---
> 
> The instruction "sw t6,0(t5)" try to write to address 0xb0110168 (0x10110168),
> but Qemu decodes
> this address to 2952790112 (0xb060). Is this address right?

sw is a write instruction...

> Qemu Debug:
> #0  io_readx (env=0x566e4a78, iotlbentry=0x566ec348,
> addr=2952790112,
> retaddr=140737129226144, size=4)
> at /home/vini/projs/emuladores/qemu-routers/cputlb.c:786
> #1  0x557c9a02 in io_readl (env=0x566e4a78, mmu_idx=0, index=0,
> addr=2952790112, retaddr=140737129226144)
> at /home/vini/projs/emuladores/qemu-routers/softmmu_template.h:104
> #2  0x557c9b89 in helper_le_ldul_mmu (env=0x566e4a78,
> addr=2952790112, <
> oi=32, retaddr=140737129226144)
> at /home/vini/projs/emuladores/qemu-routers/softmmu_template.h:141
> #3  0x7fffea982108 in code_gen_buffer ()

... while helper_le_ldul_mmu and io_readl are read functions. The
assembly code and the backtrace do not match. We can not conclude
anything.

Aurelien


-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 00/18] target/s390x improvements

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Changes since v2:
>   * Dropped the enforcement of PGM_OPERATION for insns for
> which the feature bit is set.  There's no agreement on
> exactly how to do this yet.
>   * Add implementations of insns for 6 more facilities.
> 
> I think we can get to z990 fairly quickly after this.
> Ignoring HFP, the ones I see missing are DAT-ENH, MSA.

Thanks for this work. For the record I have started working on HFP
sometimes ago. I'll try to finish that and submit patches in the next
weeks.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 17/18] target/s390x: Mark ETF3 and ETF3_ENH facilities as available

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu_models.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index be7757c..16129f6 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -678,11 +678,13 @@ static void add_qemu_cpu_model_features(S390FeatBitmap 
> fbm)
>  S390_FEAT_STFLE,
>  S390_FEAT_EXTENDED_IMMEDIATE,
>  S390_FEAT_EXTENDED_TRANSLATION_2,
> +S390_FEAT_EXTENDED_TRANSLATION_3,
>  S390_FEAT_LONG_DISPLACEMENT,
>  S390_FEAT_LONG_DISPLACEMENT_FAST,
>  S390_FEAT_ETF2_ENH,
>  S390_FEAT_STORE_CLOCK_FAST,
>  S390_FEAT_MOVE_WITH_OPTIONAL_SPEC,
> +S390_FEAT_ETF3_ENH,
>  S390_FEAT_COMPARE_AND_SWAP_AND_STORE,
>  S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2,
>      S390_FEAT_GENERAL_INSTRUCTIONS_EXT,

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 16/18] target/s390x: Implement TRTR

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> Drop TRT from the set of insns handled internally by EXECUTE.
> It's more important to adjust the existing helper to handle
> both TRT and TRTR.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |  1 +
>  target/s390x/insn-data.def |  2 ++
>  target/s390x/mem_helper.c  | 20 +---
>  target/s390x/translate.c   |  9 +
>  4 files changed, 25 insertions(+), 7 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 14/18] target/s390x: Tidy SRST

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> Since we require all registers saved on input, read R0 from ENV instead
> of passing it manually.  Recognize the specification exception when R0
> contains incorrect data.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h |  2 +-
>  target/s390x/mem_helper.c | 11 ---
>  target/s390x/translate.c  |  2 +-
>  3 files changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index c014820..cd51b89 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -12,7 +12,7 @@ DEF_HELPER_FLAGS_3(divs32, TCG_CALL_NO_WG, s64, env, s64, 
> s64)
>  DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, i64)
>  DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
>  DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
> -DEF_HELPER_4(srst, i64, env, i64, i64, i64)
> +DEF_HELPER_3(srst, i64, env, i64, i64)
>  DEF_HELPER_4(clst, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64)
>  DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64)
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index df082f5..990858e 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -538,12 +538,17 @@ static inline void set_length(CPUS390XState *env, int 
> reg, uint64_t length)
>  }
>  
>  /* search string (c is byte to search, r2 is string, r1 end of string) */
> -uint64_t HELPER(srst)(CPUS390XState *env, uint64_t r0, uint64_t end,
> -  uint64_t str)
> +uint64_t HELPER(srst)(CPUS390XState *env, uint64_t end, uint64_t str)
>  {
>  uintptr_t ra = GETPC();
>  uint32_t len;
> -uint8_t v, c = r0;
> +uint8_t v, c = env->regs[0];
> +
> +/* Bits 32-55 must contain all 0.  */
> +if (env->regs[0] & 0xff00u) {
> +cpu_restore_state(ENV_GET_CPU(env), ra);
> +program_interrupt(env, PGM_SPECIFICATION, 6);
> +}
>  
>  str = wrap_address(env, str);
>  end = wrap_address(env, end);
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index f8989ec..4a860f1 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -4256,7 +4256,7 @@ static ExitStatus op_stpq(DisasContext *s, DisasOps *o)
>  
>  static ExitStatus op_srst(DisasContext *s, DisasOps *o)
>  {
> -gen_helper_srst(o->in1, cpu_env, regs[0], o->in1, o->in2);
> +gen_helper_srst(o->in1, cpu_env, o->in1, o->in2);
>  set_cc_static(s);
>  return_low128(o->in2);
>  return NO_EXIT;

The cleanup is a good step, but I guess that should also be the moment
to improve the address masking/wrapping (see comment on next patch).

Anyway:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 18/18] target/s390x: Clean up TB flag bits

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> Most of the PSW bits that were being copied into TB->flags
> are not relevant to translation.  Removing those that are
> unnecessary reduces the amount of translation required.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu.h   | 24 +---
>  target/s390x/translate.c | 16 
>  2 files changed, 17 insertions(+), 23 deletions(-)
> 

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 13/18] target/s390x: Implement CONVERT UNICODE insns

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 9c8f184..634ef98 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -313,6 +313,19 @@
>  C(0xb3a1, CDLGBR,  RRF_e, FPE, 0, r2_o, f1, 0, cdlgb, 0)
>  C(0xb3a2, CXLGBR,  RRF_e, FPE, 0, r2_o, x1, 0, cxlgb, 0)
>  
> +/* CONVERT UTF-8 TO UTF-16 */
> +D(0xb2a7, CU12,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 12)
> +/* CONVERT UTF-8 TO UTF-32 */
> +D(0xb9b0, CU14,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 14)
> +/* CONVERT UTF-16 to UTF-8 */
> +D(0xb2a6, CU21,RRF_c, Z,   0, 0, 0, 0, cuXX, 0, 21)
> +/* CONVERT UTF-16 to UTF-32 */
> +D(0xb9b1, CU24,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 24)
> +/* CONVERT UTF-32 to UTF-8 */
> +D(0xb9b3, CU41,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 41)
> +/* CONVERT UTF-32 to UTF-16 */
> +D(0xb9b2, CU42,RRF_c, ETF3, 0, 0, 0, 0, cuXX, 0, 42)
> +

CU41 and CU42 are inverted here. CU41 has the 0xb9b2 opcode and CU42 the
0xb9b3 opcode.

> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index 4376c72..df082f5 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c

...

> +static int encode_utf8(CPUS390XState *env, uint64_t addr, uint64_t ilen,
> +   uintptr_t ra, uint32_t c, uint32_t *olen)
> +{
> +uint8_t d[4];
> +uint32_t l, i;
> +
> +if (c <= 0x7f) {
> +/* one byte character */
> +l = 1;
> +d[0] = c;
> +} else if (c <= 0x7ff) {
> +/* two byte character */
> +l = 2;
> +d[1] = 0x80 | extract32(c, 0, 6);
> +d[0] = 0xc0 | extract32(c, 6, 5);
> +} else if (c <= 0x) {
> +/* three byte character */
> +l = 3;
> +d[2] = 0x80 | extract32(c, 0, 6);
> +d[1] = 0x80 | extract32(c, 6, 6);
> +d[0] = 0xe0 | extract32(c, 12, 4);
> +} else {
> +/* four byte character */
> +l = 4;
> +d[3] = 0x80 | extract32(c, 0, 6);
> +d[2] = 0x80 | extract32(c, 6, 6);
> +d[1] = 0x80 | extract32(c, 12, 6);
> +d[0] = 0xe0 | extract32(c, 18, 3);

This should be 0xf0 instead of 0xe0.

> +static int encode_utf16(CPUS390XState *env, uint64_t addr, uint64_t ilen,
> +uintptr_t ra, uint32_t c, uint32_t *olen)
> +{
> +uint16_t d0, d1;
> +
> +if (c <= 0x) {
> +/* one word character */
> +if (ilen < 2) {
> +return 1;
> +}
> +cpu_stw_data_ra(env, addr, c, ra);
> +*olen = 2;
> +} else {
> +/* two word character */
> +if (ilen < 4) {
> +return 1;
> +}
> +d1 = 0xbc00 | extract32(c, 0, 10);
> +d0 = 0xb800 | extract32(c, 10, 6);

This should be 0xdc00 and 0xd800;


Otherwise the patch looks fine to me.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 15/18] target/s390x: Implement SRSTU

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:04, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/helper.h  |  1 +
>  target/s390x/insn-data.def |  2 ++
>  target/s390x/mem_helper.c  | 44 
>  target/s390x/translate.c   |  8 
>  4 files changed, 55 insertions(+)
> 
> diff --git a/target/s390x/helper.h b/target/s390x/helper.h
> index cd51b89..58d7f5b 100644
> --- a/target/s390x/helper.h
> +++ b/target/s390x/helper.h
> @@ -13,6 +13,7 @@ DEF_HELPER_FLAGS_3(divu32, TCG_CALL_NO_WG, i64, env, i64, 
> i64)
>  DEF_HELPER_FLAGS_3(divs64, TCG_CALL_NO_WG, s64, env, s64, s64)
>  DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, i64, i64)
>  DEF_HELPER_3(srst, i64, env, i64, i64)
> +DEF_HELPER_3(srstu, i64, env, i64, i64)
>  DEF_HELPER_4(clst, i64, env, i64, i64, i64)
>  DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64)
>  DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64)
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 634ef98..1bebcf2 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -736,6 +736,8 @@
>  
>  /* SEARCH STRING */
>  C(0xb25e, SRST,RRE,   Z,   r1_o, r2_o, 0, 0, srst, 0)
> +/* SEARCH STRING UNICODE */
> +C(0xb9be, SRSTU,   RRE,   ETF3, r1_o, r2_o, 0, 0, srstu, 0)
>  
>  /* SET ACCESS */
>  C(0xb24e, SAR, RRE,   Z,   0, r2_o, 0, 0, sar, 0)
> diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
> index 990858e..ce288d9 100644
> --- a/target/s390x/mem_helper.c
> +++ b/target/s390x/mem_helper.c
> @@ -578,6 +578,50 @@ uint64_t HELPER(srst)(CPUS390XState *env, uint64_t end, 
> uint64_t str)
>  return end;
>  }
>  
> +uint64_t HELPER(srstu)(CPUS390XState *env, uint64_t end, uint64_t str)
> +{
> +uintptr_t ra = GETPC();
> +uint32_t len;
> +uint16_t v, c = env->regs[0];
> +uint64_t adj_end;
> +
> +/* Bits 32-47 of R0 must be zero.  */
> +if (env->regs[0] & 0xu) {
> +cpu_restore_state(ENV_GET_CPU(env), ra);
> +program_interrupt(env, PGM_SPECIFICATION, 6);
> +}
> +
> +str = wrap_address(env, str);
> +end = wrap_address(env, end);
> +
> +/* If the LSB of the two addresses differ, use one extra byte.  */
> +adj_end = end + ((str ^ end) & 1);
> +
> +/* Assume for now that R2 is unmodified.  */
> +env->retxl = str;
> +
> +/* Lest we fail to service interrupts in a timely manner, limit the
> +   amount of work we're willing to do.  For now, let's cap at 8k.  */
> +for (len = 0; len < 0x2000; len += 2) {
> +if (str + len == adj_end) {
> +/* End of input found.  */
> +env->cc_op = 2;
> +return end;
> +}
> +v = cpu_lduw_data_ra(env, str + len, ra);
> +if (v == c) {
> +/* Character found.  Set R1 to the location; R2 is unmodified.  
> */
> +env->cc_op = 1;
> +return str + len;
> +}
> +}
> +
> +/* CPU-determined bytes processed.  Advance R2 to next byte to process.  
> */
> +env->retxl = str + len;
> +env->cc_op = 3;
> +return end;
> +}
> +
>  /* unsigned string compare (c is string terminator) */
>  uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t 
> s2)
>  {

Overall that looks fine, but I think we should get the wrapping (almost)
correct, now that we have the get_address / set_address functions. As
all registers are saved on input, I guess the registers can be directly
written back in the helper using set_address. It should handle most of
the cases, except wrapping at the end of the address space, but anyway
I don't think it's handled somewhere.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 11/18] target/s390x: Mark STFLE_49 facility as available

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> This facility bit includes execution-hint, load-and-trap,
> miscellaneous-instruction-extensions and processor-assist.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu_models.c | 1 +
>  1 file changed, 1 insertion(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 12/18] target/s390x: Finish implementing ETF2-ENH

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Missed the proper alignment in TRTO/TRTT, and ignoring the M3
> field for all TRXX insns without ETF2-ENH.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/mem_helper.c | 11 ++-
>  target/s390x/translate.c  |  5 +++--
>  2 files changed, 13 insertions(+), 3 deletions(-)

Reviewed-by: Aurelien Jarno 
 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 09/18] target/s390x: Implement execution-hint insns

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/insn-data.def | 9 +
>  target/s390x/translate.c   | 5 -
>  2 files changed, 13 insertions(+), 1 deletion(-)

Reviewed-by: Aurelien Jarno 
 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 06/18] target/s390x: Implement load-on-condition-2 insns

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/insn-data.def   |  9 +
>  target/s390x/insn-format.def |  1 +
>  target/s390x/translate.c | 18 +++---
>  3 files changed, 25 insertions(+), 3 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 10/18] target/s390x: Implement processor-assist insn

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/insn-data.def | 3 +++
>  target/s390x/translate.c   | 1 +
>  2 files changed, 4 insertions(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 08/18] target/s390x: Mark STFLE_53 facility as available

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> This facility bit includes load-on-condition-2 and
> load-and-zero-rightmost-byte.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu_models.c | 1 +
>  1 file changed, 1 insertion(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 07/18] target/s390x: Implement load-and-zero-rightmost-byte insns

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/insn-data.def | 4 
>  target/s390x/translate.c   | 7 +++
>  2 files changed, 11 insertions(+)
> 
> diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
> index 60d244f..20dec56 100644
> --- a/target/s390x/insn-data.def
> +++ b/target/s390x/insn-data.def
> @@ -429,6 +429,10 @@
>  /* LOAD AND TRAP */
>  C(0xe39f, LAT, RXY_a, LAT, 0, m2_32u, r1, 0, lat, 0)
>  C(0xe385, LGAT,RXY_a, LAT, 0, a2, r1, 0, lgat, 0)
> +/* LOAD AND ZERO RIGHTMOST BYTE */
> +C(0xe3eb, LZRF,RXY_a, LZRB, 0, m2_32u, new, r1_32, lzrb, 0)
> +C(0xe32a, LZRG,RXY_a, LZRB, 0, m2_64, r1, 0, lzrb, 0)
> +C(0xe33a, LLZRGF,  RXY_a, LZRB, 0, m2_32u, r1, 0, lzrb, 0)

Small nitpick, LLZRGF is considered a separate instruction in the PoO,
called LOAD LOGICAL AND ZERO RIGHTMOST BYTE.

That said:

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 05/18] target/s390x: Mark FPSEH facility as available

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> This facility bit includes DFP-rounding, FPR-GR-transfer,
> FPS-sign-handling, and IEEE-exception-simulation.  We do
> support all of these.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu_models.c | 1 +
>  1 file changed, 1 insertion(+)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 02/18] target/s390x: change PSW_SHIFT_KEY

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> From: David Hildenbrand 
> 
> Such shifts are usually used to easily extract the PSW KEY from the PSW
> mask, so let's avoid the confusing offset of 4.
> 
> Reviewed-by: Thomas Huth 
> Signed-off-by: David Hildenbrand 
> Message-Id: <20170614133819.18480-2-da...@redhat.com>
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu.h   | 2 +-
>  target/s390x/translate.c | 2 +-
>  2 files changed, 2 insertions(+), 2 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 01/18] target/s390x: Map existing FAC_* names to S390_FEAT_* names

2017-06-23 Thread Aurelien Jarno
On 2017-06-19 17:03, Richard Henderson wrote:
> The FAC_ names were placeholders prior to the introduction
> of the current facility modeling.
> 
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/translate.c | 59 
> 
>  1 file changed, 29 insertions(+), 30 deletions(-)

Reviewed-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH] mips/malta: load the initrd at the end of the low memory

2017-06-23 Thread Aurelien Jarno
Currently the malta board is loading the initrd just after the kernel.
This doesn't work for kaslr enabled kernels, as the initrd ends-up being
overwritten.

Move the initrd at the end of the low memory, that should leave a
sufficient gap for kaslr.

Signed-off-by: Aurelien Jarno 
---
 hw/mips/mips_malta.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/hw/mips/mips_malta.c b/hw/mips/mips_malta.c
index 95cdabb2dd..dad2f37fb1 100644
--- a/hw/mips/mips_malta.c
+++ b/hw/mips/mips_malta.c
@@ -841,8 +841,9 @@ static int64_t load_kernel (void)
 if (loaderparams.initrd_filename) {
 initrd_size = get_image_size (loaderparams.initrd_filename);
 if (initrd_size > 0) {
-initrd_offset = (kernel_high + ~INITRD_PAGE_MASK) & 
INITRD_PAGE_MASK;
-if (initrd_offset + initrd_size > ram_size) {
+initrd_offset = (loaderparams.ram_low_size - initrd_size
+ - ~INITRD_PAGE_MASK) & INITRD_PAGE_MASK;
+if (kernel_high >= initrd_offset) {
 fprintf(stderr,
 "qemu: memory too small for initial ram disk '%s'\n",
 loaderparams.initrd_filename);
-- 
2.11.0




Re: [Qemu-devel] [PATCH] target/mips: fix msa copy_[s|u]_df rd = 0 corner case

2017-06-17 Thread Aurelien Jarno
On 2017-06-15 16:20, Miodrag Dinic wrote:
> From: Miodrag Dinic 
> 
> This patch fixes the msa copy_[s|u]_df instruction emulation when
> the destination register rd is zero. Without this patch the zero
> register would get clobbered, which should never happen because it
> is supposed to be hardwired to 0.
> 
> Fix this corner case by explicitly checking rd = 0 and effectively
> making these instructions emulation no-op in that case.
> 
> Signed-off-by: Miodrag Dinic 
> ---
>  target/mips/translate.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 

Reviewed-by: Aurelien Jarno 
Acked-by: Aurelien Jarno 

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 5/5] target/s390x: mark CSST, CSST2, FPSEH facilities as available

2017-06-15 Thread Aurelien Jarno
On 2017-06-15 14:10, Richard Henderson wrote:
> On 06/15/2017 01:49 PM, Aurelien Jarno wrote:
> > > +S390_FEAT_FLOATING_POINT_SUPPPORT_ENH,
> > 
> > Theoretically the floating-point-support-enhancement facilities include
> > the DFP rounding facility. Given We don't implement the DFP facility I
> > guess this can be ignored.
> 
> We *do* implement the one instruction in DFP-rounding-facility: SRNMT.
> 

Ah ok, nevermind then. The DFP support is in better state than I
thought ;-).

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v2 3/5] target/mips: Exit after enabling interrupts

2017-06-15 Thread Aurelien Jarno
On 2017-06-14 12:48, Richard Henderson wrote:
> From: Paolo Bonzini 
> 
> Exit to cpu loop so we reevaluate cpu_mips_hw_interrupts.
> 
> Cc: Aurelien Jarno 
> Cc: Yongbok Kim 
> Signed-off-by: Richard Henderson 
> ---
>  target/mips/translate.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/target/mips/translate.c b/target/mips/translate.c
> index 559f8fe..891f14b 100644
> --- a/target/mips/translate.c
> +++ b/target/mips/translate.c
> @@ -13403,9 +13403,11 @@ static void gen_pool32axf (CPUMIPSState *env, 
> DisasContext *ctx, int rt, int rs)
>  save_cpu_state(ctx, 1);
>  gen_helper_ei(t0, cpu_env);
>  gen_store_gpr(t0, rs);
> -/* Stop translation as we may have switched the execution 
> mode */
> -ctx->bstate = BS_STOP;
>  tcg_temp_free(t0);
> +/* BS_STOP isn't good enough here;
> +   reevaluate cpu_mips_hw_interrupts_enabled.  */
> +gen_save_pc(ctx->pc + 4);
> +ctx->bstate = BS_EXCP;
>  }
>  break;
>  default:

While the above looks correct, it's not complete. It only fixes the
microMIPS EI instruction. The MIPS one also has to be fixed.

For what I understood, anything that can change the result of
cpu_mips_hw_interrupts_enabled has to stop the translation. In that case
I checked that ERET/ERETNC and MTC0/DMTC0 to the Status register are
already correct, that said it might be a good idea to update the
comments to mention it.

I can work on a better patch, but I doubt I'll have time before the
week-end.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 2/5] target/s390x: Enforce instruction features

2017-06-15 Thread Aurelien Jarno
On 2017-06-15 15:10, David Hildenbrand wrote:
> 
> >> A "sane" guest (e.g. Linux) will only use an instruction if the
> >> corresponding stfl(e) bit is set. So in my opinion, this should be just
> >> fine. If the bit is not set currently, the guest will not use it == dead
> >> code.
> > 
> > Not necessarily. Depending on the distribution, gcc and hence binaries
> > default to a different ISA. Over the time people have added the
> > corresponding instructions to QEMU so that these binaries work. Now
> > given that GCC does not necessarily use all the instructions from a
> > given facility, we end up with missing instructions.
> 
> That's true, glibc sometimes assumes a certain architecture level
> without checking. So you're right, maybe we should defer this "big
> hammer" change until we have all facilities as part of the qemu CPU

Well the GNU libc itself correctly probe the facilities with stfl/stfle.
What happens is that newer instructions might be generated directly by
GCC if told to do so (with -march=xxx or the default architecture).


> model. Then, e.g. runnning -cpu qemu will not break such stuff, however
> e.g. -cpu z900 could correctly simulate that architecture level.
> 
> One option would be:
> 
> /* for now, we don't fake absence of features for the qemu model */
> if (!object_dynamic_cast(cpu, "qemu-s390x-cpu") {
>   dc.features = cpu->model->features;
> }
> 
> 
> ...
> 
> if (s->features && !test_bit(insn->fac, s->features)) {
> gen_program_exception(s, PGM_OPERATION);
> return EXIT_NORETURN;
> }

I don't know that part of the code enough to tell if it is the good way
to do that, but certainly having a "qemu" CPU that supports all
instructions look like the way to go, especially for the linux-user
emulation.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 5/5] target/s390x: mark CSST, CSST2, FPSEH facilities as available

2017-06-15 Thread Aurelien Jarno
On 2017-06-14 22:53, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/cpu_models.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> index c3a4ce6..703feca 100644
> --- a/target/s390x/cpu_models.c
> +++ b/target/s390x/cpu_models.c
> @@ -683,8 +683,11 @@ static void add_qemu_cpu_model_features(S390FeatBitmap 
> fbm)
>  S390_FEAT_ETF2_ENH,
>  S390_FEAT_STORE_CLOCK_FAST,
>  S390_FEAT_MOVE_WITH_OPTIONAL_SPEC,
> +S390_FEAT_COMPARE_AND_SWAP_AND_STORE,
> +S390_FEAT_COMPARE_AND_SWAP_AND_STORE_2,
>  S390_FEAT_GENERAL_INSTRUCTIONS_EXT,
>  S390_FEAT_EXECUTE_EXT,
> +S390_FEAT_FLOATING_POINT_SUPPPORT_ENH,

Theoretically the floating-point-support-enhancement facilities include
the DFP rounding facility. Given We don't implement the DFP facility I
guess this can be ignored.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 2/5] target/s390x: Enforce instruction features

2017-06-15 Thread Aurelien Jarno
On 2017-06-15 13:28, David Hildenbrand wrote:
> On 15.06.2017 09:01, Aurelien Jarno wrote:
> > On 2017-06-14 22:53, Richard Henderson wrote:
> >> Signed-off-by: Richard Henderson 
> >> ---
> >>  target/s390x/translate.c | 8 
> >>  1 file changed, 8 insertions(+)
> >>
> >> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> >> index af18ffb..48cee25 100644
> >> --- a/target/s390x/translate.c
> >> +++ b/target/s390x/translate.c
> >> @@ -55,6 +55,7 @@ typedef struct DisasFields DisasFields;
> >>  
> >>  struct DisasContext {
> >>  struct TranslationBlock *tb;
> >> +const unsigned long *features;
> >>  const DisasInsn *insn;
> >>  DisasFields *fields;
> >>  uint64_t ex_value;
> >> @@ -5600,6 +5601,12 @@ static ExitStatus translate_one(CPUS390XState *env, 
> >> DisasContext *s)
> >>  }
> >>  #endif
> >>  
> >> +/* Check for insn feature enabled.  */
> >> +if (!test_bit(insn->fac, s->features)) {
> >> +gen_program_exception(s, PGM_OPERATION);
> >> +return EXIT_NORETURN;
> >> +}
> >> +
> >>  /* Check for insn specification exceptions.  */
> >>  if (insn->spec) {
> >>  int spec = insn->spec, excp = 0, r;
> >> @@ -5726,6 +5733,7 @@ void gen_intermediate_code(CPUS390XState *env, 
> >> struct TranslationBlock *tb)
> >>  }
> >>  
> >>  dc.tb = tb;
> >> +dc.features = cpu->model->features;
> >>  dc.pc = pc_start;
> >>  dc.cc_op = CC_OP_DYNAMIC;
> >>  dc.ex_value = tb->cs_base;
> > 
> > It looks correct technically. Now I am not sure we want to do that,
> > without at least provided a user to enable those instructions. There are
> > a lot facilities that are partially implemented, usually because only
> > the really used instructions are implemented, but also because some
> > facilities are grouped in a single feature bit. This includes for
> > example FPE, LPP, MIE, LAT, I_SIM and more.
> 
> A "sane" guest (e.g. Linux) will only use an instruction if the
> corresponding stfl(e) bit is set. So in my opinion, this should be just
> fine. If the bit is not set currently, the guest will not use it == dead
> code.

Not necessarily. Depending on the distribution, gcc and hence binaries
default to a different ISA. Over the time people have added the
corresponding instructions to QEMU so that these binaries work. Now
given that GCC does not necessarily use all the instructions from a
given facility, we end up with missing instructions.

Taking this to its logical extreme, given we don't fully implement the Z
facility (for example the HFP instructions are missing), we should
prevent all the programs to run until that is fixed.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH 2/5] target/s390x: Enforce instruction features

2017-06-15 Thread Aurelien Jarno
On 2017-06-14 22:53, Richard Henderson wrote:
> Signed-off-by: Richard Henderson 
> ---
>  target/s390x/translate.c | 8 
>  1 file changed, 8 insertions(+)
> 
> diff --git a/target/s390x/translate.c b/target/s390x/translate.c
> index af18ffb..48cee25 100644
> --- a/target/s390x/translate.c
> +++ b/target/s390x/translate.c
> @@ -55,6 +55,7 @@ typedef struct DisasFields DisasFields;
>  
>  struct DisasContext {
>  struct TranslationBlock *tb;
> +const unsigned long *features;
>  const DisasInsn *insn;
>  DisasFields *fields;
>  uint64_t ex_value;
> @@ -5600,6 +5601,12 @@ static ExitStatus translate_one(CPUS390XState *env, 
> DisasContext *s)
>  }
>  #endif
>  
> +/* Check for insn feature enabled.  */
> +if (!test_bit(insn->fac, s->features)) {
> +gen_program_exception(s, PGM_OPERATION);
> +return EXIT_NORETURN;
> +}
> +
>  /* Check for insn specification exceptions.  */
>  if (insn->spec) {
>  int spec = insn->spec, excp = 0, r;
> @@ -5726,6 +5733,7 @@ void gen_intermediate_code(CPUS390XState *env, struct 
> TranslationBlock *tb)
>  }
>  
>  dc.tb = tb;
> +dc.features = cpu->model->features;
>  dc.pc = pc_start;
>  dc.cc_op = CC_OP_DYNAMIC;
>  dc.ex_value = tb->cs_base;

It looks correct technically. Now I am not sure we want to do that,
without at least provided a user to enable those instructions. There are
a lot facilities that are partially implemented, usually because only
the really used instructions are implemented, but also because some
facilities are grouped in a single feature bit. This includes for
example FPE, LPP, MIE, LAT, I_SIM and more.

We could maybe provide a way to override the check, or only enable
enforcement for fully implemented facilities. Failing to do so means we
just introduce a regression from the user point of view (many binaries
will stop working) and also that we change thousand of lines of code
into dead code.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH] target/sh4: optimize cross-page and indirect jumps

2017-06-07 Thread Aurelien Jarno
Instead of unconditionally exiting to the exec loop for indirect jumps
or cross-page direct jumps, use the lookup_and_goto_ptr helper to jump
to the target if it is valid.

Signed-off-by: Aurelien Jarno 
---
 target/sh4/translate.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/target/sh4/translate.c b/target/sh4/translate.c
index 8bc132b27b..4f20537ef8 100644
--- a/target/sh4/translate.c
+++ b/target/sh4/translate.c
@@ -249,7 +249,7 @@ static void gen_goto_tb(DisasContext *ctx, int n, 
target_ulong dest)
 tcg_gen_movi_i32(cpu_pc, dest);
 if (ctx->singlestep_enabled)
 gen_helper_debug(cpu_env);
-tcg_gen_exit_tb(0);
+tcg_gen_lookup_and_goto_ptr(cpu_pc);
 }
 }
 
@@ -262,7 +262,7 @@ static void gen_jump(DisasContext * ctx)
 tcg_gen_discard_i32(cpu_delayed_pc);
if (ctx->singlestep_enabled)
 gen_helper_debug(cpu_env);
-   tcg_gen_exit_tb(0);
+tcg_gen_lookup_and_goto_ptr(cpu_pc);
 } else {
gen_goto_tb(ctx, 0, ctx->delayed_pc);
 }
-- 
2.11.0




Re: [Qemu-devel] qemu-system-sh4 -M r2d serial is broken.

2017-06-06 Thread Aurelien Jarno
On 2017-06-05 17:29, Rob Landley wrote:
> On 05/18/2017 06:01 PM, Aurelien Jarno wrote:
> > On 2017-05-18 17:37, Rob Landley wrote:
> >> On 05/18/2017 02:00 PM, Aurelien Jarno wrote:
> >>> On 2017-05-18 11:08, Rob Landley wrote:
> >>>> Serial input hangs after the first character in the 4.11 kernel:
> >>>>
> >>>>   http://www.spinics.net/lists/linux-sh/msg51183.html
> >>>>
> >>>> Because they enabled support for a buffer size thing QEMU doesn't
> >>>> emulate right:
> >>>>
> >>>>   http://www.spinics.net/lists/linux-sh/msg51189.html
> >>>
> >>> Indeed the SCIF emulation in QEMU is quite limited. The problem is that
> >>> it exposes many internal states to the software (and that's the same for
> >>> the SH4 CPU in general), and that's not really compatible with quick
> >>> emulation. In that case the timer should depend on the baud rate which
> >>> we don't really emulate.
> >>>
> >>> I'll try to have a look, that said my test environment is stuck with
> >>> kernel 4.8 due to the broken futex support on UP in kernel 4.9 (and
> >>> that's not QEMU specific). I'll try to build a more recent kernel with
> >>> additional patches.
> >>
> >> I thought Rich fixed that. Rich?
> > 
> > I have sent a patch already, but TTBOMK it hasn't been applied yet.
> > 
> > Aurelien
> 
> I poked Rich about the futex patch again today, he's been buried up to
> his neck in work but has to flush his bugfix queue before -rc5 so that
> should get sorted this week.

Thanks!

> Also, how do I tell the kernel to read the persistent clock on r2d? Both
> CONFIG_RTC_DRV_R9701 (from r2d defconfig) and CONFIG_RTC_DRV_SH give
> error messages and fail to read anything at boot time.

The R2D has such a RTC chip, but it is not emulated in QEMU. Someone has
to write it.

> If you need a new test environment (simple one that doesn't use futexes
> that I'm aware of) https://github.com/landley/mkroot is nearing its
> first release. You'll need to follow the README instructions to build
> musl-cross-make toolchains and set up the mcm symlink, but then:
> 
>   ./cross.sh sh4 ./mkroot.sh kernel
>   cd output/sh4
>   ./qemu-sh4.sh
>
> Should boot you to a shell prompt. And given that the root filesystem
> builder (mkroot.sh) is ~300 lines of bash and module/kernel is another
> 300 lines (mostly a big target-specific if/else staircase), it shouldn't
> be too hard to pull apart. :)

Ok, thanks.

> Right now sh4 is the only target in the release list that hasn't got the
> full "boots to a shell prompt and exits when you type exit, clock is set
> to correct time, block device works, network card works" functionality
> list. (That's all working on arm64 armv5l armv7l i486 i686 mips mipsel
> powerpc s390x x86-64.)

Patches to fix that are welcome.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH v4 2/3] target/s390x: implement STORE PAIR TO QUADWORD

2017-06-04 Thread Aurelien Jarno
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  2 ++
 target/s390x/mem_helper.c  | 24 
 target/s390x/translate.c   |  6 ++
 4 files changed, 33 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 61d4ef899e..12d7f8fe95 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -104,6 +104,7 @@ DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64)
 DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64)
 DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64)
 DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 32dee40269..73dd05daf0 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -796,6 +796,8 @@
 /* STORE ACCESS MULTIPLE */
 C(0x9b00, STAM,RS_a,  Z,   0, a2, 0, 0, stam, 0)
 C(0xeb9b, STAMY,   RSY_a, LD,  0, a2, 0, 0, stam, 0)
+/* STORE PAIR TO QUADWORD */
+C(0xe38e, STPQ,RXY_a, Z,   0, a2, r1_P, 0, stpq, 0)
 
 /* SUBTRACT */
 C(0x1b00, SR,  RR_a,  Z,   r1, r2, new, r1_32, sub, subs32)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index f48908cecb..a8988e0293 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1692,6 +1692,30 @@ uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
 return hi;
 }
 
+/* store pair to quadword */
+void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
+  uint64_t low, uint64_t high)
+{
+uintptr_t ra = GETPC();
+
+if (parallel_cpus) {
+#ifndef CONFIG_ATOMIC128
+cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+
+Int128 v = int128_make128(low, high);
+helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
+#endif
+} else {
+check_alignment(env, addr, 16, ra);
+
+cpu_stq_data_ra(env, addr + 0, high, ra);
+cpu_stq_data_ra(env, addr + 8, low, ra);
+}
+}
+
 /* Execute instruction.  This instruction executes an insn modified with
the contents of r1.  It does not change the executed instruction in memory;
it does not change the program counter.
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 3b621993cf..8702cc8cc7 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -4198,6 +4198,12 @@ static ExitStatus op_stmh(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_stpq(DisasContext *s, DisasOps *o)
+{
+gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+return NO_EXIT;
+}
+
 static ExitStatus op_srst(DisasContext *s, DisasOps *o)
 {
 gen_helper_srst(o->in1, cpu_env, regs[0], o->in1, o->in2);
-- 
2.11.0




[Qemu-devel] [PATCH v4 3/3] target/s390x: check alignment in CDSG in the !CONFIG_ATOMIC128 case

2017-06-04 Thread Aurelien Jarno
The CDSG instruction requires a 16-byte alignement, as expressed in
the MO_ALIGN_16 passed to helper_atomic_cmpxchgo_be_mmu. In the non
parallel case, use check_alignment to enforce this.

Signed-off-by: Aurelien Jarno 
---
 target/s390x/mem_helper.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index a8988e0293..80caab9c9d 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1262,6 +1262,8 @@ void HELPER(cdsg)(CPUS390XState *env, uint64_t addr,
 } else {
 uint64_t oldh, oldl;
 
+check_alignment(env, addr, 16, ra);
+
 oldh = cpu_ldq_data_ra(env, addr + 0, ra);
 oldl = cpu_ldq_data_ra(env, addr + 8, ra);
 
-- 
2.11.0




[Qemu-devel] [PATCH v4 0/3] target/s390x: implement loads/store quadword

2017-06-04 Thread Aurelien Jarno
This patchset implements the LOAD PAIR FROM QUADWORD and STORE PAIR TO
QUADWORD instructions. The corresponding patches have been in my previous
patchset and the pull request from Richard, but they failed to build on a
host without atomic128 support.

This new version fixes that. It has to be applied over the pull request
as it makes uses of the check_alignment function.

Finally the latest patch fixes a lack of alignement check in CDSG,
discovered as I used it as an example about how to properly handle hosts
without atomic128 support.

Aurelien Jarno (3):
  target/s390x: implement LOAD PAIR FROM QUADWORD
  target/s390x: implement STORE PAIR TO QUADWORD
  target/s390x: check alignment in CDSG in the !CONFIG_ATOMIC128 case

 target/s390x/helper.h  |  2 ++
 target/s390x/insn-data.def |  4 
 target/s390x/mem_helper.c  | 53 ++
 target/s390x/translate.c   | 13 
 4 files changed, 72 insertions(+)

-- 
2.11.0




[Qemu-devel] [PATCH v4 1/3] target/s390x: implement LOAD PAIR FROM QUADWORD

2017-06-04 Thread Aurelien Jarno
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  2 ++
 target/s390x/mem_helper.c  | 27 +++
 target/s390x/translate.c   |  7 +++
 4 files changed, 37 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 5724691d1b..61d4ef899e 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -103,6 +103,7 @@ DEF_HELPER_2(stfle, i32, env, i64)
 DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64)
 DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64)
 DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64)
+DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 9976d290c4..32dee40269 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -517,6 +517,8 @@
 /* LOAD PAIR DISJOINT */
 D(0xc804, LPD, SSF,   ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
 D(0xc805, LPDG,SSF,   ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ)
+/* LOAD PAIR FROM QUADWORD */
+C(0xe38f, LPQ, RXY_a, Z,   0, a2, r1_P, 0, lpq, 0)
 /* LOAD POSITIVE */
 C(0x1000, LPR, RR_a,  Z,   0, r2_32s, new, r1_32, abs, abs32)
 C(0xb900, LPGR,RRE,   Z,   0, r2, r1, 0, abs, abs64)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index be89cc4fb4..f48908cecb 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1665,6 +1665,33 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 }
 #endif
 
+/* load pair from quadword */
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
+{
+uintptr_t ra = GETPC();
+uint64_t hi, lo;
+
+if (parallel_cpus) {
+#ifndef CONFIG_ATOMIC128
+cpu_loop_exit_atomic(ENV_GET_CPU(env), ra);
+#else
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
+hi = int128_gethi(v);
+lo = int128_getlo(v);
+#endif
+} else {
+check_alignment(env, addr, 16, ra);
+
+hi = cpu_ldq_data_ra(env, addr + 0, ra);
+lo = cpu_ldq_data_ra(env, addr + 8, ra);
+}
+
+env->retxl = lo;
+return hi;
+}
+
 /* Execute instruction.  This instruction executes an insn modified with
the contents of r1.  It does not change the executed instruction in memory;
it does not change the program counter.
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 4f4dffbdf4..3b621993cf 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2904,6 +2904,13 @@ static ExitStatus op_lpd(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_lpq(DisasContext *s, DisasOps *o)
+{
+gen_helper_lpq(o->out, cpu_env, o->in2);
+return_low128(o->out2);
+return NO_EXIT;
+}
+
 #ifndef CONFIG_USER_ONLY
 static ExitStatus op_lura(DisasContext *s, DisasOps *o)
 {
-- 
2.11.0




Re: [Qemu-devel] [PULL 00/69] target/s390x tcg patches

2017-06-04 Thread Aurelien Jarno
On 2017-06-04 11:32, no-re...@patchew.org wrote:
> Hi,
> 
> This series failed build test on s390x host. Please find the details below.
> 
> Subject: [Qemu-devel] [PULL 00/69] target/s390x tcg patches
> Message-id: 20170604173509.29684-1-...@twiddle.net
> Type: series
> 

[snip]

> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c: In 
> function ‘helper_lpq’:
> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c:1675:16: 
> error: implicit declaration of function ‘helper_atomic_ldo_be_mmu’ 
> [-Werror=implicit-function-declaration]
>  Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
> ^~~~
> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c:1675:5: 
> error: nested extern declaration of ‘helper_atomic_ldo_be_mmu’ 
> [-Werror=nested-externs]
>  Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
>  ^~
> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c: In 
> function ‘helper_stpq’:
> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c:1690:5: 
> error: implicit declaration of function ‘helper_atomic_sto_be_mmu’ 
> [-Werror=implicit-function-declaration]
>  helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
>  ^~~~
> /var/tmp/patchew-tester-tmp-9ddil10x/src/target/s390x/mem_helper.c:1690:5: 
> error: nested extern declaration of ‘helper_atomic_sto_be_mmu’ 
> [-Werror=nested-externs]
> cc1: all warnings being treated as errors
> /var/tmp/patchew-tester-tmp-9ddil10x/src/rules.mak:69: recipe for target 
> 'target/s390x/mem_helper.o' failed
> make[1]: *** [target/s390x/mem_helper.o] Error 1
> Makefile:327: recipe for target 'subdir-s390x-softmmu' failed
> make: *** [subdir-s390x-softmmu] Error 2
> make: *** Waiting for unfinished jobs

Sorry for not testing test building those patches properly.

The best is probably to just drop them for now as they are independent
besides some easy conflict to solve in helper.h. Properly implementing
the !CONFIG_ATOMIC128 case requires to check the alignment and it's
better done with the check_alignment function introduced later in the
series.

I'll send new patches for those two instructions in the next hours.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PULL 00/69] target/s390x tcg patches

2017-06-04 Thread Aurelien Jarno
On 2017-06-04 10:34, Richard Henderson wrote:
> The queue has gotten overlong.  This includes my unwinding patches,
> the execute rewrite, and Aurelien's flushing out of missing Z insns.
> 
> It does *not* include Aurelian's final patch to bump the base tcg
> cpu to z800.  David Hildenbrand had objections to that; I expect
> that we can address that in the next patch set.

Fair enough. I think I'll put this one in standby and look at that again
when QEMU can emulate a higher model like a z990.

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v1] s390x/cpumodel: wire up cpu type + id for TCG

2017-06-02 Thread Aurelien Jarno
On 2017-06-02 12:52, David Hildenbrand wrote:
> 
> >> +
> >> +#ifndef CONFIG_USER_ONLY
> >> +void HELPER(stidp)(CPUS390XState *env, uint64_t addr)
> >> +{
> >> +S390CPU *cpu = s390_env_get_cpu(env);
> >> +uint64_t cpuid = s390_cpuid_from_cpu_model(cpu->model);
> >> +
> >> +if (addr & 0x7) {
> >> +program_interrupt(env, PGM_SPECIFICATION, ILEN_LATER_INC);
> >> +return;
> >> +}
> >> +
> >> +/* basic mode, write the cpu address into the first 4 bit of the ID */
> >> +cpuid |= ((uint64_t)env->cpu_num & 0xf) << 54;
> >> +cpu_stq_data(env, addr, cpuid);
> >> +}
> >> +#endif
> > 
> > I don't really see the point of using an helper instead of just updating
> > the existing code. From what I understand the cpuid does not change at
> > runtime, so the s390_cpuid_from_cpu_model function can also be called 
> > from translate.c.
> > 
> > Aurelien
> > 
> 
> From what I can see, conditional exceptions are more complicated to
> implement without helpers (involves generating compares, jumps and so

In that case you don't need to do any compare an jump. It's a standard
load/store alignement check, you can just specify the MO_ALIGN flag to
the tcg_gen_qemu_st_i64 function.


> on). As this function is not expected to be executed on hot paths, I
> think moving it into a helper is the right thing to do.

This is not only about performance, but also avoiding code that is
spread in many files. Well theoretically increasing the number of
entries in the helper hash table has a performance impact, but i don't
think it is measurable.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 30/30] target/s390x: update maximum TCG model to z800

2017-06-02 Thread Aurelien Jarno
On 2017-06-02 13:30, David Hildenbrand wrote:
> On 02.06.2017 10:09, Thomas Huth wrote:
> > On 01.06.2017 21:17, Aurelien Jarno wrote:
> >> On 2017-06-01 11:04, David Hildenbrand wrote:
> >>> On 01.06.2017 10:38, David Hildenbrand wrote:
> >>>> On 01.06.2017 00:01, Aurelien Jarno wrote:
> >>>>> At the same time fix the TCG version of get_max_cpu_model to return the
> >>>>> maximum model like on KVM. Remove the ETF2 and long-displacement
> >>>>
> >>>> I don't understand the part
> >>>> "fix the TCG version of get_max_cpu_model to return the maximum model
> >>>> like on KVM".
> >>>>
> >>>> Can you elaborate?
> >>>>
> >>>>> facilities from the additional features as it is included in the z800.
> >>>>>
> >>>>> Signed-off-by: Aurelien Jarno 
> >>>>> ---
> >>>>>  target/s390x/cpu_models.c | 13 ++---
> >>>>>  1 file changed, 6 insertions(+), 7 deletions(-)
> >>>>>
> >>>>> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> >>>>> index fc3cb25cc3..c13bbd852c 100644
> >>>>> --- a/target/s390x/cpu_models.c
> >>>>> +++ b/target/s390x/cpu_models.c
> >>>>> @@ -668,8 +668,6 @@ static void 
> >>>>> add_qemu_cpu_model_features(S390FeatBitmap fbm)
> >>>>>  static const int feats[] = {
> >>>>>  S390_FEAT_STFLE,
> >>>>>  S390_FEAT_EXTENDED_IMMEDIATE,
> >>>>> -S390_FEAT_EXTENDED_TRANSLATION_2,
> >>>>> -S390_FEAT_LONG_DISPLACEMENT,
> >>>>>  S390_FEAT_LONG_DISPLACEMENT_FAST,
> >>>>>  S390_FEAT_ETF2_ENH,
> >>>>>  S390_FEAT_STORE_CLOCK_FAST,
> >>>>> @@ -696,9 +694,9 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
> >>>>>  if (kvm_enabled()) {
> >>>>>  kvm_s390_get_host_cpu_model(&max_model, errp);
> >>>>>  } else {
> >>>>> -/* TCG emulates a z900 (with some optional additional 
> >>>>> features) */
> >>>>> -max_model.def = &s390_cpu_defs[0];
> >>>>> -bitmap_copy(max_model.features, max_model.def->default_feat,
> >>>>> +/* TCG emulates a z800 (with some optional additional 
> >>>>> features) */
> >>>>> +max_model.def = s390_find_cpu_def(0x2066, 7, 3, NULL);
> >>>>> +bitmap_copy(max_model.features, max_model.def->full_feat,
> >>>>>  S390_FEAT_MAX);
> >>>
> >>> This is most likely wrong: you're indicating features here that are not
> >>> available on tcg. esp. S390_FEAT_SIE_F2 and friends.
> >>>
> >>> I think should only copy the base features and add whatever else is
> >>> available via add_qemu_cpu_model_features() as already done.
> >>
> >> The patch series added all the z800 features exposed via STFL/STFLE.
> >> Indeed the SIE features are missing, but anyway QEMU doesn't emulate SIE
> >> at all so the lack of these features are not exposed to the guest. In that
> >> regard QEMU already wrongly claim to emulate a z900.
> 
> Please note that:
> 
> a) SIE features were never part of the max model. QEMU never claimed
> that. With your change one could suddenly do a -cpu z900,sie_f2=on,
> which is wrong.
> 
> b) The SIE_F2 feature tells the guest that the SIE instruction is
> available. E.g. Linux will look at this bit and show SIE support in
> /proc/cpuinfo and unlock the KVM module.

I understand your point. That said I doubt we will support the SIE
instruction soon (it looks quite complicated and I can't find any doc).
As we are going to emulate more facilities to QEMU, it will be more and
more difficult to select a modern CPU. One will have to use -cpu,z900,
etf2=on,ldisp=on,...,eimm=on. I think we have to provide users with an
easier way to do that.

One possibility would be to filter the features that are not emulated by
QEMU (like on the ppc64 target) or to allow the user to specify a higher
model provided the non emulated features are disabled. Something like
-cpu z800,sie_f2=off.

> Please, just don't add features to the MAX model that we don't implement.

Please note that QEMU does not fully implement S390_FEAT_ESAN3 (though
that is addressed in this patchset) and S

Re: [Qemu-devel] [PATCH v3 30/30] target/s390x: update maximum TCG model to z800

2017-06-02 Thread Aurelien Jarno
On 2017-06-01 21:56, David Hildenbrand wrote:
> On 01.06.2017 21:17, Aurelien Jarno wrote:
> > On 2017-06-01 10:38, David Hildenbrand wrote:
> >> On 01.06.2017 00:01, Aurelien Jarno wrote:
> >>> At the same time fix the TCG version of get_max_cpu_model to return the
> >>> maximum model like on KVM. Remove the ETF2 and long-displacement
> >>
> >> I don't understand the part
> >> "fix the TCG version of get_max_cpu_model to return the maximum model
> >> like on KVM".
> >>
> >> Can you elaborate?
> > 
> > Currently get_max_cpu_model returns the features of the base model, so
> > for example the one of a z900 even on a z800. This makes impossible to
> > enable the features that are provided by a z800 like etf2 or ldisp.
> > 
> 
> Right, you can always change the max_cpu_model, e.g. bumping up the
> version or adding new features, that is just fine.
> 
> > For what I understand from the KVM code (but I haven't tested), the
> > function return all the features that are supported by the current CPU,
> > not all the features that are supported by the base model of the current
> > CPU.
> 
> Correct, for KVM it is the detected model, that means: Base features +
> optional features.
> 
> 
> > 
> > 
> >>> facilities from the additional features as it is included in the z800.
> >>>
> >>> Signed-off-by: Aurelien Jarno 
> >>> ---
> >>>  target/s390x/cpu_models.c | 13 ++---
> >>>  1 file changed, 6 insertions(+), 7 deletions(-)
> >>>
> >>> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> >>> index fc3cb25cc3..c13bbd852c 100644
> >>> --- a/target/s390x/cpu_models.c
> >>> +++ b/target/s390x/cpu_models.c
> >>> @@ -668,8 +668,6 @@ static void 
> >>> add_qemu_cpu_model_features(S390FeatBitmap fbm)
> >>>  static const int feats[] = {
> >>>  S390_FEAT_STFLE,
> >>>  S390_FEAT_EXTENDED_IMMEDIATE,
> >>> -S390_FEAT_EXTENDED_TRANSLATION_2,
> >>> -S390_FEAT_LONG_DISPLACEMENT,
> >>>  S390_FEAT_LONG_DISPLACEMENT_FAST,
> >>>  S390_FEAT_ETF2_ENH,
> >>>  S390_FEAT_STORE_CLOCK_FAST,
> >>> @@ -696,9 +694,9 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
> >>>  if (kvm_enabled()) {
> >>>  kvm_s390_get_host_cpu_model(&max_model, errp);
> >>>  } else {
> >>> -/* TCG emulates a z900 (with some optional additional features) 
> >>> */
> >>> -max_model.def = &s390_cpu_defs[0];
> >>> -bitmap_copy(max_model.features, max_model.def->default_feat,
> >>> +/* TCG emulates a z800 (with some optional additional features) 
> >>> */
> >>> +max_model.def = s390_find_cpu_def(0x2066, 7, 3, NULL);
> >>> +bitmap_copy(max_model.features, max_model.def->full_feat,
> >>>  S390_FEAT_MAX);
> >>>  add_qemu_cpu_model_features(max_model.features);
> >>>  }
> >>> @@ -956,8 +954,9 @@ static void s390_qemu_cpu_model_initfn(Object *obj)
> >>>  S390CPU *cpu = S390_CPU(obj);
> >>>  
> >>>  cpu->model = g_malloc0(sizeof(*cpu->model));
> >>> -/* TCG emulates a z900 (with some optional additional features) */
> >>> -memcpy(&s390_qemu_cpu_defs, &s390_cpu_defs[0], 
> >>> sizeof(s390_qemu_cpu_defs));
> >>> +/* TCG emulates a z800 (with some optional additional features) */
> >>> +memcpy(&s390_qemu_cpu_defs, s390_find_cpu_def(0x2066, 7, 3, NULL),
> >>> +   sizeof(s390_qemu_cpu_defs));
> >>
> >> No changing the qemu model without compatibility handling.
> > 
> > This patch series is based on the patch from Thomas Huth. It means the
> > QEMU model is still based on a z900, but that it is possible to enable
> > some more features like etf2.
> 
> Thomas' code did neither change features nor the "model definition". It
> just allows for some more feature to be set. It is a hack.
> 
> I am pretty sure that expanding the "qemu" CPU model now (QMP
> query-cpu-model-expansion) will indicate a z800, not a z900.
> 
> See cpu_info_from_model(). And that is a problem, because the QEMU CPU
> model is a "migration-safe" CPU model, meaning it must remain equal for
> every compatibility machine.
> 

Ok, I get what you mean now.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH] msi: remove return code for msi_init()

2017-06-01 Thread Aurelien Jarno
On 2017-06-01 16:23, Paolo Bonzini wrote:
> 
> 
> On 01/06/2017 10:27, Marcel Apfelbaum wrote:
> > On 31/05/2017 11:28, Paolo Bonzini wrote:
> >> No, for now I'd rather just go and remove msi_nonbroken.  When someone
> >> reports a bug, we can add back "msi_broken".
> > 
> > Hi,
> > I agree with the direction, but I am concerned msi_nonbroken is there
> > for a reason.
> > We might break some (obscure/not in use) machine.
> > Maybe we should CC all arch machine maintainers/contributors to give
> > them a chance to object...
> 
> Yeah, Alpha, MIPS and SH are those that support PCI.  Adding Richard and
> Aurelien, do your platforms support MSI on real hardware but not in QEMU?

SH clearly doesn't support MSI.

The oldest MIPS board also do not support MSI, but I guess the Boston
board might support it. I am adding Paul Burton in Cc: who probably
knows about that.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v1] s390x/cpumodel: wire up cpu type + id for TCG

2017-06-01 Thread Aurelien Jarno
p)(CPUS390XState *env, uint64_t addr)
> +{
> +S390CPU *cpu = s390_env_get_cpu(env);
> +uint64_t cpuid = s390_cpuid_from_cpu_model(cpu->model);
> +
> +if (addr & 0x7) {
> +program_interrupt(env, PGM_SPECIFICATION, ILEN_LATER_INC);
> +return;
> +}
> +
> +/* basic mode, write the cpu address into the first 4 bit of the ID */
> +cpuid |= ((uint64_t)env->cpu_num & 0xf) << 54;
> +cpu_stq_data(env, addr, cpuid);
> +}
> +#endif

I don't really see the point of using an helper instead of just updating
the existing code. From what I understand the cpuid does not change at
runtime, so the s390_cpuid_from_cpu_model function can also be called 
from translate.c.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 30/30] target/s390x: update maximum TCG model to z800

2017-06-01 Thread Aurelien Jarno
On 2017-06-01 11:04, David Hildenbrand wrote:
> On 01.06.2017 10:38, David Hildenbrand wrote:
> > On 01.06.2017 00:01, Aurelien Jarno wrote:
> >> At the same time fix the TCG version of get_max_cpu_model to return the
> >> maximum model like on KVM. Remove the ETF2 and long-displacement
> > 
> > I don't understand the part
> > "fix the TCG version of get_max_cpu_model to return the maximum model
> > like on KVM".
> > 
> > Can you elaborate?
> > 
> >> facilities from the additional features as it is included in the z800.
> >>
> >> Signed-off-by: Aurelien Jarno 
> >> ---
> >>  target/s390x/cpu_models.c | 13 ++---
> >>  1 file changed, 6 insertions(+), 7 deletions(-)
> >>
> >> diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> >> index fc3cb25cc3..c13bbd852c 100644
> >> --- a/target/s390x/cpu_models.c
> >> +++ b/target/s390x/cpu_models.c
> >> @@ -668,8 +668,6 @@ static void add_qemu_cpu_model_features(S390FeatBitmap 
> >> fbm)
> >>  static const int feats[] = {
> >>  S390_FEAT_STFLE,
> >>  S390_FEAT_EXTENDED_IMMEDIATE,
> >> -S390_FEAT_EXTENDED_TRANSLATION_2,
> >> -S390_FEAT_LONG_DISPLACEMENT,
> >>  S390_FEAT_LONG_DISPLACEMENT_FAST,
> >>  S390_FEAT_ETF2_ENH,
> >>  S390_FEAT_STORE_CLOCK_FAST,
> >> @@ -696,9 +694,9 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
> >>  if (kvm_enabled()) {
> >>  kvm_s390_get_host_cpu_model(&max_model, errp);
> >>  } else {
> >> -/* TCG emulates a z900 (with some optional additional features) */
> >> -max_model.def = &s390_cpu_defs[0];
> >> -bitmap_copy(max_model.features, max_model.def->default_feat,
> >> +/* TCG emulates a z800 (with some optional additional features) */
> >> +max_model.def = s390_find_cpu_def(0x2066, 7, 3, NULL);
> >> +bitmap_copy(max_model.features, max_model.def->full_feat,
> >>  S390_FEAT_MAX);
> 
> This is most likely wrong: you're indicating features here that are not
> available on tcg. esp. S390_FEAT_SIE_F2 and friends.
> 
> I think should only copy the base features and add whatever else is
> available via add_qemu_cpu_model_features() as already done.

The patch series added all the z800 features exposed via STFL/STFLE.
Indeed the SIE features are missing, but anyway QEMU doesn't emulate SIE
at all so the lack of these features are not exposed to the guest. In that
regard QEMU already wrongly claim to emulate a z900.


> >>  add_qemu_cpu_model_features(max_model.features);
> >>  }
> >> @@ -956,8 +954,9 @@ static void s390_qemu_cpu_model_initfn(Object *obj)
> >>  S390CPU *cpu = S390_CPU(obj);
> >>  
> >>  cpu->model = g_malloc0(sizeof(*cpu->model));
> >> -/* TCG emulates a z900 (with some optional additional features) */
> >> -memcpy(&s390_qemu_cpu_defs, &s390_cpu_defs[0], 
> >> sizeof(s390_qemu_cpu_defs));
> >> +/* TCG emulates a z800 (with some optional additional features) */
> >> +memcpy(&s390_qemu_cpu_defs, s390_find_cpu_def(0x2066, 7, 3, NULL),
> >> +   sizeof(s390_qemu_cpu_defs));
> > 
> > No changing the qemu model without compatibility handling.
> > 
> Please have a look at the following mail for a possible solution:
> 
> https://lists.gnu.org/archive/html/qemu-devel/2017-05/msg06030.html
> 
> This could be moved to a separate patch. So this patch really should
> just care about the maximum model, not the qemu model.

From what I understand from this thread, the patch from Thomas Huth was
finally considered acceptable. I am adding him in Cc: so that he can
comment.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



Re: [Qemu-devel] [PATCH v3 30/30] target/s390x: update maximum TCG model to z800

2017-06-01 Thread Aurelien Jarno
On 2017-06-01 10:38, David Hildenbrand wrote:
> On 01.06.2017 00:01, Aurelien Jarno wrote:
> > At the same time fix the TCG version of get_max_cpu_model to return the
> > maximum model like on KVM. Remove the ETF2 and long-displacement
> 
> I don't understand the part
> "fix the TCG version of get_max_cpu_model to return the maximum model
> like on KVM".
> 
> Can you elaborate?

Currently get_max_cpu_model returns the features of the base model, so
for example the one of a z900 even on a z800. This makes impossible to
enable the features that are provided by a z800 like etf2 or ldisp.

For what I understand from the KVM code (but I haven't tested), the
function return all the features that are supported by the current CPU,
not all the features that are supported by the base model of the current
CPU.


> > facilities from the additional features as it is included in the z800.
> > 
> > Signed-off-by: Aurelien Jarno 
> > ---
> >  target/s390x/cpu_models.c | 13 ++---
> >  1 file changed, 6 insertions(+), 7 deletions(-)
> > 
> > diff --git a/target/s390x/cpu_models.c b/target/s390x/cpu_models.c
> > index fc3cb25cc3..c13bbd852c 100644
> > --- a/target/s390x/cpu_models.c
> > +++ b/target/s390x/cpu_models.c
> > @@ -668,8 +668,6 @@ static void add_qemu_cpu_model_features(S390FeatBitmap 
> > fbm)
> >  static const int feats[] = {
> >  S390_FEAT_STFLE,
> >  S390_FEAT_EXTENDED_IMMEDIATE,
> > -S390_FEAT_EXTENDED_TRANSLATION_2,
> > -S390_FEAT_LONG_DISPLACEMENT,
> >  S390_FEAT_LONG_DISPLACEMENT_FAST,
> >  S390_FEAT_ETF2_ENH,
> >  S390_FEAT_STORE_CLOCK_FAST,
> > @@ -696,9 +694,9 @@ static S390CPUModel *get_max_cpu_model(Error **errp)
> >  if (kvm_enabled()) {
> >  kvm_s390_get_host_cpu_model(&max_model, errp);
> >  } else {
> > -/* TCG emulates a z900 (with some optional additional features) */
> > -max_model.def = &s390_cpu_defs[0];
> > -bitmap_copy(max_model.features, max_model.def->default_feat,
> > +/* TCG emulates a z800 (with some optional additional features) */
> > +max_model.def = s390_find_cpu_def(0x2066, 7, 3, NULL);
> > +bitmap_copy(max_model.features, max_model.def->full_feat,
> >  S390_FEAT_MAX);
> >  add_qemu_cpu_model_features(max_model.features);
> >  }
> > @@ -956,8 +954,9 @@ static void s390_qemu_cpu_model_initfn(Object *obj)
> >  S390CPU *cpu = S390_CPU(obj);
> >  
> >  cpu->model = g_malloc0(sizeof(*cpu->model));
> > -/* TCG emulates a z900 (with some optional additional features) */
> > -memcpy(&s390_qemu_cpu_defs, &s390_cpu_defs[0], 
> > sizeof(s390_qemu_cpu_defs));
> > +/* TCG emulates a z800 (with some optional additional features) */
> > +memcpy(&s390_qemu_cpu_defs, s390_find_cpu_def(0x2066, 7, 3, NULL),
> > +   sizeof(s390_qemu_cpu_defs));
> 
> No changing the qemu model without compatibility handling.

This patch series is based on the patch from Thomas Huth. It means the
QEMU model is still based on a z900, but that it is possible to enable
some more features like etf2.

Aurelien

-- 
Aurelien Jarno  GPG: 4096R/1DDD8C9B
aurel...@aurel32.net http://www.aurel32.net



[Qemu-devel] [PATCH v3 19/30] target/s390x: fix adj_len_to_page

2017-05-31 Thread Aurelien Jarno
adj_len_to_page doesn't return the correct result when the address
is already page aligned and the length is bigger than a page. Fix that.

Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/mem_helper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index aaa347c121..6dfa087ff1 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -61,7 +61,7 @@ static inline uint32_t adj_len_to_page(uint32_t len, uint64_t 
addr)
 {
 #ifndef CONFIG_USER_ONLY
 if ((addr & ~TARGET_PAGE_MASK) + len - 1 >= TARGET_PAGE_SIZE) {
-return -addr & ~TARGET_PAGE_MASK;
+return -(addr | TARGET_PAGE_MASK);
 }
 #endif
 return len;
-- 
2.11.0




[Qemu-devel] [PATCH v3 03/30] target/s390x: implement local-TLB-clearing in IPTE

2017-05-31 Thread Aurelien Jarno
And at the same time make IPTE SMP aware.

Reviewed-by: Thomas Huth 
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h |  2 +-
 target/s390x/mem_helper.c | 21 +
 target/s390x/translate.c  |  6 +-
 3 files changed, 19 insertions(+), 10 deletions(-)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index cc451c70a6..3f5a05d43b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -111,7 +111,7 @@ DEF_HELPER_4(mvcs, i32, env, i64, i64, i64)
 DEF_HELPER_4(mvcp, i32, env, i64, i64, i64)
 DEF_HELPER_4(sigp, i32, env, i64, i32, i64)
 DEF_HELPER_FLAGS_2(sacf, TCG_CALL_NO_WG, void, env, i64)
-DEF_HELPER_FLAGS_3(ipte, TCG_CALL_NO_RWG, void, env, i64, i64)
+DEF_HELPER_FLAGS_4(ipte, TCG_CALL_NO_RWG, void, env, i64, i64, i32)
 DEF_HELPER_FLAGS_1(ptlb, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_FLAGS_1(purge, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_2(lra, i64, env, i64)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 0ebd65d9ab..ddbebcd7ae 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1073,17 +1073,16 @@ uint32_t HELPER(mvcp)(CPUS390XState *env, uint64_t l, 
uint64_t a1, uint64_t a2)
 }
 
 /* invalidate pte */
-void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr)
+void HELPER(ipte)(CPUS390XState *env, uint64_t pto, uint64_t vaddr,
+  uint32_t m4)
 {
 CPUState *cs = CPU(s390_env_get_cpu(env));
 uint64_t page = vaddr & TARGET_PAGE_MASK;
 uint64_t pte_addr, pte;
 
-/* XXX broadcast to other CPUs */
-
 /* Compute the page table entry address */
 pte_addr = (pto & _SEGMENT_ENTRY_ORIGIN);
-pte_addr += (vaddr & _VADDR_PX) >> 9;
+pte_addr += (vaddr & VADDR_PX) >> 9;
 
 /* Mark the page table entry as invalid */
 pte = ldq_phys(cs->as, pte_addr);
@@ -1092,13 +1091,19 @@ void HELPER(ipte)(CPUS390XState *env, uint64_t pto, 
uint64_t vaddr)
 
 /* XXX we exploit the fact that Linux passes the exact virtual
address here - it's not obliged to! */
-tlb_flush_page(cs, page);
+/* XXX: the LC bit should be considered as 0 if the local-TLB-clearing
+   facility is not installed.  */
+if (m4 & 1) {
+tlb_flush_page(cs, page);
+} else {
+tlb_flush_page_all_cpus_synced(cs, page);
+}
 
 /* XXX 31-bit hack */
-if (page & 0x8000) {
-tlb_flush_page(cs, page & ~0x8000);
+if (m4 & 1) {
+tlb_flush_page(cs, page ^ 0x8000);
 } else {
-tlb_flush_page(cs, page | 0x8000);
+tlb_flush_page_all_cpus_synced(cs, page ^ 0x8000);
 }
 }
 
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index f7598184a6..f160b62c19 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2352,8 +2352,12 @@ static ExitStatus op_ipm(DisasContext *s, DisasOps *o)
 #ifndef CONFIG_USER_ONLY
 static ExitStatus op_ipte(DisasContext *s, DisasOps *o)
 {
+TCGv_i32 m4;
+
 check_privileged(s);
-gen_helper_ipte(cpu_env, o->in1, o->in2);
+m4 = tcg_const_i32(get_field(s->fields, m4));
+gen_helper_ipte(cpu_env, o->in1, o->in2, m4);
+tcg_temp_free_i32(m4);
 return NO_EXIT;
 }
 
-- 
2.11.0




[Qemu-devel] [PATCH v3 10/30] target/s390x: implement MOVE INVERSE

2017-05-31 Thread Aurelien Jarno
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  2 ++
 target/s390x/mem_helper.c  | 12 
 target/s390x/translate.c   |  8 
 4 files changed, 23 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 4ada89488b..a618fe5539 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -3,6 +3,7 @@ DEF_HELPER_FLAGS_4(nc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(oc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(xc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvc, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvcin, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(clc, TCG_CALL_NO_WG, i32, env, i32, i64, i64)
 DEF_HELPER_3(mvcl, i32, env, i32, i32)
 DEF_HELPER_FLAGS_4(clm, TCG_CALL_NO_WG, i32, env, i32, i32, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 01278949fc..c8f77611ab 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -572,6 +572,8 @@
 C(0xe548, MVGHI,   SIL,   GIE, la1, i2, 0, m1_64, mov2, 0)
 C(0x9200, MVI, SI,Z,   la1, i2, 0, m1_8, mov2, 0)
 C(0xeb52, MVIY,SIY,   LD,  la1, i2, 0, m1_8, mov2, 0)
+/* MOVE INVERSE */
+C(0xe800, MVCIN,   SS_a,  Z,   la1, a2, 0, 0, mvcin, 0)
 /* MOVE LONG */
 C(0x0e00, MVCL,RR_a,  Z,   0, 0, 0, 0, mvcl, 0)
 /* MOVE LONG EXTENDED */
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 15b5f45b77..eef754724d 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -231,6 +231,18 @@ void HELPER(mvc)(CPUS390XState *env, uint32_t l, uint64_t 
dest, uint64_t src)
 do_helper_mvc(env, l, dest, src, GETPC());
 }
 
+/* move inverse  */
+void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
+{
+uintptr_t ra = GETPC();
+int i;
+
+for (i = 0; i <= l; i++) {
+uint8_t v = cpu_ldub_data_ra(env, src - i, ra);
+cpu_stb_data_ra(env, dest + i, v, ra);
+}
+}
+
 /* compare unsigned byte arrays */
 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
   uint64_t s2, uintptr_t ra)
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 30d0575c03..61373df29e 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2940,6 +2940,14 @@ static ExitStatus op_mvc(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_mvcin(DisasContext *s, DisasOps *o)
+{
+TCGv_i32 l = tcg_const_i32(get_field(s->fields, l1));
+gen_helper_mvcin(cpu_env, l, o->addr1, o->in2);
+tcg_temp_free_i32(l);
+return NO_EXIT;
+}
+
 static ExitStatus op_mvcl(DisasContext *s, DisasOps *o)
 {
 TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
-- 
2.11.0




[Qemu-devel] [PATCH v3 05/30] target/s390x: implement TEST ADDRESSING MODE

2017-05-31 Thread Aurelien Jarno
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/insn-data.def |  3 +++
 target/s390x/translate.c   | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 0f70acea5c..170b50ef2e 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -810,6 +810,9 @@
 /* SUPERVISOR CALL */
 C(0x0a00, SVC, I, Z,   0, 0, 0, 0, svc, 0)
 
+/* TEST ADDRESSING MODE */
+C(0x010b, TAM, E, Z,   0, 0, 0, 0, tam, 0)
+
 /* TEST AND SET */
 C(0x9300, TS,  S, Z,   0, a2, 0, 0, ts, 0)
 
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 0cfa8cc05e..7f265aeb40 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -4063,6 +4063,16 @@ static ExitStatus op_svc(DisasContext *s, DisasOps *o)
 return EXIT_NORETURN;
 }
 
+static ExitStatus op_tam(DisasContext *s, DisasOps *o)
+{
+int cc = 0;
+
+cc |= (s->tb->flags & FLAG_MASK_64) ? 2 : 0;
+cc |= (s->tb->flags & FLAG_MASK_32) ? 1 : 0;
+gen_op_movi_cc(s, cc);
+return NO_EXIT;
+}
+
 static ExitStatus op_tceb(DisasContext *s, DisasOps *o)
 {
 gen_helper_tceb(cc_op, cpu_env, o->in1, o->in2);
-- 
2.11.0




[Qemu-devel] [PATCH v3 08/30] target/s390x: implement STORE PAIR TO QUADWORD

2017-05-31 Thread Aurelien Jarno
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  2 ++
 target/s390x/mem_helper.c  | 12 
 target/s390x/translate.c   |  6 ++
 4 files changed, 21 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index ca78d1b162..596fec28ca 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -88,6 +88,7 @@ DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_2(stfle, i32, env, i64)
 DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
+DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 53c86d5832..5314162b3d 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -770,6 +770,8 @@
 /* STORE ACCESS MULTIPLE */
 C(0x9b00, STAM,RS_a,  Z,   0, a2, 0, 0, stam, 0)
 C(0xeb9b, STAMY,   RSY_a, LD,  0, a2, 0, 0, stam, 0)
+/* STORE PAIR TO QUADWORD */
+C(0xe38e, STPQ,RXY_a, Z,   0, a2, r1_P, 0, stpq, 0)
 
 /* SUBTRACT */
 C(0x1b00, SR,  RR_a,  Z,   r1, r2, new, r1_32, sub, subs32)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 4f34f873c7..15b5f45b77 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1250,6 +1250,18 @@ uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
 return int128_gethi(v);
 }
 
+/* store pair to quadword */
+void HELPER(stpq)(CPUS390XState *env, uint64_t addr,
+  uint64_t low, uint64_t high)
+{
+uintptr_t ra = GETPC();
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+
+Int128 v = int128_make128(low, high);
+helper_atomic_sto_be_mmu(env, addr, v, oi, ra);
+}
+
 /* Execute instruction.  This instruction executes an insn modified with
the contents of r1.  It does not change the executed instruction in memory;
it does not change the program counter.
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index ec61590e50..6635877bbd 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -4019,6 +4019,12 @@ static ExitStatus op_stmh(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_stpq(DisasContext *s, DisasOps *o)
+{
+gen_helper_stpq(cpu_env, o->in2, o->out2, o->out);
+return NO_EXIT;
+}
+
 static ExitStatus op_srst(DisasContext *s, DisasOps *o)
 {
 gen_helper_srst(o->in1, cpu_env, regs[0], o->in1, o->in2);
-- 
2.11.0




[Qemu-devel] [PATCH v3 12/30] target/s390x: implement MOVE WITH OFFSET

2017-05-31 Thread Aurelien Jarno
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  4 
 target/s390x/mem_helper.c  | 31 +++
 target/s390x/translate.c   |  8 
 4 files changed, 44 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index e62e45534d..a197b8bc39 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -14,6 +14,7 @@ DEF_HELPER_FLAGS_4(divu64, TCG_CALL_NO_WG, i64, env, i64, 
i64, i64)
 DEF_HELPER_4(srst, i64, env, i64, i64, i64)
 DEF_HELPER_4(clst, i64, env, i64, i64, i64)
 DEF_HELPER_FLAGS_4(mvn, TCG_CALL_NO_WG, void, env, i32, i64, i64)
+DEF_HELPER_FLAGS_4(mvo, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(mvpg, TCG_CALL_NO_WG, i32, env, i64, i64, i64)
 DEF_HELPER_4(mvst, i64, env, i64, i64, i64)
 DEF_HELPER_4(ex, void, env, i32, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 6af717648c..47542eeaf3 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -584,6 +584,10 @@
 C(0xb254, MVPG,RRE,   Z,   r1_o, r2_o, 0, 0, mvpg, 0)
 /* MOVE STRING */
 C(0xb255, MVST,RRE,   Z,   r1_o, r2_o, 0, 0, mvst, 0)
+/* MOVE WITH OFFSET */
+/* Really format SS_b, but we pack both lengths into one argument
+   for the helper call, so we might as well leave one 8-bit field.  */
+C(0xf100, MVO, SS_a,  Z,   la1, a2, 0, 0, mvo, 0)
 
 /* MULTIPLY */
 C(0x1c00, MR,  RR_a,  Z,   r1p1_32s, r2_32s, new, r1_D32, mul, 0)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 53852784e6..3601bb9cdd 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -256,6 +256,37 @@ void HELPER(mvn)(CPUS390XState *env, uint32_t l, uint64_t 
dest, uint64_t src)
 }
 }
 
+/* move with offset  */
+void HELPER(mvo)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t src)
+{
+uintptr_t ra = GETPC();
+int len_dest = l >> 4;
+int len_src = l & 0xf;
+uint8_t byte_dest, byte_src;
+int i;
+
+src += len_src;
+dest += len_dest;
+
+/* Handle rightmost byte */
+byte_src = cpu_ldub_data_ra(env, src, ra);
+byte_dest = cpu_ldub_data_ra(env, dest, ra);
+byte_dest = (byte_dest & 0x0f) | (byte_src << 4);
+cpu_stb_data_ra(env, dest, byte_dest, ra);
+
+/* Process remaining bytes from right to left */
+for (i = 1; i <= len_dest; i++) {
+byte_dest = byte_src >> 4;
+if (len_src - i >= 0) {
+byte_src = cpu_ldub_data_ra(env, src - i, ra);
+} else {
+byte_src = 0;
+}
+byte_dest |= byte_src << 4;
+cpu_stb_data_ra(env, dest - i, byte_dest, ra);
+}
+}
+
 /* compare unsigned byte arrays */
 static uint32_t do_helper_clc(CPUS390XState *env, uint32_t l, uint64_t s1,
   uint64_t s2, uintptr_t ra)
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 4e7211203a..b1877cf27b 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2998,6 +2998,14 @@ static ExitStatus op_mvn(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_mvo(DisasContext *s, DisasOps *o)
+{
+TCGv_i32 l = tcg_const_i32(get_field(s->fields, l1));
+gen_helper_mvo(cpu_env, l, o->addr1, o->in2);
+tcg_temp_free_i32(l);
+return NO_EXIT;
+}
+
 static ExitStatus op_mvpg(DisasContext *s, DisasOps *o)
 {
 gen_helper_mvpg(cc_op, cpu_env, regs[0], o->in1, o->in2);
-- 
2.11.0




[Qemu-devel] [PATCH v3 09/30] target/s390x: implement COMPARE AND SIGNAL

2017-05-31 Thread Aurelien Jarno
These functions differ from COMPARE by generating an exception for a
QNaN input. Use the non quiet version of floatXX_compare.

Signed-off-by: Aurelien Jarno 
---
 target/s390x/fpu_helper.c  | 27 +++
 target/s390x/helper.h  |  3 +++
 target/s390x/insn-data.def |  6 ++
 target/s390x/translate.c   | 21 +
 4 files changed, 57 insertions(+)

diff --git a/target/s390x/fpu_helper.c b/target/s390x/fpu_helper.c
index e604e9f7be..26f124fe96 100644
--- a/target/s390x/fpu_helper.c
+++ b/target/s390x/fpu_helper.c
@@ -585,6 +585,33 @@ uint64_t HELPER(fixb)(CPUS390XState *env, uint64_t ah, 
uint64_t al, uint32_t m3)
 return RET128(ret);
 }
 
+/* 32-bit FP compare and signal */
+uint32_t HELPER(keb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
+{
+int cmp = float32_compare(f1, f2, &env->fpu_status);
+handle_exceptions(env, GETPC());
+return float_comp_to_cc(env, cmp);
+}
+
+/* 64-bit FP compare and signal */
+uint32_t HELPER(kdb)(CPUS390XState *env, uint64_t f1, uint64_t f2)
+{
+int cmp = float64_compare(f1, f2, &env->fpu_status);
+handle_exceptions(env, GETPC());
+return float_comp_to_cc(env, cmp);
+}
+
+/* 128-bit FP compare and signal */
+uint32_t HELPER(kxb)(CPUS390XState *env, uint64_t ah, uint64_t al,
+ uint64_t bh, uint64_t bl)
+{
+int cmp = float128_compare(make_float128(ah, al),
+   make_float128(bh, bl),
+   &env->fpu_status);
+handle_exceptions(env, GETPC());
+return float_comp_to_cc(env, cmp);
+}
+
 /* 32-bit FP multiply and add */
 uint64_t HELPER(maeb)(CPUS390XState *env, uint64_t f1,
   uint64_t f2, uint64_t f3)
diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 596fec28ca..4ada89488b 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -89,6 +89,9 @@ DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_2(stfle, i32, env, i64)
 DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_4(stpq, TCG_CALL_NO_WG, void, env, i64, i64, i64)
+DEF_HELPER_FLAGS_3(keb, TCG_CALL_NO_WG, i32, env, i64, i64)
+DEF_HELPER_FLAGS_3(kdb, TCG_CALL_NO_WG, i32, env, i64, i64)
+DEF_HELPER_FLAGS_5(kxb, TCG_CALL_NO_WG, i32, env, i64, i64, i64, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 5314162b3d..01278949fc 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -154,6 +154,12 @@
 C(0xb349, CXBR,RRE,   Z,   x1_o, x2_o, 0, 0, cxb, 0)
 C(0xed09, CEB, RXE,   Z,   e1, m2_32u, 0, 0, ceb, 0)
 C(0xed19, CDB, RXE,   Z,   f1_o, m2_64, 0, 0, cdb, 0)
+/* COMPARE AND SIGNAL */
+C(0xb308, KEBR,RRE,   Z,   e1, e2, 0, 0, keb, 0)
+C(0xb318, KDBR,RRE,   Z,   f1_o, f2_o, 0, 0, kdb, 0)
+C(0xb348, KXBR,RRE,   Z,   x1_o, x2_o, 0, 0, kxb, 0)
+C(0xed08, KEB, RXE,   Z,   e1, m2_32u, 0, 0, keb, 0)
+C(0xed18, KDB, RXE,   Z,   f1_o, m2_64, 0, 0, kdb, 0)
 /* COMPARE IMMEDIATE */
 C(0xc20d, CFI, RIL_a, EI,  r1, i2, 0, 0, 0, cmps32)
 C(0xc20c, CGFI,RIL_a, EI,  r1, i2, 0, 0, 0, cmps64)
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 6635877bbd..30d0575c03 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2369,6 +2369,27 @@ static ExitStatus op_iske(DisasContext *s, DisasOps *o)
 }
 #endif
 
+static ExitStatus op_keb(DisasContext *s, DisasOps *o)
+{
+gen_helper_keb(cc_op, cpu_env, o->in1, o->in2);
+set_cc_static(s);
+return NO_EXIT;
+}
+
+static ExitStatus op_kdb(DisasContext *s, DisasOps *o)
+{
+gen_helper_kdb(cc_op, cpu_env, o->in1, o->in2);
+set_cc_static(s);
+return NO_EXIT;
+}
+
+static ExitStatus op_kxb(DisasContext *s, DisasOps *o)
+{
+gen_helper_kxb(cc_op, cpu_env, o->out, o->out2, o->in1, o->in2);
+set_cc_static(s);
+return NO_EXIT;
+}
+
 static ExitStatus op_laa(DisasContext *s, DisasOps *o)
 {
 /* The real output is indeed the original value in memory;
-- 
2.11.0




[Qemu-devel] [PATCH v3 07/30] target/s390x: implement LOAD PAIR FROM QUADWORD

2017-05-31 Thread Aurelien Jarno
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  2 ++
 target/s390x/mem_helper.c  | 13 +
 target/s390x/translate.c   |  7 +++
 4 files changed, 23 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index c6fbc3b949..ca78d1b162 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -87,6 +87,7 @@ DEF_HELPER_FLAGS_2(sfas, TCG_CALL_NO_WG, void, env, i64)
 DEF_HELPER_FLAGS_1(popcnt, TCG_CALL_NO_RWG_SE, i64, i64)
 DEF_HELPER_FLAGS_1(stfl, TCG_CALL_NO_RWG, void, env)
 DEF_HELPER_2(stfle, i32, env, i64)
+DEF_HELPER_FLAGS_2(lpq, TCG_CALL_NO_WG, i64, env, i64)
 
 #ifndef CONFIG_USER_ONLY
 DEF_HELPER_3(servc, i32, env, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index f92bfde4f8..53c86d5832 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -507,6 +507,8 @@
 /* LOAD PAIR DISJOINT */
 D(0xc804, LPD, SSF,   ILA, 0, 0, new_P, r3_P32, lpd, 0, MO_TEUL)
 D(0xc805, LPDG,SSF,   ILA, 0, 0, new_P, r3_P64, lpd, 0, MO_TEQ)
+/* LOAD PAIR FROM QUADWORD */
+C(0xe38f, LPQ, RXY_a, Z,   0, a2, r1_P, 0, lpq, 0)
 /* LOAD POSITIVE */
 C(0x1000, LPR, RR_a,  Z,   0, r2_32s, new, r1_32, abs, abs32)
 C(0xb900, LPGR,RRE,   Z,   0, r2, r1, 0, abs, abs64)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 850472e9ff..4f34f873c7 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -1237,6 +1237,19 @@ uint64_t HELPER(lra)(CPUS390XState *env, uint64_t addr)
 }
 #endif
 
+/* load pair from quadword */
+uint64_t HELPER(lpq)(CPUS390XState *env, uint64_t addr)
+{
+uintptr_t ra = GETPC();
+int mem_idx = cpu_mmu_index(env, false);
+TCGMemOpIdx oi = make_memop_idx(MO_TEQ | MO_ALIGN_16, mem_idx);
+
+Int128 v = helper_atomic_ldo_be_mmu(env, addr, oi, ra);
+
+env->retxl = int128_getlo(v);
+return int128_gethi(v);
+}
+
 /* Execute instruction.  This instruction executes an insn modified with
the contents of r1.  It does not change the executed instruction in memory;
it does not change the program counter.
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 00b91c4f3a..ec61590e50 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2830,6 +2830,13 @@ static ExitStatus op_lpd(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_lpq(DisasContext *s, DisasOps *o)
+{
+gen_helper_lpq(o->out, cpu_env, o->in2);
+return_low128(o->out2);
+return NO_EXIT;
+}
+
 #ifndef CONFIG_USER_ONLY
 static ExitStatus op_lura(DisasContext *s, DisasOps *o)
 {
-- 
2.11.0




[Qemu-devel] [PATCH v3 06/30] target/s390x: implement PACK

2017-05-31 Thread Aurelien Jarno
Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/helper.h  |  1 +
 target/s390x/insn-data.def |  5 +
 target/s390x/mem_helper.c  | 37 +
 target/s390x/translate.c   |  8 
 4 files changed, 51 insertions(+)

diff --git a/target/s390x/helper.h b/target/s390x/helper.h
index 3f5a05d43b..c6fbc3b949 100644
--- a/target/s390x/helper.h
+++ b/target/s390x/helper.h
@@ -75,6 +75,7 @@ DEF_HELPER_FLAGS_2(sqeb, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_2(sqdb, TCG_CALL_NO_WG, i64, env, i64)
 DEF_HELPER_FLAGS_3(sqxb, TCG_CALL_NO_WG, i64, env, i64, i64)
 DEF_HELPER_FLAGS_1(cvd, TCG_CALL_NO_RWG_SE, i64, s32)
+DEF_HELPER_FLAGS_4(pack, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(unpk, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_FLAGS_4(tr, TCG_CALL_NO_WG, void, env, i32, i64, i64)
 DEF_HELPER_4(tre, i64, env, i64, i64, i64)
diff --git a/target/s390x/insn-data.def b/target/s390x/insn-data.def
index 170b50ef2e..f92bfde4f8 100644
--- a/target/s390x/insn-data.def
+++ b/target/s390x/insn-data.def
@@ -639,6 +639,11 @@
 C(0x9600, OI,  SI,Z,   m1_8u, i2_8u, new, m1_8, or, nz64)
 C(0xeb56, OIY, SIY,   LD,  m1_8u, i2_8u, new, m1_8, or, nz64)
 
+/* PACK */
+/* Really format SS_b, but we pack both lengths into one argument
+   for the helper call, so we might as well leave one 8-bit field.  */
+C(0xf200, PACK,SS_a,  Z,   la1, a2, 0, 0, pack, 0)
+
 /* PREFETCH */
 /* Implemented as nops of course.  */
 C(0xe336, PFD, RXY_b, GIE, 0, 0, 0, 0, 0, 0)
diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index ddbebcd7ae..850472e9ff 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -644,6 +644,43 @@ uint64_t HELPER(cksm)(CPUS390XState *env, uint64_t r1,
 return len;
 }
 
+void HELPER(pack)(CPUS390XState *env, uint32_t len, uint64_t dest, uint64_t 
src)
+{
+uintptr_t ra = GETPC();
+int len_dest = len >> 4;
+int len_src = len & 0xf;
+uint8_t b;
+
+dest += len_dest;
+src += len_src;
+
+/* last byte is special, it only flips the nibbles */
+b = cpu_ldub_data_ra(env, src, ra);
+cpu_stb_data_ra(env, dest, (b << 4) | (b >> 4), ra);
+src--;
+len_src--;
+
+/* now pack every value */
+while (len_dest >= 0) {
+b = 0;
+
+if (len_src > 0) {
+b = cpu_ldub_data_ra(env, src, ra) & 0x0f;
+src--;
+len_src--;
+}
+if (len_src > 0) {
+b |= cpu_ldub_data_ra(env, src, ra) << 4;
+src--;
+len_src--;
+}
+
+len_dest--;
+dest--;
+cpu_stb_data_ra(env, dest, b, ra);
+}
+}
+
 void HELPER(unpk)(CPUS390XState *env, uint32_t len, uint64_t dest,
   uint64_t src)
 {
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 7f265aeb40..00b91c4f3a 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -3139,6 +3139,14 @@ static ExitStatus op_ori(DisasContext *s, DisasOps *o)
 return NO_EXIT;
 }
 
+static ExitStatus op_pack(DisasContext *s, DisasOps *o)
+{
+TCGv_i32 l = tcg_const_i32(get_field(s->fields, l1));
+gen_helper_pack(cpu_env, l, o->addr1, o->in2);
+tcg_temp_free_i32(l);
+return NO_EXIT;
+}
+
 static ExitStatus op_popcnt(DisasContext *s, DisasOps *o)
 {
 gen_helper_popcnt(o->out, o->in2);
-- 
2.11.0




[Qemu-devel] [PATCH v3 17/30] target/s390x: fix COMPARE LOGICAL LONG EXTENDED

2017-05-31 Thread Aurelien Jarno
There are multiple issues with the COMPARE LOGICAL LONG EXTENDED
instruction:
- The test between the two operands is inverted, leading to an inversion
  of the cc values 1 and 2.
- The address and length of an operand continue to be decreased after
  reaching the end of this operand. These values are then wrong write
  back to the registers.
- We should limit the amount of bytes to process, so that interrupts can
  be served correctly.

At the same time rename dest into src1 and src into src3 to match the
operand names and make the code less confusing.

Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/mem_helper.c | 54 ---
 target/s390x/translate.c  | 20 +-
 2 files changed, 52 insertions(+), 22 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 98a7aa22d3..e992fd993c 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -666,35 +666,55 @@ uint32_t HELPER(clcle)(CPUS390XState *env, uint32_t r1, 
uint64_t a2,
uint32_t r3)
 {
 uintptr_t ra = GETPC();
-uint64_t destlen = get_length(env, r1 + 1);
-uint64_t dest = get_address(env, r1);
-uint64_t srclen = get_length(env, r3 + 1);
-uint64_t src = get_address(env, r3);
+uint64_t src1len = get_length(env, r1 + 1);
+uint64_t src1 = get_address(env, r1);
+uint64_t src3len = get_length(env, r3 + 1);
+uint64_t src3 = get_address(env, r3);
 uint8_t pad = a2 & 0xff;
+uint64_t len = MAX(src1len, src3len);
 uint32_t cc = 0;
 
-if (!(destlen || srclen)) {
+if (!len) {
 return cc;
 }
 
-if (srclen > destlen) {
-srclen = destlen;
+/* Lest we fail to service interrupts in a timely manner, limit the
+   amount of work we're willing to do.  For now, let's cap at 8k.  */
+if (len > 0x2000) {
+len = 0x2000;
+cc = 3;
 }
 
-for (; destlen || srclen; src++, dest++, destlen--, srclen--) {
-uint8_t v1 = srclen ? cpu_ldub_data_ra(env, src, ra) : pad;
-uint8_t v2 = destlen ? cpu_ldub_data_ra(env, dest, ra) : pad;
-if (v1 != v2) {
-cc = (v1 < v2) ? 1 : 2;
+for (; len; len--) {
+uint8_t v1 = pad;
+uint8_t v3 = pad;
+
+if (src1len) {
+v1 = cpu_ldub_data_ra(env, src1, ra);
+}
+if (src3len) {
+v3 = cpu_ldub_data_ra(env, src3, ra);
+}
+
+if (v1 != v3) {
+cc = (v1 < v3) ? 1 : 2;
 break;
 }
+
+if (src1len) {
+src1++;
+src1len--;
+}
+if (src3len) {
+src3++;
+src3len--;
+}
 }
 
-set_length(env, r1 + 1, destlen);
-/* can't use srclen here, we trunc'ed it */
-set_length(env, r3 + 1, env->regs[r3 + 1] - src - env->regs[r3]);
-set_address(env, r1, dest);
-set_address(env, r3, src);
+set_length(env, r1 + 1, src1len);
+set_length(env, r3 + 1, src3len);
+set_address(env, r1, src1);
+set_address(env, r3, src3);
 
 return cc;
 }
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 95ca53c1ef..9309e58009 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -1917,11 +1917,21 @@ static ExitStatus op_clc(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_clcle(DisasContext *s, DisasOps *o)
 {
-TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
-TCGv_i32 r3 = tcg_const_i32(get_field(s->fields, r3));
-gen_helper_clcle(cc_op, cpu_env, r1, o->in2, r3);
-tcg_temp_free_i32(r1);
-tcg_temp_free_i32(r3);
+int r1 = get_field(s->fields, r1);
+int r3 = get_field(s->fields, r3);
+TCGv_i32 t1, t3;
+
+/* r1 and r3 must be even.  */
+if (r1 & 1 || r3 & 1) {
+gen_program_exception(s, PGM_SPECIFICATION);
+return EXIT_NORETURN;
+}
+
+t1 = tcg_const_i32(r1);
+t3 = tcg_const_i32(r3);
+gen_helper_clcle(cc_op, cpu_env, t1, o->in2, t3);
+tcg_temp_free_i32(t1);
+tcg_temp_free_i32(t3);
 set_cc_static(s);
 return NO_EXIT;
 }
-- 
2.11.0




[Qemu-devel] [PATCH v3 20/30] target/s390x: improve MOVE LONG and MOVE LONG EXTENDED

2017-05-31 Thread Aurelien Jarno
As MVCL and MVCLE only differ by their operands, use a common
do_mvcl helper. Optimize it calling fast_memmove and fast_memset.
Correctly write back addresses. Check that r1 and r2/r3 registers
are even.

Reviewed-by: Richard Henderson 
Signed-off-by: Aurelien Jarno 
---
 target/s390x/mem_helper.c | 90 +--
 target/s390x/translate.c  | 40 +++--
 2 files changed, 70 insertions(+), 60 deletions(-)

diff --git a/target/s390x/mem_helper.c b/target/s390x/mem_helper.c
index 6dfa087ff1..cb0ec3eebf 100644
--- a/target/s390x/mem_helper.c
+++ b/target/s390x/mem_helper.c
@@ -576,49 +576,60 @@ void HELPER(stam)(CPUS390XState *env, uint32_t r1, 
uint64_t a2, uint32_t r3)
 }
 }
 
-/* move long */
-uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
+/* move long helper */
+static inline uint32_t do_mvcl(CPUS390XState *env,
+   uint64_t *dest, uint64_t *destlen,
+   uint64_t *src, uint64_t *srclen,
+   uint8_t pad, uintptr_t ra)
 {
-uintptr_t ra = GETPC();
-uint64_t destlen = env->regs[r1 + 1] & 0xff;
-uint64_t dest = get_address(env, r1);
-uint64_t srclen = env->regs[r2 + 1] & 0xff;
-uint64_t src = get_address(env, r2);
-uint8_t pad = env->regs[r2 + 1] >> 24;
-uint8_t v;
+uint64_t len = MIN(*srclen, *destlen);
 uint32_t cc;
 
-if (destlen == srclen) {
+if (*destlen == *srclen) {
 cc = 0;
-} else if (destlen < srclen) {
+} else if (*destlen < *srclen) {
 cc = 1;
 } else {
 cc = 2;
 }
 
-if (srclen > destlen) {
-srclen = destlen;
-}
+/* Copy the src array */
+fast_memmove(env, *dest, *src, len, ra);
+*src += len;
+*srclen -= len;
+*dest += len;
+*destlen -= len;
 
-for (; destlen && srclen; src++, dest++, destlen--, srclen--) {
-v = cpu_ldub_data_ra(env, src, ra);
-cpu_stb_data_ra(env, dest, v, ra);
-}
+/* Pad the remaining area */
+fast_memset(env, *dest, pad, *destlen, ra);
+*dest += *destlen;
+*destlen = 0;
 
-for (; destlen; dest++, destlen--) {
-cpu_stb_data_ra(env, dest, pad, ra);
-}
+return cc;
+}
 
-env->regs[r1 + 1] = destlen;
-/* can't use srclen here, we trunc'ed it */
-env->regs[r2 + 1] -= src - env->regs[r2];
+/* move long */
+uint32_t HELPER(mvcl)(CPUS390XState *env, uint32_t r1, uint32_t r2)
+{
+uintptr_t ra = GETPC();
+uint64_t destlen = env->regs[r1 + 1] & 0xff;
+uint64_t dest = get_address(env, r1);
+uint64_t srclen = env->regs[r2 + 1] & 0xff;
+uint64_t src = get_address(env, r2);
+uint8_t pad = env->regs[r2 + 1] >> 24;
+uint32_t cc;
+
+cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, ra);
+
+env->regs[r1 + 1] = deposit64(env->regs[r1 + 1], 0, 24, destlen);
+env->regs[r2 + 1] = deposit64(env->regs[r2 + 1], 0, 24, srclen);
 set_address(env, r1, dest);
 set_address(env, r2, src);
 
 return cc;
 }
 
-/* move long extended another memcopy insn with more bells and whistles */
+/* move long extended */
 uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, uint64_t a2,
uint32_t r3)
 {
@@ -627,34 +638,13 @@ uint32_t HELPER(mvcle)(CPUS390XState *env, uint32_t r1, 
uint64_t a2,
 uint64_t dest = get_address(env, r1);
 uint64_t srclen = get_length(env, r3 + 1);
 uint64_t src = get_address(env, r3);
-uint8_t pad = a2 & 0xff;
-uint8_t v;
+uint8_t pad = a2;
 uint32_t cc;
 
-if (destlen == srclen) {
-cc = 0;
-} else if (destlen < srclen) {
-cc = 1;
-} else {
-cc = 2;
-}
-
-if (srclen > destlen) {
-srclen = destlen;
-}
-
-for (; destlen && srclen; src++, dest++, destlen--, srclen--) {
-v = cpu_ldub_data_ra(env, src, ra);
-cpu_stb_data_ra(env, dest, v, ra);
-}
-
-for (; destlen; dest++, destlen--) {
-cpu_stb_data_ra(env, dest, pad, ra);
-}
+cc = do_mvcl(env, &dest, &destlen, &src, &srclen, pad, ra);
 
-set_length(env, r1 + 1 , destlen);
-/* can't use srclen here, we trunc'ed it */
-set_length(env, r3 + 1, env->regs[r3 + 1] - src - env->regs[r3]);
+set_length(env, r1 + 1, destlen);
+set_length(env, r3 + 1, srclen);
 set_address(env, r1, dest);
 set_address(env, r3, src);
 
diff --git a/target/s390x/translate.c b/target/s390x/translate.c
index 999d716f61..729d25d8f8 100644
--- a/target/s390x/translate.c
+++ b/target/s390x/translate.c
@@ -2981,22 +2981,42 @@ static ExitStatus op_mvcin(DisasContext *s, DisasOps *o)
 
 static ExitStatus op_mvcl(DisasContext *s, DisasOps *o)
 {
-TCGv_i32 r1 = tcg_const_i32(get_field(s->fields, r1));
-TCGv_i32 r2 = tcg_c

<    1   2   3   4   5   6   7   8   9   10   >