On Sun, Sep 9, 2012 at 9:05 PM, Richard Henderson <r...@twiddle.net> wrote: > Signed-off-by: Richard Henderson <r...@twiddle.net> > --- > target-s390x/helper.h | 4 +- > target-s390x/insn-data.def | 4 ++ > target-s390x/mem_helper.c | 119 > ++++++++++++++++++++++----------------------- > target-s390x/translate.c | 42 ++++++++-------- > 4 files changed, 83 insertions(+), 86 deletions(-) > > diff --git a/target-s390x/helper.h b/target-s390x/helper.h > index 2bd4105..473e776 100644 > --- a/target-s390x/helper.h > +++ b/target-s390x/helper.h > @@ -25,9 +25,9 @@ DEF_HELPER_FLAGS_3(set_cc_subu64, > TCG_CALL_PURE|TCG_CALL_CONST, i32, i64, i64, i > DEF_HELPER_FLAGS_3(set_cc_sub32, TCG_CALL_PURE|TCG_CALL_CONST, i32, s32, > s32, s32) > DEF_HELPER_FLAGS_3(set_cc_subu32, TCG_CALL_PURE|TCG_CALL_CONST, i32, i32, > i32, i32) > DEF_HELPER_4(srst, i32, env, i32, i32, i32) > -DEF_HELPER_4(clst, i32, env, i32, i32, i32) > +DEF_HELPER_4(clst, i64, env, i64, i64, i64) > DEF_HELPER_4(mvpg, void, env, i64, i64, i64) > -DEF_HELPER_4(mvst, void, env, i32, i32, i32) > +DEF_HELPER_4(mvst, i64, env, i64, i64, i64) > DEF_HELPER_4(csg, i64, env, i64, i64, i64) > DEF_HELPER_4(cdsg, i32, env, i32, i64, i32) > DEF_HELPER_4(cs, i64, env, i64, i64, i64) > diff --git a/target-s390x/insn-data.def b/target-s390x/insn-data.def > index 4b30dd5..501d02e 100644 > --- a/target-s390x/insn-data.def > +++ b/target-s390x/insn-data.def > @@ -153,6 +153,8 @@ > C(0xbd00, CLM, RS_b, Z, r1_o, a2, 0, 0, clm, 0) > C(0xeb21, CLMY, RSY_b, LD, r1_o, a2, 0, 0, clm, 0) > C(0xeb20, CLMH, RSY_b, Z, r1_sr32, a2, 0, 0, clm, 0) > +/* COMPARE LOGICAL STRING */ > + C(0xb25d, CLST, RRE, Z, r1_o, r2_o, 0, 0, clst, 0) > > /* COMPARE AND SWAP */ > C(0xba00, CS, RS_a, Z, r1_o, a2, new, r1_32, cs, 0) > @@ -396,6 +398,8 @@ > C(0xa800, MVCLE, RS_a, Z, 0, a2, 0, 0, mvcle, 0) > /* MOVE PAGE */ > C(0xb254, MVPG, RRE, Z, r1_o, r2_o, 0, 0, mvpg, 0) > +/* MOVE STRING */ > + C(0xb255, MVST, RRE, Z, r1_o, r2_o, 0, 0, mvst, 0) > > /* MULTIPLY */ > C(0x1c00, MR, RR_a, Z, r1p1_32s, r2_32s, new, r1_D32, mul, 0) > diff --git a/target-s390x/mem_helper.c b/target-s390x/mem_helper.c > index e9cacf9..8cc4625 100644 > --- a/target-s390x/mem_helper.c > +++ b/target-s390x/mem_helper.c > @@ -310,36 +310,30 @@ uint32_t HELPER(clm)(CPUS390XState *env, uint32_t r1, > uint32_t mask, > return cc; > } > > +static inline uint64_t fix_address(CPUS390XState *env, uint64_t a) > +{ > + /* 31-Bit mode */ > + if (!(env->psw.mask & PSW_MASK_64)) {
PSW_MASK_64 bit could be added to TB flags and that could be checked during translation, then the mask needs to be applied only when the mode is active. Whether that actually improves performance depends on how often the bit is changed. Also all PSW writes need to be handled, possibly causing a TB flush. Masking could be done with TCG ops too. > + a &= 0x7fffffff; > + } > + return a; > +} > + > static inline uint64_t get_address(CPUS390XState *env, int x2, int b2, int > d2) > { > uint64_t r = d2; > - > if (x2) { > r += env->regs[x2]; > } > - > if (b2) { > r += env->regs[b2]; > } > - > - /* 31-Bit mode */ > - if (!(env->psw.mask & PSW_MASK_64)) { > - r &= 0x7fffffff; > - } > - > - return r; > + return fix_address(env, r); > } > > static inline uint64_t get_address_31fix(CPUS390XState *env, int reg) > { > - uint64_t r = env->regs[reg]; > - > - /* 31-Bit mode */ > - if (!(env->psw.mask & PSW_MASK_64)) { > - r &= 0x7fffffff; > - } > - > - return r; > + return fix_address(env, env->regs[reg]); > } > > /* search string (c is byte to search, r2 is string, r1 end of string) */ > @@ -365,39 +359,40 @@ uint32_t HELPER(srst)(CPUS390XState *env, uint32_t c, > uint32_t r1, uint32_t r2) > } > > /* unsigned string compare (c is string terminator) */ > -uint32_t HELPER(clst)(CPUS390XState *env, uint32_t c, uint32_t r1, uint32_t > r2) > +uint64_t HELPER(clst)(CPUS390XState *env, uint64_t c, uint64_t s1, uint64_t > s2) > { > - uint64_t s1 = get_address_31fix(env, r1); > - uint64_t s2 = get_address_31fix(env, r2); > - uint8_t v1, v2; > - uint32_t cc; > + uint32_t len; > > c = c & 0xff; > -#ifdef CONFIG_USER_ONLY > - if (!c) { > - HELPER_LOG("%s: comparing '%s' and '%s'\n", > - __func__, (char *)g2h(s1), (char *)g2h(s2)); > - } > -#endif > - for (;;) { > - v1 = cpu_ldub_data(env, s1); > - v2 = cpu_ldub_data(env, s2); > - if ((v1 == c || v2 == c) || (v1 != v2)) { > - break; > + s1 = fix_address(env, s1); > + s2 = fix_address(env, s2); > + > + /* Lest we fail to service interrupts in a timely manner, limit the > + amount of work we're willing to do. For now, lets cap at 8k. */ > + for (len = 0; len < 0x2000; ++len) { > + uint8_t v1 = cpu_ldub_data(env, s1 + len); > + uint8_t v2 = cpu_ldub_data(env, s2 + len); > + if (v1 == v2) { > + if (v1 == c) { > + /* Equal. CC=0, and don't advance the registers. */ > + env->cc_op = 0; > + env->retxl = s2; > + return s1; > + } > + } else { > + /* Unequal. CC={1,2}, and advance the registers. Note that > + the terminator need not be zero, but the string that contains > + the terminator is by definition "low". */ > + env->cc_op = (v1 == c ? 1 : v2 == c ? 2 : v1 < v2 ? 1 : 2); > + env->retxl = s2 + len; > + return s1 + len; > } > - s1++; > - s2++; > } > > - if (v1 == v2) { > - cc = 0; > - } else { > - cc = (v1 < v2) ? 1 : 2; > - /* FIXME: 31-bit mode! */ > - env->regs[r1] = s1; > - env->regs[r2] = s2; > - } > - return cc; > + /* CPU-determined bytes equal; advance the registers. */ > + env->cc_op = 3; > + env->retxl = s2 + len; > + return s1 + len; > } > > /* move page */ > @@ -413,29 +408,31 @@ void HELPER(mvpg)(CPUS390XState *env, uint64_t r0, > uint64_t r1, uint64_t r2) > } > > /* string copy (c is string terminator) */ > -void HELPER(mvst)(CPUS390XState *env, uint32_t c, uint32_t r1, uint32_t r2) > +uint64_t HELPER(mvst)(CPUS390XState *env, uint64_t c, uint64_t d, uint64_t s) > { > - uint64_t dest = get_address_31fix(env, r1); > - uint64_t src = get_address_31fix(env, r2); > - uint8_t v; > + uint32_t len; > > c = c & 0xff; > -#ifdef CONFIG_USER_ONLY > - if (!c) { > - HELPER_LOG("%s: copy '%s' to 0x%lx\n", __func__, (char *)g2h(src), > - dest); > - } > -#endif > - for (;;) { > - v = cpu_ldub_data(env, src); > - cpu_stb_data(env, dest, v); > + d = fix_address(env, d); > + s = fix_address(env, s); > + > + /* Lest we fail to service interrupts in a timely manner, limit the > + amount of work we're willing to do. For now, lets cap at 8k. */ > + for (len = 0; len < 0x2000; ++len) { > + uint8_t v = cpu_ldub_data(env, s + len); > + cpu_stb_data(env, d + len, v); > if (v == c) { > - break; > + /* Complete. Set CC=1 and advance R1. */ > + env->cc_op = 1; > + env->retxl = s; > + return d + len; > } > - src++; > - dest++; > } > - env->regs[r1] = dest; /* FIXME: 31-bit mode! */ > + > + /* Incomplete. Set CC=3 and signal to advance R1 and R2. */ > + env->cc_op = 3; > + env->retxl = s + len; > + return d + len; > } > > /* compare and swap 64-bit */ > diff --git a/target-s390x/translate.c b/target-s390x/translate.c > index a49475b..49f419a 100644 > --- a/target-s390x/translate.c > +++ b/target-s390x/translate.c > @@ -425,7 +425,7 @@ static TCGv_i64 get_address(DisasContext *s, int x2, int > b2, int d2) > return tmp; > } > > -static void gen_op_movi_cc(DisasContext *s, uint32_t val) > +static inline void gen_op_movi_cc(DisasContext *s, uint32_t val) > { > s->cc_op = CC_OP_CONST0 + val; > } > @@ -1031,28 +1031,6 @@ static void disas_b2(DisasContext *s, int op, uint32_t > insn) > LOG_DISAS("disas_b2: op 0x%x r1 %d r2 %d\n", op, r1, r2); > > switch (op) { > - case 0x55: /* MVST R1,R2 [RRE] */ > - tmp32_1 = load_reg32(0); > - tmp32_2 = tcg_const_i32(r1); > - tmp32_3 = tcg_const_i32(r2); > - potential_page_fault(s); > - gen_helper_mvst(cpu_env, tmp32_1, tmp32_2, tmp32_3); > - tcg_temp_free_i32(tmp32_1); > - tcg_temp_free_i32(tmp32_2); > - tcg_temp_free_i32(tmp32_3); > - gen_op_movi_cc(s, 1); > - break; > - case 0x5d: /* CLST R1,R2 [RRE] */ > - tmp32_1 = load_reg32(0); > - tmp32_2 = tcg_const_i32(r1); > - tmp32_3 = tcg_const_i32(r2); > - potential_page_fault(s); > - gen_helper_clst(cc_op, cpu_env, tmp32_1, tmp32_2, tmp32_3); > - set_cc_static(s); > - tcg_temp_free_i32(tmp32_1); > - tcg_temp_free_i32(tmp32_2); > - tcg_temp_free_i32(tmp32_3); > - break; > case 0x5e: /* SRST R1,R2 [RRE] */ > tmp32_1 = load_reg32(0); > tmp32_2 = tcg_const_i32(r1); > @@ -2044,6 +2022,15 @@ static ExitStatus op_clm(DisasContext *s, DisasOps *o) > return NO_EXIT; > } > > +static ExitStatus op_clst(DisasContext *s, DisasOps *o) > +{ > + potential_page_fault(s); > + gen_helper_clst(o->in1, cpu_env, regs[0], o->in1, o->in2); > + set_cc_static(s); > + return_low128(o->in2); > + return NO_EXIT; > +} > + > static ExitStatus op_cs(DisasContext *s, DisasOps *o) > { > int r3 = get_field(s->fields, r3); > @@ -2589,6 +2576,15 @@ static ExitStatus op_mvpg(DisasContext *s, DisasOps *o) > return NO_EXIT; > } > > +static ExitStatus op_mvst(DisasContext *s, DisasOps *o) > +{ > + potential_page_fault(s); > + gen_helper_mvst(o->in1, cpu_env, regs[0], o->in1, o->in2); > + set_cc_static(s); > + return_low128(o->in2); > + return NO_EXIT; > +} > + > static ExitStatus op_mul(DisasContext *s, DisasOps *o) > { > tcg_gen_mul_i64(o->out, o->in1, o->in2); > -- > 1.7.11.4 > >