On Fri, Dec 18, 2009 at 02:09:09PM -0800, Richard Henderson wrote:
> This is a squashed version of the 3 or 4 incremental patches that I
> had sent out for implementing the alpha fpu instruction qualifiers.
> 
> 

First of all, this patch has a lot of coding style issues. I have
reported some of them at the beginning of the file, but stopped at some
point.

My main concern about this patch is that I don't really understand why
the current fp exceptions, the current rounding mode or flush_to_zero 
mode are stored in FP_STATUS. I think it would be better to have 
dedicated variable(s) in the cpu state structure, as it is done in other
emulated architectures. 

For example instead of saving the exception, doing a few fp
instructions, and restoring them, it is better to have a separate
variable that holds the current CPU FPU state (which probably already
exists as (part of) a CPU register), always clear the
FP_STATUS.float_exception_flags variable before an instruction or
sequence of instructions, and copy the bits that need to be copied back
to the variable holding the CPU FPU state.

That would save a lot of mask and shift operations that are currently done
in your patch, and also a lot of save and restore operations when
executing code.

> commit 572164702dd83955fc8783c85811ec86c3fb6e4a
> Author: Richard Henderson <r...@twiddle.net>
> Date:   Fri Dec 18 10:50:32 2009 -0800
> 
>     target-alpha: Implement fp insn qualifiers.
>     
>     Adds a third constant argument to the fpu helpers, which contain the
>     unparsed qualifier bits.  The helper functions use new begin_fp/end_fp
>     routines that extract the rounding mode from the qualifier bits, as
>     well as raise exceptions for non-finite inputs and outputs also as
>     directed by the qualifier bits.
>     
>     cpu_alpha_load/store_fpcr modified to load/store the majority of the
>     bits from env->fpcr.  This because we hadn't been saving a few of the
>     fpcr bits in the fp_status field: in particular DNZ.
>     
>     Re-implement cvttq without saturation of overflow results, to match
>     the Alpha specification.
>     
>     Signed-off-by: Richard Henderson <r...@twiddle.net>
> 
> diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h
> index c0dff4b..c1c0470 100644
> --- a/target-alpha/cpu.h
> +++ b/target-alpha/cpu.h
> @@ -430,9 +430,13 @@ enum {
>  };
>  
>  /* Arithmetic exception */
> -enum {
> -    EXCP_ARITH_OVERFLOW,
> -};
> +#define EXC_M_IOV    (1<<16)         /* Integer Overflow */
> +#define EXC_M_INE    (1<<15)         /* Inexact result */
> +#define EXC_M_UNF    (1<<14)         /* Underflow */
> +#define EXC_M_FOV    (1<<13)         /* Overflow */
> +#define EXC_M_DZE    (1<<12)         /* Division by zero */
> +#define EXC_M_INV    (1<<11)         /* Invalid operation */
> +#define EXC_M_SWC    (1<<10)         /* Software completion */
>  
>  enum {
>      IR_V0   = 0,
> diff --git a/target-alpha/helper.c b/target-alpha/helper.c
> index be7d37b..94821bd 100644
> --- a/target-alpha/helper.c
> +++ b/target-alpha/helper.c
> @@ -27,41 +27,13 @@
>  
>  uint64_t cpu_alpha_load_fpcr (CPUState *env)
>  {
> -    uint64_t ret = 0;
> -    int flags, mask;
> -
> -    flags = env->fp_status.float_exception_flags;
> -    ret |= (uint64_t) flags << 52;
> -    if (flags)
> -        ret |= FPCR_SUM;
> -    env->ipr[IPR_EXC_SUM] &= ~0x3E;
> -    env->ipr[IPR_EXC_SUM] |= flags << 1;
> -
> -    mask = env->fp_status.float_exception_mask;
> -    if (mask & float_flag_invalid)
> -        ret |= FPCR_INVD;
> -    if (mask & float_flag_divbyzero)
> -        ret |= FPCR_DZED;
> -    if (mask & float_flag_overflow)
> -        ret |= FPCR_OVFD;
> -    if (mask & float_flag_underflow)
> -        ret |= FPCR_UNFD;
> -    if (mask & float_flag_inexact)
> -        ret |= FPCR_INED;
> -
> -    switch (env->fp_status.float_rounding_mode) {
> -    case float_round_nearest_even:
> -        ret |= 2ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_down:
> -        ret |= 1ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_up:
> -        ret |= 3ULL << FPCR_DYN_SHIFT;
> -        break;
> -    case float_round_to_zero:
> -        break;
> -    }
> +    uint64_t ret = env->fp_status.float_exception_flags;
> +
> +    if (ret)
> +      ret = FPCR_SUM | (ret << 52);

Coding style.

> +
> +    ret |= env->fpcr & ~(FPCR_SUM | FPCR_STATUS_MASK);
> +
>      return ret;
>  }
>  
> @@ -69,6 +41,8 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>  {
>      int round_mode, mask;
>  
> +    env->fpcr = val;
> +
>      set_float_exception_flags((val >> 52) & 0x3F, &env->fp_status);
>  
>      mask = 0;
> @@ -86,6 +60,7 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>  
>      switch ((val >> FPCR_DYN_SHIFT) & 3) {
>      case 0:
> +    default:
>          round_mode = float_round_to_zero;
>          break;
>      case 1:
> @@ -100,6 +75,11 @@ void cpu_alpha_store_fpcr (CPUState *env, uint64_t val)
>          break;
>      }
>      set_float_rounding_mode(round_mode, &env->fp_status);
> +
> +    mask = 0;
> +    if ((val & (FPCR_UNDZ|FPCR_UNFD)) == (FPCR_UNDZ|FPCR_UNFD))
> +        mask = 1;

Coding style. Also the name of the variable "mask" is a bit misleading
for a true/false variable.

> +    set_flush_to_zero(mask, &env->fp_status);
>  }
>  
>  #if defined(CONFIG_USER_ONLY)
> diff --git a/target-alpha/helper.h b/target-alpha/helper.h
> index bedd3c0..1521a84 100644
> --- a/target-alpha/helper.h
> +++ b/target-alpha/helper.h
> @@ -41,33 +41,33 @@ DEF_HELPER_1(store_fpcr, void, i64)
>  
>  DEF_HELPER_1(f_to_memory, i32, i64)
>  DEF_HELPER_1(memory_to_f, i64, i32)
> -DEF_HELPER_2(addf, i64, i64, i64)
> -DEF_HELPER_2(subf, i64, i64, i64)
> -DEF_HELPER_2(mulf, i64, i64, i64)
> -DEF_HELPER_2(divf, i64, i64, i64)
> -DEF_HELPER_1(sqrtf, i64, i64)
> +DEF_HELPER_3(addf, i64, i64, i64, i32)
> +DEF_HELPER_3(subf, i64, i64, i64, i32)
> +DEF_HELPER_3(mulf, i64, i64, i64, i32)
> +DEF_HELPER_3(divf, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtf, i64, i64, i32)
>  
>  DEF_HELPER_1(g_to_memory, i64, i64)
>  DEF_HELPER_1(memory_to_g, i64, i64)
> -DEF_HELPER_2(addg, i64, i64, i64)
> -DEF_HELPER_2(subg, i64, i64, i64)
> -DEF_HELPER_2(mulg, i64, i64, i64)
> -DEF_HELPER_2(divg, i64, i64, i64)
> -DEF_HELPER_1(sqrtg, i64, i64)
> +DEF_HELPER_3(addg, i64, i64, i64, i32)
> +DEF_HELPER_3(subg, i64, i64, i64, i32)
> +DEF_HELPER_3(mulg, i64, i64, i64, i32)
> +DEF_HELPER_3(divg, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtg, i64, i64, i32)
>  
>  DEF_HELPER_1(s_to_memory, i32, i64)
>  DEF_HELPER_1(memory_to_s, i64, i32)
> -DEF_HELPER_2(adds, i64, i64, i64)
> -DEF_HELPER_2(subs, i64, i64, i64)
> -DEF_HELPER_2(muls, i64, i64, i64)
> -DEF_HELPER_2(divs, i64, i64, i64)
> -DEF_HELPER_1(sqrts, i64, i64)
> -
> -DEF_HELPER_2(addt, i64, i64, i64)
> -DEF_HELPER_2(subt, i64, i64, i64)
> -DEF_HELPER_2(mult, i64, i64, i64)
> -DEF_HELPER_2(divt, i64, i64, i64)
> -DEF_HELPER_1(sqrtt, i64, i64)
> +DEF_HELPER_3(adds, i64, i64, i64, i32)
> +DEF_HELPER_3(subs, i64, i64, i64, i32)
> +DEF_HELPER_3(muls, i64, i64, i64, i32)
> +DEF_HELPER_3(divs, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrts, i64, i64, i32)
> +
> +DEF_HELPER_3(addt, i64, i64, i64, i32)
> +DEF_HELPER_3(subt, i64, i64, i64, i32)
> +DEF_HELPER_3(mult, i64, i64, i64, i32)
> +DEF_HELPER_3(divt, i64, i64, i64, i32)
> +DEF_HELPER_2(sqrtt, i64, i64, i32)
>  
>  DEF_HELPER_2(cmptun, i64, i64, i64)
>  DEF_HELPER_2(cmpteq, i64, i64, i64)
> @@ -81,15 +81,15 @@ DEF_HELPER_2(cpys, i64, i64, i64)
>  DEF_HELPER_2(cpysn, i64, i64, i64)
>  DEF_HELPER_2(cpyse, i64, i64, i64)
>  
> -DEF_HELPER_1(cvtts, i64, i64)
> -DEF_HELPER_1(cvtst, i64, i64)
> -DEF_HELPER_1(cvttq, i64, i64)
> -DEF_HELPER_1(cvtqs, i64, i64)
> -DEF_HELPER_1(cvtqt, i64, i64)
> -DEF_HELPER_1(cvtqf, i64, i64)
> -DEF_HELPER_1(cvtgf, i64, i64)
> -DEF_HELPER_1(cvtgq, i64, i64)
> -DEF_HELPER_1(cvtqg, i64, i64)
> +DEF_HELPER_2(cvtts, i64, i64, i32)
> +DEF_HELPER_2(cvtst, i64, i64, i32)
> +DEF_HELPER_2(cvttq, i64, i64, i32)
> +DEF_HELPER_2(cvtqs, i64, i64, i32)
> +DEF_HELPER_2(cvtqt, i64, i64, i32)
> +DEF_HELPER_2(cvtqf, i64, i64, i32)
> +DEF_HELPER_2(cvtgf, i64, i64, i32)
> +DEF_HELPER_2(cvtgq, i64, i64, i32)
> +DEF_HELPER_2(cvtqg, i64, i64, i32)
>  DEF_HELPER_1(cvtlq, i64, i64)
>  DEF_HELPER_1(cvtql, i64, i64)
>  DEF_HELPER_1(cvtqlv, i64, i64)
> diff --git a/target-alpha/op_helper.c b/target-alpha/op_helper.c
> index b2abf6c..2d1c3d5 100644
> --- a/target-alpha/op_helper.c
> +++ b/target-alpha/op_helper.c
> @@ -24,7 +24,7 @@
>  
>  
> /*****************************************************************************/
>  /* Exceptions processing helpers */
> -void helper_excp (int excp, int error)
> +void QEMU_NORETURN helper_excp (int excp, int error)
>  {
>      env->exception_index = excp;
>      env->error_code = error;
> @@ -78,7 +78,7 @@ uint64_t helper_addqv (uint64_t op1, uint64_t op2)
>      uint64_t tmp = op1;
>      op1 += op2;
>      if (unlikely((tmp ^ op2 ^ (-1ULL)) & (tmp ^ op1) & (1ULL << 63))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return op1;
>  }
> @@ -88,7 +88,7 @@ uint64_t helper_addlv (uint64_t op1, uint64_t op2)
>      uint64_t tmp = op1;
>      op1 = (uint32_t)(op1 + op2);
>      if (unlikely((tmp ^ op2 ^ (-1UL)) & (tmp ^ op1) & (1UL << 31))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return op1;
>  }
> @@ -98,7 +98,7 @@ uint64_t helper_subqv (uint64_t op1, uint64_t op2)
>      uint64_t res;
>      res = op1 - op2;
>      if (unlikely((op1 ^ op2) & (res ^ op1) & (1ULL << 63))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return res;
>  }
> @@ -108,7 +108,7 @@ uint64_t helper_sublv (uint64_t op1, uint64_t op2)
>      uint32_t res;
>      res = op1 - op2;
>      if (unlikely((op1 ^ op2) & (res ^ op1) & (1UL << 31))) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return res;
>  }
> @@ -118,7 +118,7 @@ uint64_t helper_mullv (uint64_t op1, uint64_t op2)
>      int64_t res = (int64_t)op1 * (int64_t)op2;
>  
>      if (unlikely((int32_t)res != res)) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return (int64_t)((int32_t)res);
>  }
> @@ -130,7 +130,7 @@ uint64_t helper_mulqv (uint64_t op1, uint64_t op2)
>      muls64(&tl, &th, op1, op2);
>      /* If th != 0 && th != -1, then we had an overflow */
>      if (unlikely((th + 1) > 1)) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
>      }
>      return tl;
>  }
> @@ -370,8 +370,175 @@ uint64_t helper_unpkbw (uint64_t op1)
>  
>  /* Floating point helpers */
>  
> +/* ??? Not implemented is setting EXC_MASK, containing a bitmask of
> +   destination registers of instructions that have caused arithmetic
> +   traps.  Not needed for userspace emulation, or for complete 
> +   emulation of the entire fpu stack within qemu.  But we would need
> +   it to invoke a guest kernel's entArith trap handler properly.
> +   
> +   It would be possible to encode the FP destination register in the
> +   QUAL parameter for the FPU helpers below; additional changes would
> +   be required for ADD/V et al above.  */
> +
> +#define QUAL_RM_N    0x080   /* Round mode nearest even */
> +#define QUAL_RM_C    0x000   /* Round mode chopped */
> +#define QUAL_RM_M    0x040   /* Round mode minus infinity */
> +#define QUAL_RM_D    0x0c0   /* Round mode dynamic */
> +#define QUAL_RM_MASK 0x0c0
> +
> +#define QUAL_U               0x100   /* Underflow enable (fp output) */
> +#define QUAL_V               0x100   /* Overflow enable (int output) */
> +#define QUAL_S               0x400   /* Software completion enable */
> +#define QUAL_I               0x200   /* Inexact detection enable */
> +
> +/* If the floating-point qualifiers specified a rounding mode,
> +   set that rounding mode and remember the original mode for
> +   resetting at the end of the instruction.  */
> +static inline uint32_t begin_fp_roundmode(uint32_t qual)
> +{
> +    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = rm;
> +
> +    switch (qual & QUAL_RM_MASK) {
> +    default:
> +    case QUAL_RM_N:
> +        rm = float_round_nearest_even;
> +        break;
> +    case QUAL_RM_C:
> +        rm = float_round_to_zero;
> +        break;
> +    case QUAL_RM_M:
> +        rm = float_round_down;
> +        break;
> +    case QUAL_RM_D:
> +        return old_rm;

Does it correspond to the dynamic rounding mode described above as still
to be implemented?

> +    }
> +    if (old_rm != rm)
> +        set_float_rounding_mode(rm, &FP_STATUS);

Coding style.

> +    return old_rm;
> +}
> +
> +/* Zero the exception flags so that we can determine if the current
> +   instruction raises any exceptions.  Save the old acrued exception
> +   status so that we can restore them at the end of the insn.  */
> +static inline uint32_t begin_fp_exception(void)
> +{
> +    uint32_t old_exc = (uint32_t)FP_STATUS.float_exception_flags << 8;
> +    set_float_exception_flags(0, &FP_STATUS);
> +    return old_exc;
> +}
> +
> +static inline uint32_t begin_fp_flush_to_zero(uint32_t quals)
> +{
> +    /* If underflow detection is disabled, silently flush to zero.
> +       Note that flush-to-zero mode may already be enabled via the FPCR.  */
> +    if ((quals & QUAL_U) == 0 && !FP_STATUS.flush_to_zero) {
> +        set_flush_to_zero(1, &FP_STATUS);
> +        return 0x10000;

What does this constant correspond to?

> +    }
> +    return 0;
> +}
> +
> +/* Begin processing an fp operation.  Return a token that should be passed
> +   when completing the fp operation.  */
> +static uint32_t begin_fp(uint32_t quals)
> +{
> +    uint32_t ret = 0;
> +
> +    ret |= begin_fp_roundmode(quals);
> +    ret |= begin_fp_flush_to_zero(quals);
> +    ret |= begin_fp_exception();
> +
> +    return ret;
> +}
> +
> +/* End processing an fp operation.  */
> +
> +static inline void end_fp_roundmode(uint32_t orig)
> +{
> +    uint32_t rm = FP_STATUS.float_rounding_mode, old_rm = orig & 0xff;
> +    if (unlikely(rm != old_rm))
> +        set_float_rounding_mode(old_rm, &FP_STATUS);

coding style

> +}
> +
> +static inline void end_fp_flush_to_zero(uint32_t orig)
> +{
> +    if (orig & 0x10000)

What does this constant correspond to? I guess it matches the previous
one.

> +        set_flush_to_zero(0, &FP_STATUS);

coding style

> +}
> +
> +static void end_fp_exception(uint32_t quals, uint32_t orig)
> +{
> +    uint8_t exc = FP_STATUS.float_exception_flags;
> +
> +    /* If inexact detection is disabled, silently clear it.  */
> +    if ((quals & QUAL_I) == 0)
> +        exc &= ~float_flag_inexact;

Coding style.

> +
> +    orig = (orig >> 8) & 0xff;
> +    set_float_exception_flags(exc | orig, &FP_STATUS);
> +
> +    /* Raise an exception as required.  */
> +    if (unlikely(exc)) {
> +        if (quals & QUAL_S)
> +            exc &= ~FP_STATUS.float_exception_mask;
> +        if (exc) {
> +            uint32_t hw_exc = 0;
> +
> +            if (exc & float_flag_invalid)
> +                hw_exc |= EXC_M_INV;
> +            if (exc & float_flag_divbyzero)
> +                hw_exc |= EXC_M_DZE;
> +            if (exc & float_flag_overflow)
> +                hw_exc |= EXC_M_FOV;
> +            if (exc & float_flag_underflow)
> +                hw_exc |= EXC_M_UNF;
> +            if (exc & float_flag_inexact)
> +                hw_exc |= EXC_M_INE;
> +
> +            helper_excp(EXCP_ARITH, hw_exc);
> +        }
> +    }
> +}
> +
> +static void end_fp(uint32_t quals, uint32_t orig)
> +{
> +    end_fp_roundmode(orig);
> +    end_fp_flush_to_zero(orig);
> +    end_fp_exception(quals, orig);
> +}
> +
> +static uint64_t remap_ieee_input(uint32_t quals, uint64_t a)
> +{
> +    uint64_t frac;
> +    uint32_t exp;
> +
> +    exp = (uint32_t)(a >> 52) & 0x7ff;
> +    frac = a & 0xfffffffffffffull;
> +
> +    if (exp == 0) {
> +        if (frac != 0) {
> +            /* If DNZ is set, flush denormals to zero on input.  */
> +            if (env->fpcr & FPCR_DNZ)
> +                a = a & (1ull << 63);
> +            /* If software completion not enabled, trap.  */
> +            else if ((quals & QUAL_S) == 0)
> +                helper_excp(EXCP_ARITH, EXC_M_UNF);
> +        }
> +    } else if (exp == 0x7ff) {
> +        /* Infinity or NaN.  If software completion is not enabled, trap.
> +           If /s is enabled, we'll properly signal for SNaN on output.  */
> +        /* ??? I'm not sure these exception bit flags are correct.  I do
> +           know that the Linux kernel, at least, doesn't rely on them and
> +           just emulates the insn to figure out what exception to use.  */
> +        if ((quals & QUAL_S) == 0)
> +            helper_excp(EXCP_ARITH, frac ? EXC_M_INV : EXC_M_FOV);

Coding style.

> +    }
> +
> +    return a;
> +}
> +
>  /* F floating (VAX) */
> -static inline uint64_t float32_to_f(float32 fa)
> +static uint64_t float32_to_f(float32 fa)
>  {
>      uint64_t r, exp, mant, sig;
>      CPU_FloatU a;
> @@ -404,7 +571,7 @@ static inline uint64_t float32_to_f(float32 fa)
>      return r;
>  }
>  
> -static inline float32 f_to_float32(uint64_t a)
> +static float32 f_to_float32(uint64_t a)
>  {
>      uint32_t exp, mant_sig;
>      CPU_FloatU r;
> @@ -447,58 +614,83 @@ uint64_t helper_memory_to_f (uint32_t a)
>      return r;
>  }
>  
> -uint64_t helper_addf (uint64_t a, uint64_t b)
> +uint64_t helper_addf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_subf (uint64_t a, uint64_t b)
> +uint64_t helper_subf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_mulf (uint64_t a, uint64_t b)
> +uint64_t helper_mulf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_divf (uint64_t a, uint64_t b)
> +uint64_t helper_divf (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = f_to_float32(a);
>      fb = f_to_float32(b);
> +
> +    token = begin_fp(quals);
>      fr = float32_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_sqrtf (uint64_t t)
> +uint64_t helper_sqrtf (uint64_t t, uint32_t quals)
>  {
>      float32 ft, fr;
> +    uint32_t token;
>  
>      ft = f_to_float32(t);
> +
> +    token = begin_fp(quals);
>      fr = float32_sqrt(ft, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
>  
>  /* G floating (VAX) */
> -static inline uint64_t float64_to_g(float64 fa)
> +static uint64_t float64_to_g(float64 fa)
>  {
>      uint64_t r, exp, mant, sig;
>      CPU_DoubleU a;
> @@ -531,7 +723,7 @@ static inline uint64_t float64_to_g(float64 fa)
>      return r;
>  }
>  
> -static inline float64 g_to_float64(uint64_t a)
> +static float64 g_to_float64(uint64_t a)
>  {
>      uint64_t exp, mant_sig;
>      CPU_DoubleU r;
> @@ -574,52 +766,77 @@ uint64_t helper_memory_to_g (uint64_t a)
>      return r;
>  }
>  
> -uint64_t helper_addg (uint64_t a, uint64_t b)
> +uint64_t helper_addg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_subg (uint64_t a, uint64_t b)
> +uint64_t helper_subg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_mulg (uint64_t a, uint64_t b)
> +uint64_t helper_mulg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> -
> +    uint32_t token;
> +    
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_divg (uint64_t a, uint64_t b)
> +uint64_t helper_divg (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
>      fb = g_to_float64(b);
> +
> +    token = begin_fp(quals);
>      fr = float64_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> -uint64_t helper_sqrtg (uint64_t a)
> +uint64_t helper_sqrtg (uint64_t a, uint32_t quals)
>  {
>      float64 fa, fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
>      fr = float64_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> @@ -627,7 +844,7 @@ uint64_t helper_sqrtg (uint64_t a)
>  /* S floating (single) */
>  
>  /* Taken from linux/arch/alpha/kernel/traps.c, s_mem_to_reg.  */
> -static inline uint64_t float32_to_s_int(uint32_t fi)
> +static uint64_t float32_to_s_int(uint32_t fi)
>  {
>      uint32_t frac = fi & 0x7fffff;
>      uint32_t sign = fi >> 31;
> @@ -649,7 +866,7 @@ static inline uint64_t float32_to_s_int(uint32_t fi)
>              | ((uint64_t)frac << 29));
>  }
>  
> -static inline uint64_t float32_to_s(float32 fa)
> +static uint64_t float32_to_s(float32 fa)
>  {
>      CPU_FloatU a;
>      a.f = fa;
> @@ -678,52 +895,77 @@ uint64_t helper_memory_to_s (uint32_t a)
>      return float32_to_s_int(a);
>  }
>  
> -uint64_t helper_adds (uint64_t a, uint64_t b)
> +static float32 input_s(uint32_t quals, uint64_t a)
> +{
> +    return s_to_float32(remap_ieee_input(quals, a));
> +}
> +
> +uint64_t helper_adds (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_subs (uint64_t a, uint64_t b)
> +uint64_t helper_subs (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_muls (uint64_t a, uint64_t b)
> +uint64_t helper_muls (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_divs (uint64_t a, uint64_t b)
> +uint64_t helper_divs (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float32 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> -    fb = s_to_float32(b);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
> +    fb = input_s(quals, b);
>      fr = float32_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_sqrts (uint64_t a)
> +uint64_t helper_sqrts (uint64_t a, uint32_t quals)
>  {
>      float32 fa, fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
>      fr = float32_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> @@ -745,52 +987,78 @@ static inline uint64_t float64_to_t(float64 fa)
>      return r.ll;
>  }
>  
> -uint64_t helper_addt (uint64_t a, uint64_t b)
> +/* Raise any exceptions needed for using F, given the insn qualifiers.  */
> +static float64 input_t(uint32_t quals, uint64_t a)
> +{
> +    return t_to_float64(remap_ieee_input(quals, a));
> +}
> +
> +uint64_t helper_addt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_add(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_subt (uint64_t a, uint64_t b)
> +uint64_t helper_subt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_sub(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +    
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_mult (uint64_t a, uint64_t b)
> +uint64_t helper_mult (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_mul(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_divt (uint64_t a, uint64_t b)
> +uint64_t helper_divt (uint64_t a, uint64_t b, uint32_t quals)
>  {
>      float64 fa, fb, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> -    fb = t_to_float64(b);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
> +    fb = input_t(quals, b);
>      fr = float64_div(fa, fb, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_sqrtt (uint64_t a)
> +uint64_t helper_sqrtt (uint64_t a, uint32_t quals)
>  {
>      float64 fa, fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
>      fr = float64_sqrt(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> @@ -813,6 +1081,8 @@ uint64_t helper_cpyse(uint64_t a, uint64_t b)
>  
>  
>  /* Comparisons */
> +/* ??? Software completion qualifier missing.  */
> +
>  uint64_t helper_cmptun (uint64_t a, uint64_t b)
>  {
>      float64 fa, fb;
> @@ -905,70 +1175,218 @@ uint64_t helper_cmpglt(uint64_t a, uint64_t b)
>  }
>  
>  /* Floating point format conversion */
> -uint64_t helper_cvtts (uint64_t a)
> +uint64_t helper_cvtts (uint64_t a, uint32_t quals)
>  {
>      float64 fa;
>      float32 fr;
> +    uint32_t token;
>  
> -    fa = t_to_float64(a);
> +    token = begin_fp(quals);
> +    fa = input_t(quals, a);
>      fr = float64_to_float32(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_cvtst (uint64_t a)
> +uint64_t helper_cvtst (uint64_t a, uint32_t quals)
>  {
>      float32 fa;
>      float64 fr;
> +    uint32_t token;
>  
> -    fa = s_to_float32(a);
> +    token = begin_fp(quals);
> +    fa = input_s(quals, a);
>      fr = float32_to_float64(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_cvtqs (uint64_t a)
> +uint64_t helper_cvtqs (uint64_t a, uint32_t quals)
>  {
> -    float32 fr = int64_to_float32(a, &FP_STATUS);
> +    float32 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float32(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_s(fr);
>  }
>  
> -uint64_t helper_cvttq (uint64_t a)
> +/* Implement float64 to uint64 conversion without overflow enabled.
> +   In this mode we must supply the truncated result.  This behaviour
> +   is used by the compiler to get unsigned conversion for free with
> +   the same instruction.  */
> +
> +static uint64_t cvttq_internal(uint64_t a)
>  {
> -    float64 fa = t_to_float64(a);
> -    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
> +    uint64_t frac, ret = 0;
> +    uint32_t exp, sign, exc = 0;
> +    int shift;
> +
> +    sign = (a >> 63);
> +    exp = (uint32_t)(a >> 52) & 0x7ff;
> +    frac = a & 0xfffffffffffffull;
> +
> +    if (exp == 0) {
> +        if (unlikely(frac != 0))
> +            goto do_underflow;
> +    } else if (exp == 0x7ff) {
> +        if (frac == 0)
> +            exc = float_flag_overflow;
> +        else
> +            exc = float_flag_invalid;
> +    } else {
> +        /* Restore implicit bit.  */
> +        frac |= 0x10000000000000ull;
> +
> +        /* Note that neither overflow exceptions nor inexact exceptions
> +           are desired.  This lets us streamline the checks quite a bit.  */
> +        shift = exp - 1023 - 52;
> +        if (shift >= 0) {
> +            /* In this case the number is so large that we must shift
> +               the fraction left.  There is no rounding to do.  */
> +            if (shift < 63) {
> +                ret = frac << shift;
> +                if ((ret >> shift) != frac)
> +                    exc = float_flag_overflow;
> +            }
> +        } else {
> +            uint64_t round;
> +
> +            /* In this case the number is smaller than the fraction as
> +               represented by the 52 bit number.  Here we must think 
> +               about rounding the result.  Handle this by shifting the
> +               fractional part of the number into the high bits of ROUND.
> +               This will let us efficiently handle round-to-nearest.  */
> +            shift = -shift;
> +            if (shift < 63) {
> +                ret = frac >> shift;
> +                round = frac << (64 - shift);
> +            } else {
> +                /* The exponent is so small we shift out everything.
> +                   Leave a sticky bit for proper rounding below.  */
> +            do_underflow:
> +                round = 1;
> +            }
> +
> +            if (round) {
> +                exc = float_flag_inexact;
> +                switch (FP_STATUS.float_rounding_mode) {
> +                case float_round_nearest_even:
> +                    if (round == (1ull << 63)) {
> +                        /* Fraction is exactly 0.5; round to even.  */
> +                        ret += (ret & 1);
> +                    } else if (round > (1ull << 63)) {
> +                        ret += 1;
> +                    }
> +                    break;
> +                case float_round_to_zero:
> +                    break;
> +                case float_round_up:
> +                    if (!sign)
> +                        ret += 1;
> +                    break;
> +                case float_round_down:
> +                    if (sign)
> +                        ret += 1;
> +                    break;
> +                }
> +            }
> +        }
> +        if (sign)
> +            ret = -ret;
> +    }
> +    if (unlikely(exc))
> +        float_raise(exc, &FP_STATUS);
> +
> +    return ret;
> +}
> +
> +uint64_t helper_cvttq (uint64_t a, uint32_t quals)
> +{
> +    uint64_t ret;
> +    uint32_t token;
> +
> +    /* ??? There's an argument to be made that when /S is enabled, we
> +       should provide the standard IEEE saturated result, instead of
> +       the truncated result that we *must* provide when /V is disabled.
> +       However, that's not how either the Tru64 or Linux completion
> +       handlers actually work, and GCC knows it.  */
> +
> +    token = begin_fp(quals);
> +    a = remap_ieee_input(quals, a);
> +    ret = cvttq_internal(a);
> +    end_fp(quals, token);
> +
> +    return ret;
>  }
>  
> -uint64_t helper_cvtqt (uint64_t a)
> +uint64_t helper_cvtqt (uint64_t a, uint32_t quals)
>  {
> -    float64 fr = int64_to_float64(a, &FP_STATUS);
> +    float64 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float64(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_t(fr);
>  }
>  
> -uint64_t helper_cvtqf (uint64_t a)
> +uint64_t helper_cvtqf (uint64_t a, uint32_t quals)
>  {
> -    float32 fr = int64_to_float32(a, &FP_STATUS);
> +    float32 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
> +    fr = int64_to_float32(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_cvtgf (uint64_t a)
> +uint64_t helper_cvtgf (uint64_t a, uint32_t quals)
>  {
>      float64 fa;
>      float32 fr;
> +    uint32_t token;
>  
>      fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
>      fr = float64_to_float32(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float32_to_f(fr);
>  }
>  
> -uint64_t helper_cvtgq (uint64_t a)
> +uint64_t helper_cvtgq (uint64_t a, uint32_t quals)
>  {
> -    float64 fa = g_to_float64(a);
> -    return float64_to_int64_round_to_zero(fa, &FP_STATUS);
> +    float64 fa;
> +    uint64_t ret;
> +    uint32_t token;
> +
> +    fa = g_to_float64(a);
> +
> +    token = begin_fp(quals);
> +    ret = float64_to_int64(fa, &FP_STATUS);
> +    end_fp(quals, token);
> +
> +    return ret;
>  }
>  
> -uint64_t helper_cvtqg (uint64_t a)
> +uint64_t helper_cvtqg (uint64_t a, uint32_t quals)
>  {
>      float64 fr;
> +    uint32_t token;
> +
> +    token = begin_fp(quals);
>      fr = int64_to_float64(a, &FP_STATUS);
> +    end_fp(quals, token);
> +
>      return float64_to_g(fr);
>  }
>  
> @@ -979,35 +1397,24 @@ uint64_t helper_cvtlq (uint64_t a)
>      return (lo & 0x3FFFFFFF) | (hi & 0xc0000000);
>  }
>  
> -static inline uint64_t __helper_cvtql(uint64_t a, int s, int v)
> -{
> -    uint64_t r;
> -
> -    r = ((uint64_t)(a & 0xC0000000)) << 32;
> -    r |= ((uint64_t)(a & 0x7FFFFFFF)) << 29;
> -
> -    if (v && (int64_t)((int32_t)r) != (int64_t)r) {
> -        helper_excp(EXCP_ARITH, EXCP_ARITH_OVERFLOW);
> -    }
> -    if (s) {
> -        /* TODO */
> -    }
> -    return r;
> -}
> -
>  uint64_t helper_cvtql (uint64_t a)
>  {
> -    return __helper_cvtql(a, 0, 0);
> +    return ((a & 0xC0000000) << 32) | ((a & 0x7FFFFFFF) << 29);
>  }
>  
>  uint64_t helper_cvtqlv (uint64_t a)
>  {
> -    return __helper_cvtql(a, 0, 1);
> +    if ((int32_t)a != (int64_t)a)
> +        helper_excp(EXCP_ARITH, EXC_M_IOV);
> +    return helper_cvtql(a);
>  }
>  
>  uint64_t helper_cvtqlsv (uint64_t a)
>  {
> -    return __helper_cvtql(a, 1, 1);
> +    /* ??? I'm pretty sure there's nothing that /sv needs to do that /v
> +       doesn't do.  The only thing I can think is that /sv is a valid
> +       instruction merely for completeness in the ISA.  */
> +    return helper_cvtqlv(a);
>  }
>  
>  /* PALcode support special instructions */
> diff --git a/target-alpha/translate.c b/target-alpha/translate.c
> index 45cb697..e0ca0ed 100644
> --- a/target-alpha/translate.c
> +++ b/target-alpha/translate.c
> @@ -442,81 +442,79 @@ static void gen_fcmov(TCGCond inv_cond, int ra, int rb, 
> int rc)
>      gen_set_label(l1);
>  }
>  
> -#define FARITH2(name)                                       \
> -static inline void glue(gen_f, name)(int rb, int rc)        \
> -{                                                           \
> -    if (unlikely(rc == 31))                                 \
> -      return;                                               \
> -                                                            \
> -    if (rb != 31)                                           \
> -        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]);    \
> -    else {                                                  \
> -        TCGv tmp = tcg_const_i64(0);                        \
> -        gen_helper_ ## name (cpu_fir[rc], tmp);            \
> -        tcg_temp_free(tmp);                                 \
> -    }                                                       \
> +#define FARITH2(name)                                   \
> +static inline void glue(gen_f, name)(int rb, int rc)    \
> +{                                                       \
> +    if (unlikely(rc == 31))                             \
> +      return;                                           \
> +                                                        \
> +    if (rb != 31)                                       \
> +        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb]); \
> +    else {                                              \
> +        TCGv tmp = tcg_const_i64(0);                    \
> +        gen_helper_ ## name (cpu_fir[rc], tmp);         \
> +        tcg_temp_free(tmp);                             \
> +    }                                                   \
>  }
> -FARITH2(sqrts)
> -FARITH2(sqrtf)
> -FARITH2(sqrtg)
> -FARITH2(sqrtt)
> -FARITH2(cvtgf)
> -FARITH2(cvtgq)
> -FARITH2(cvtqf)
> -FARITH2(cvtqg)
> -FARITH2(cvtst)
> -FARITH2(cvtts)
> -FARITH2(cvttq)
> -FARITH2(cvtqs)
> -FARITH2(cvtqt)
>  FARITH2(cvtlq)
>  FARITH2(cvtql)
>  FARITH2(cvtqlv)
>  FARITH2(cvtqlsv)
>  
> -#define FARITH3(name)                                                     \
> -static inline void glue(gen_f, name)(int ra, int rb, int rc)              \
> -{                                                                         \
> -    if (unlikely(rc == 31))                                               \
> -        return;                                                           \
> -                                                                          \
> -    if (ra != 31) {                                                       \
> -        if (rb != 31)                                                     \
> -            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], cpu_fir[rb]);  \
> -        else {                                                            \
> -            TCGv tmp = tcg_const_i64(0);                                  \
> -            gen_helper_ ## name (cpu_fir[rc], cpu_fir[ra], tmp);          \
> -            tcg_temp_free(tmp);                                           \
> -        }                                                                 \
> -    } else {                                                              \
> -        TCGv tmp = tcg_const_i64(0);                                      \
> -        if (rb != 31)                                                     \
> -            gen_helper_ ## name (cpu_fir[rc], tmp, cpu_fir[rb]);          \
> -        else                                                              \
> -            gen_helper_ ## name (cpu_fir[rc], tmp, tmp);                   \
> -        tcg_temp_free(tmp);                                               \
> -    }                                                                     \
> +#define QFARITH2(name)                                          \
> +static inline void glue(gen_f, name)(int rb, int rc, int opc)   \
> +{                                                               \
> +    TCGv_i32 quals;                                             \
> +    if (unlikely(rc == 31))                                     \
> +      return;                                                   \
> +    quals = tcg_const_i32(opc & ~0x3f);                         \
> +    if (rb != 31)                                               \
> +        gen_helper_ ## name (cpu_fir[rc], cpu_fir[rb], quals);  \
> +    else {                                                      \
> +        TCGv tmp = tcg_const_i64(0);                            \
> +        gen_helper_ ## name (cpu_fir[rc], tmp, quals);          \
> +        tcg_temp_free(tmp);                                     \
> +    }                                                           \
> +    tcg_temp_free_i32(quals);                                   \
> +}
> +QFARITH2(sqrts)
> +QFARITH2(sqrtf)
> +QFARITH2(sqrtg)
> +QFARITH2(sqrtt)
> +QFARITH2(cvtgf)
> +QFARITH2(cvtgq)
> +QFARITH2(cvtqf)
> +QFARITH2(cvtqg)
> +QFARITH2(cvtst)
> +QFARITH2(cvtts)
> +QFARITH2(cvttq)
> +QFARITH2(cvtqs)
> +QFARITH2(cvtqt)
> +
> +#define FARITH3(name)                                           \
> +static inline void glue(gen_f, name)(int ra, int rb, int rc)    \
> +{                                                               \
> +    TCGv zero, ta, tb;                                          \
> +    if (unlikely(rc == 31))                                     \
> +        return;                                                 \
> +    ta = cpu_fir[ra];                                           \
> +    tb = cpu_fir[rb];                                           \
> +    if (unlikely(ra == 31)) {                                   \
> +        zero = tcg_const_i64(0);                                \
> +        ta = zero;                                              \
> +    }                                                           \
> +    if (unlikely(rb == 31)) {                                   \
> +        if (ra != 31)                                           \
> +            zero = tcg_const_i64(0);                            \
> +        tb = zero;                                              \
> +    }                                                           \
> +    gen_helper_ ## name (cpu_fir[rc], ta, tb);                  \
> +    if (ra == 31 || rb == 31)                                   \
> +        tcg_temp_free(zero);                                    \
>  }
> -
> -FARITH3(addf)
> -FARITH3(subf)
> -FARITH3(mulf)
> -FARITH3(divf)
> -FARITH3(addg)
> -FARITH3(subg)
> -FARITH3(mulg)
> -FARITH3(divg)
>  FARITH3(cmpgeq)
>  FARITH3(cmpglt)
>  FARITH3(cmpgle)
> -FARITH3(adds)
> -FARITH3(subs)
> -FARITH3(muls)
> -FARITH3(divs)
> -FARITH3(addt)
> -FARITH3(subt)
> -FARITH3(mult)
> -FARITH3(divt)
>  FARITH3(cmptun)
>  FARITH3(cmpteq)
>  FARITH3(cmptlt)
> @@ -525,6 +523,47 @@ FARITH3(cpys)
>  FARITH3(cpysn)
>  FARITH3(cpyse)
>  
> +#define QFARITH3(name)                                                  \
> +static inline void glue(gen_f, name)(int ra, int rb, int rc, int opc)   \
> +{                                                                       \
> +    TCGv zero, ta, tb;                                                  \
> +    TCGv_i32 quals;                                                     \
> +    if (unlikely(rc == 31))                                             \
> +        return;                                                         \
> +    ta = cpu_fir[ra];                                                   \
> +    tb = cpu_fir[rb];                                                   \
> +    if (unlikely(ra == 31)) {                                           \
> +        zero = tcg_const_i64(0);                                        \
> +        ta = zero;                                                      \
> +    }                                                                   \
> +    if (unlikely(rb == 31)) {                                           \
> +        if (ra != 31)                                                   \
> +            zero = tcg_const_i64(0);                                    \
> +        tb = zero;                                                      \
> +    }                                                                   \
> +    quals = tcg_const_i32(opc & ~0x3f);                                 \
> +    gen_helper_ ## name (cpu_fir[rc], ta, tb, quals);                   \
> +    tcg_temp_free_i32(quals);                                           \
> +    if (ra == 31 || rb == 31)                                           \
> +        tcg_temp_free(zero);                                            \
> +}
> +QFARITH3(addf)
> +QFARITH3(subf)
> +QFARITH3(mulf)
> +QFARITH3(divf)
> +QFARITH3(addg)
> +QFARITH3(subg)
> +QFARITH3(mulg)
> +QFARITH3(divg)
> +QFARITH3(adds)
> +QFARITH3(subs)
> +QFARITH3(muls)
> +QFARITH3(divs)
> +QFARITH3(addt)
> +QFARITH3(subt)
> +QFARITH3(mult)
> +QFARITH3(divt)
> +
>  static inline uint64_t zapnot_mask(uint8_t lit)
>  {
>      uint64_t mask = 0;
> @@ -1607,7 +1646,7 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>          }
>          break;
>      case 0x14:
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x04:
>              /* ITOFS */
>              if (!(ctx->amask & AMASK_FIX))
> @@ -1626,13 +1665,13 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>              /* SQRTF */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtf(rb, rc);
> +            gen_fsqrtf(rb, rc, fn11);
>              break;
>          case 0x0B:
>              /* SQRTS */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrts(rb, rc);
> +            gen_fsqrts(rb, rc, fn11);
>              break;
>          case 0x14:
>              /* ITOFF */
> @@ -1663,13 +1702,13 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>              /* SQRTG */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtg(rb, rc);
> +            gen_fsqrtg(rb, rc, fn11);
>              break;
>          case 0x02B:
>              /* SQRTT */
>              if (!(ctx->amask & AMASK_FIX))
>                  goto invalid_opc;
> -            gen_fsqrtt(rb, rc);
> +            gen_fsqrtt(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;
> @@ -1677,47 +1716,42 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>          break;
>      case 0x15:
>          /* VAX floating point */
> -        /* XXX: rounding mode and trap are ignored (!) */
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x00:
>              /* ADDF */
> -            gen_faddf(ra, rb, rc);
> +            gen_faddf(ra, rb, rc, fn11);
>              break;
>          case 0x01:
>              /* SUBF */
> -            gen_fsubf(ra, rb, rc);
> +            gen_fsubf(ra, rb, rc, fn11);
>              break;
>          case 0x02:
>              /* MULF */
> -            gen_fmulf(ra, rb, rc);
> +            gen_fmulf(ra, rb, rc, fn11);
>              break;
>          case 0x03:
>              /* DIVF */
> -            gen_fdivf(ra, rb, rc);
> +            gen_fdivf(ra, rb, rc, fn11);
>              break;
>          case 0x1E:
>              /* CVTDG */
> -#if 0 // TODO
> -            gen_fcvtdg(rb, rc);
> -#else
> +            /* TODO */
>              goto invalid_opc;
> -#endif
> -            break;
>          case 0x20:
>              /* ADDG */
> -            gen_faddg(ra, rb, rc);
> +            gen_faddg(ra, rb, rc, fn11);
>              break;
>          case 0x21:
>              /* SUBG */
> -            gen_fsubg(ra, rb, rc);
> +            gen_fsubg(ra, rb, rc, fn11);
>              break;
>          case 0x22:
>              /* MULG */
> -            gen_fmulg(ra, rb, rc);
> +            gen_fmulg(ra, rb, rc, fn11);
>              break;
>          case 0x23:
>              /* DIVG */
> -            gen_fdivg(ra, rb, rc);
> +            gen_fdivg(ra, rb, rc, fn11);
>              break;
>          case 0x25:
>              /* CMPGEQ */
> @@ -1733,27 +1767,23 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>              break;
>          case 0x2C:
>              /* CVTGF */
> -            gen_fcvtgf(rb, rc);
> +            gen_fcvtgf(rb, rc, fn11);
>              break;
>          case 0x2D:
>              /* CVTGD */
> -#if 0 // TODO
> -            gen_fcvtgd(rb, rc);
> -#else
> +            /* TODO */
>              goto invalid_opc;
> -#endif
> -            break;
>          case 0x2F:
>              /* CVTGQ */
> -            gen_fcvtgq(rb, rc);
> +            gen_fcvtgq(rb, rc, fn11);
>              break;
>          case 0x3C:
>              /* CVTQF */
> -            gen_fcvtqf(rb, rc);
> +            gen_fcvtqf(rb, rc, fn11);
>              break;
>          case 0x3E:
>              /* CVTQG */
> -            gen_fcvtqg(rb, rc);
> +            gen_fcvtqg(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;
> @@ -1761,39 +1791,38 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>          break;
>      case 0x16:
>          /* IEEE floating-point */
> -        /* XXX: rounding mode and traps are ignored (!) */
> -        switch (fpfn) { /* f11 & 0x3F */
> +        switch (fpfn) { /* fn11 & 0x3F */
>          case 0x00:
>              /* ADDS */
> -            gen_fadds(ra, rb, rc);
> +            gen_fadds(ra, rb, rc, fn11);
>              break;
>          case 0x01:
>              /* SUBS */
> -            gen_fsubs(ra, rb, rc);
> +            gen_fsubs(ra, rb, rc, fn11);
>              break;
>          case 0x02:
>              /* MULS */
> -            gen_fmuls(ra, rb, rc);
> +            gen_fmuls(ra, rb, rc, fn11);
>              break;
>          case 0x03:
>              /* DIVS */
> -            gen_fdivs(ra, rb, rc);
> +            gen_fdivs(ra, rb, rc, fn11);
>              break;
>          case 0x20:
>              /* ADDT */
> -            gen_faddt(ra, rb, rc);
> +            gen_faddt(ra, rb, rc, fn11);
>              break;
>          case 0x21:
>              /* SUBT */
> -            gen_fsubt(ra, rb, rc);
> +            gen_fsubt(ra, rb, rc, fn11);
>              break;
>          case 0x22:
>              /* MULT */
> -            gen_fmult(ra, rb, rc);
> +            gen_fmult(ra, rb, rc, fn11);
>              break;
>          case 0x23:
>              /* DIVT */
> -            gen_fdivt(ra, rb, rc);
> +            gen_fdivt(ra, rb, rc, fn11);
>              break;
>          case 0x24:
>              /* CMPTUN */
> @@ -1812,26 +1841,25 @@ static inline int translate_one(DisasContext *ctx, 
> uint32_t insn)
>              gen_fcmptle(ra, rb, rc);
>              break;
>          case 0x2C:
> -            /* XXX: incorrect */
>              if (fn11 == 0x2AC || fn11 == 0x6AC) {
>                  /* CVTST */
> -                gen_fcvtst(rb, rc);
> +                gen_fcvtst(rb, rc, fn11);
>              } else {
>                  /* CVTTS */
> -                gen_fcvtts(rb, rc);
> +                gen_fcvtts(rb, rc, fn11);
>              }
>              break;
>          case 0x2F:
>              /* CVTTQ */
> -            gen_fcvttq(rb, rc);
> +            gen_fcvttq(rb, rc, fn11);
>              break;
>          case 0x3C:
>              /* CVTQS */
> -            gen_fcvtqs(rb, rc);
> +            gen_fcvtqs(rb, rc, fn11);
>              break;
>          case 0x3E:
>              /* CVTQT */
> -            gen_fcvtqt(rb, rc);
> +            gen_fcvtqt(rb, rc, fn11);
>              break;
>          default:
>              goto invalid_opc;


-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
aurel...@aurel32.net                 http://www.aurel32.net


Reply via email to