Hello, On Tue, May 22, 2018 at 7:56 PM, Richard Henderson <richard.hender...@linaro.org> wrote: > Depending on the host abi, float16, aka uint16_t, values are > passed and returned either zero-extended in the host register > or with garbage at the top of the host register. > > The tcg code generator has so far been assuming garbage, as that > matches the x86 abi, but this is incorrect for other host abis. > Further, target/arm has so far been assuming zero-extended results, > so that it may store the 16-bit value into a 32-bit slot with the > high 16-bits already clear. > > Rectify both problems by mapping "f16" in the helper definition > to uint32_t instead of (a typedef for) uint16_t. This forces > the host compiler to assume garbage in the upper 16 bits on input > and to zero-extend the result on output. > > Signed-off-by: Richard Henderson <richard.hender...@linaro.org>
Some AArch64 tests I had that previously failed on a x86-64 host now pass. Tested-by: Laurent Desnogues <laurent.desnog...@gmail.com> Perhaps the two occurrences of the following comment in target/arm/translate-a64.c could be removed along with this patch: /* write_fp_sreg is OK here because top half of tcg_rd is zero */ or reworded. Thanks, Laurent > --- > include/exec/helper-head.h | 2 +- > target/arm/helper-a64.c | 35 +++++++++-------- > target/arm/helper.c | 80 +++++++++++++++++++------------------- > 3 files changed, 59 insertions(+), 58 deletions(-) > > diff --git a/include/exec/helper-head.h b/include/exec/helper-head.h > index 15b6a68de3..276dd5afce 100644 > --- a/include/exec/helper-head.h > +++ b/include/exec/helper-head.h > @@ -39,7 +39,7 @@ > #define dh_ctype_int int > #define dh_ctype_i64 uint64_t > #define dh_ctype_s64 int64_t > -#define dh_ctype_f16 float16 > +#define dh_ctype_f16 uint32_t > #define dh_ctype_f32 float32 > #define dh_ctype_f64 float64 > #define dh_ctype_ptr void * > diff --git a/target/arm/helper-a64.c b/target/arm/helper-a64.c > index f92bdea732..6ee5f684cf 100644 > --- a/target/arm/helper-a64.c > +++ b/target/arm/helper-a64.c > @@ -85,12 +85,12 @@ static inline uint32_t float_rel_to_flags(int res) > return flags; > } > > -uint64_t HELPER(vfp_cmph_a64)(float16 x, float16 y, void *fp_status) > +uint64_t HELPER(vfp_cmph_a64)(uint32_t x, uint32_t y, void *fp_status) > { > return float_rel_to_flags(float16_compare_quiet(x, y, fp_status)); > } > > -uint64_t HELPER(vfp_cmpeh_a64)(float16 x, float16 y, void *fp_status) > +uint64_t HELPER(vfp_cmpeh_a64)(uint32_t x, uint32_t y, void *fp_status) > { > return float_rel_to_flags(float16_compare(x, y, fp_status)); > } > @@ -214,7 +214,7 @@ uint64_t HELPER(neon_cgt_f64)(float64 a, float64 b, void > *fpstp) > #define float64_three make_float64(0x4008000000000000ULL) > #define float64_one_point_five make_float64(0x3FF8000000000000ULL) > > -float16 HELPER(recpsf_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(recpsf_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > > @@ -259,7 +259,7 @@ float64 HELPER(recpsf_f64)(float64 a, float64 b, void > *fpstp) > return float64_muladd(a, b, float64_two, 0, fpst); > } > > -float16 HELPER(rsqrtsf_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(rsqrtsf_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > > @@ -366,7 +366,7 @@ uint64_t HELPER(neon_addlp_u16)(uint64_t a) > } > > /* Floating-point reciprocal exponent - see FPRecpX in ARM ARM */ > -float16 HELPER(frecpx_f16)(float16 a, void *fpstp) > +uint32_t HELPER(frecpx_f16)(uint32_t a, void *fpstp) > { > float_status *fpst = fpstp; > uint16_t val16, sbit; > @@ -695,7 +695,7 @@ void HELPER(casp_be_parallel)(CPUARMState *env, uint32_t > rs, uint64_t addr, > #define ADVSIMD_HELPER(name, suffix) HELPER(glue(glue(advsimd_, name), > suffix)) > > #define ADVSIMD_HALFOP(name) \ > -float16 ADVSIMD_HELPER(name, h)(float16 a, float16 b, void *fpstp) \ > +uint32_t ADVSIMD_HELPER(name, h)(uint32_t a, uint32_t b, void *fpstp) \ > { \ > float_status *fpst = fpstp; \ > return float16_ ## name(a, b, fpst); \ > @@ -755,7 +755,8 @@ ADVSIMD_HALFOP(mulx) > ADVSIMD_TWOHALFOP(mulx) > > /* fused multiply-accumulate */ > -float16 HELPER(advsimd_muladdh)(float16 a, float16 b, float16 c, void *fpstp) > +uint32_t HELPER(advsimd_muladdh)(uint32_t a, uint32_t b, uint32_t c, > + void *fpstp) > { > float_status *fpst = fpstp; > return float16_muladd(a, b, c, 0, fpst); > @@ -786,14 +787,14 @@ uint32_t HELPER(advsimd_muladd2h)(uint32_t two_a, > uint32_t two_b, > > #define ADVSIMD_CMPRES(test) (test) ? 0xffff : 0 > > -uint32_t HELPER(advsimd_ceq_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(advsimd_ceq_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > int compare = float16_compare_quiet(a, b, fpst); > return ADVSIMD_CMPRES(compare == float_relation_equal); > } > > -uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(advsimd_cge_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > int compare = float16_compare(a, b, fpst); > @@ -801,14 +802,14 @@ uint32_t HELPER(advsimd_cge_f16)(float16 a, float16 b, > void *fpstp) > compare == float_relation_equal); > } > > -uint32_t HELPER(advsimd_cgt_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(advsimd_cgt_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > int compare = float16_compare(a, b, fpst); > return ADVSIMD_CMPRES(compare == float_relation_greater); > } > > -uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(advsimd_acge_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > float16 f0 = float16_abs(a); > @@ -818,7 +819,7 @@ uint32_t HELPER(advsimd_acge_f16)(float16 a, float16 b, > void *fpstp) > compare == float_relation_equal); > } > > -uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, void *fpstp) > +uint32_t HELPER(advsimd_acgt_f16)(uint32_t a, uint32_t b, void *fpstp) > { > float_status *fpst = fpstp; > float16 f0 = float16_abs(a); > @@ -828,12 +829,12 @@ uint32_t HELPER(advsimd_acgt_f16)(float16 a, float16 b, > void *fpstp) > } > > /* round to integral */ > -float16 HELPER(advsimd_rinth_exact)(float16 x, void *fp_status) > +uint32_t HELPER(advsimd_rinth_exact)(uint32_t x, void *fp_status) > { > return float16_round_to_int(x, fp_status); > } > > -float16 HELPER(advsimd_rinth)(float16 x, void *fp_status) > +uint32_t HELPER(advsimd_rinth)(uint32_t x, void *fp_status) > { > int old_flags = get_float_exception_flags(fp_status), new_flags; > float16 ret; > @@ -857,7 +858,7 @@ float16 HELPER(advsimd_rinth)(float16 x, void *fp_status) > * setting the mode appropriately before calling the helper. > */ > > -uint32_t HELPER(advsimd_f16tosinth)(float16 a, void *fpstp) > +uint32_t HELPER(advsimd_f16tosinth)(uint32_t a, void *fpstp) > { > float_status *fpst = fpstp; > > @@ -869,7 +870,7 @@ uint32_t HELPER(advsimd_f16tosinth)(float16 a, void > *fpstp) > return float16_to_int16(a, fpst); > } > > -uint32_t HELPER(advsimd_f16touinth)(float16 a, void *fpstp) > +uint32_t HELPER(advsimd_f16touinth)(uint32_t a, void *fpstp) > { > float_status *fpst = fpstp; > > @@ -885,7 +886,7 @@ uint32_t HELPER(advsimd_f16touinth)(float16 a, void > *fpstp) > * Square Root and Reciprocal square root > */ > > -float16 HELPER(sqrt_f16)(float16 a, void *fpstp) > +uint32_t HELPER(sqrt_f16)(uint32_t a, void *fpstp) > { > float_status *s = fpstp; > > diff --git a/target/arm/helper.c b/target/arm/helper.c > index c0f739972e..a4bfac3932 100644 > --- a/target/arm/helper.c > +++ b/target/arm/helper.c > @@ -11344,35 +11344,35 @@ DO_VFP_cmp(d, float64) > > /* Integer to float and float to integer conversions */ > > -#define CONV_ITOF(name, fsz, sign) \ > - float##fsz HELPER(name)(uint32_t x, void *fpstp) \ > -{ \ > - float_status *fpst = fpstp; \ > - return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \ > +#define CONV_ITOF(name, ftype, fsz, sign) \ > +ftype HELPER(name)(uint32_t x, void *fpstp) \ > +{ \ > + float_status *fpst = fpstp; \ > + return sign##int32_to_##float##fsz((sign##int32_t)x, fpst); \ > } > > -#define CONV_FTOI(name, fsz, sign, round) \ > -uint32_t HELPER(name)(float##fsz x, void *fpstp) \ > -{ \ > - float_status *fpst = fpstp; \ > - if (float##fsz##_is_any_nan(x)) { \ > - float_raise(float_flag_invalid, fpst); \ > - return 0; \ > - } \ > - return float##fsz##_to_##sign##int32##round(x, fpst); \ > +#define CONV_FTOI(name, ftype, fsz, sign, round) \ > +uint32_t HELPER(name)(ftype x, void *fpstp) \ > +{ \ > + float_status *fpst = fpstp; \ > + if (float##fsz##_is_any_nan(x)) { \ > + float_raise(float_flag_invalid, fpst); \ > + return 0; \ > + } \ > + return float##fsz##_to_##sign##int32##round(x, fpst); \ > } > > -#define FLOAT_CONVS(name, p, fsz, sign) \ > -CONV_ITOF(vfp_##name##to##p, fsz, sign) \ > -CONV_FTOI(vfp_to##name##p, fsz, sign, ) \ > -CONV_FTOI(vfp_to##name##z##p, fsz, sign, _round_to_zero) > +#define FLOAT_CONVS(name, p, ftype, fsz, sign) \ > + CONV_ITOF(vfp_##name##to##p, ftype, fsz, sign) \ > + CONV_FTOI(vfp_to##name##p, ftype, fsz, sign, ) \ > + CONV_FTOI(vfp_to##name##z##p, ftype, fsz, sign, _round_to_zero) > > -FLOAT_CONVS(si, h, 16, ) > -FLOAT_CONVS(si, s, 32, ) > -FLOAT_CONVS(si, d, 64, ) > -FLOAT_CONVS(ui, h, 16, u) > -FLOAT_CONVS(ui, s, 32, u) > -FLOAT_CONVS(ui, d, 64, u) > +FLOAT_CONVS(si, h, uint32_t, 16, ) > +FLOAT_CONVS(si, s, float32, 32, ) > +FLOAT_CONVS(si, d, float64, 64, ) > +FLOAT_CONVS(ui, h, uint32_t, 16, u) > +FLOAT_CONVS(ui, s, float32, 32, u) > +FLOAT_CONVS(ui, d, float64, 64, u) > > #undef CONV_ITOF > #undef CONV_FTOI > @@ -11465,22 +11465,22 @@ static float16 do_postscale_fp16(float64 f, int > shift, float_status *fpst) > return float64_to_float16(float64_scalbn(f, -shift, fpst), true, fpst); > } > > -float16 HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_sltoh)(uint32_t x, uint32_t shift, void *fpst) > { > return do_postscale_fp16(int32_to_float64(x, fpst), shift, fpst); > } > > -float16 HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_ultoh)(uint32_t x, uint32_t shift, void *fpst) > { > return do_postscale_fp16(uint32_to_float64(x, fpst), shift, fpst); > } > > -float16 HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_sqtoh)(uint64_t x, uint32_t shift, void *fpst) > { > return do_postscale_fp16(int64_to_float64(x, fpst), shift, fpst); > } > > -float16 HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_uqtoh)(uint64_t x, uint32_t shift, void *fpst) > { > return do_postscale_fp16(uint64_to_float64(x, fpst), shift, fpst); > } > @@ -11504,32 +11504,32 @@ static float64 do_prescale_fp16(float16 f, int > shift, float_status *fpst) > } > } > > -uint32_t HELPER(vfp_toshh)(float16 x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_toshh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_int16(do_prescale_fp16(x, shift, fpst), fpst); > } > > -uint32_t HELPER(vfp_touhh)(float16 x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_touhh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_uint16(do_prescale_fp16(x, shift, fpst), fpst); > } > > -uint32_t HELPER(vfp_toslh)(float16 x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_toslh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_int32(do_prescale_fp16(x, shift, fpst), fpst); > } > > -uint32_t HELPER(vfp_toulh)(float16 x, uint32_t shift, void *fpst) > +uint32_t HELPER(vfp_toulh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_uint32(do_prescale_fp16(x, shift, fpst), fpst); > } > > -uint64_t HELPER(vfp_tosqh)(float16 x, uint32_t shift, void *fpst) > +uint64_t HELPER(vfp_tosqh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_int64(do_prescale_fp16(x, shift, fpst), fpst); > } > > -uint64_t HELPER(vfp_touqh)(float16 x, uint32_t shift, void *fpst) > +uint64_t HELPER(vfp_touqh)(uint32_t x, uint32_t shift, void *fpst) > { > return float64_to_uint64(do_prescale_fp16(x, shift, fpst), fpst); > } > @@ -11565,7 +11565,7 @@ uint32_t HELPER(set_neon_rmode)(uint32_t rmode, > CPUARMState *env) > } > > /* Half precision conversions. */ > -float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void *fpstp, uint32_t > ahp_mode) > +float32 HELPER(vfp_fcvt_f16_to_f32)(uint32_t a, void *fpstp, uint32_t > ahp_mode) > { > /* Squash FZ16 to 0 for the duration of conversion. In this case, > * it would affect flushing input denormals. > @@ -11578,7 +11578,7 @@ float32 HELPER(vfp_fcvt_f16_to_f32)(float16 a, void > *fpstp, uint32_t ahp_mode) > return r; > } > > -float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t > ahp_mode) > +uint32_t HELPER(vfp_fcvt_f32_to_f16)(float32 a, void *fpstp, uint32_t > ahp_mode) > { > /* Squash FZ16 to 0 for the duration of conversion. In this case, > * it would affect flushing output denormals. > @@ -11591,7 +11591,7 @@ float16 HELPER(vfp_fcvt_f32_to_f16)(float32 a, void > *fpstp, uint32_t ahp_mode) > return r; > } > > -float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void *fpstp, uint32_t > ahp_mode) > +float64 HELPER(vfp_fcvt_f16_to_f64)(uint32_t a, void *fpstp, uint32_t > ahp_mode) > { > /* Squash FZ16 to 0 for the duration of conversion. In this case, > * it would affect flushing input denormals. > @@ -11604,7 +11604,7 @@ float64 HELPER(vfp_fcvt_f16_to_f64)(float16 a, void > *fpstp, uint32_t ahp_mode) > return r; > } > > -float16 HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t > ahp_mode) > +uint32_t HELPER(vfp_fcvt_f64_to_f16)(float64 a, void *fpstp, uint32_t > ahp_mode) > { > /* Squash FZ16 to 0 for the duration of conversion. In this case, > * it would affect flushing output denormals. > @@ -11742,7 +11742,7 @@ static bool round_to_inf(float_status *fpst, bool > sign_bit) > g_assert_not_reached(); > } > > -float16 HELPER(recpe_f16)(float16 input, void *fpstp) > +uint32_t HELPER(recpe_f16)(uint32_t input, void *fpstp) > { > float_status *fpst = fpstp; > float16 f16 = float16_squash_input_denormal(input, fpst); > @@ -11937,7 +11937,7 @@ static uint64_t recip_sqrt_estimate(int *exp , int > exp_off, uint64_t frac) > return extract64(estimate, 0, 8) << 44; > } > > -float16 HELPER(rsqrte_f16)(float16 input, void *fpstp) > +uint32_t HELPER(rsqrte_f16)(uint32_t input, void *fpstp) > { > float_status *s = fpstp; > float16 f16 = float16_squash_input_denormal(input, s); > -- > 2.17.0 > >