On Fri, 2020-06-12 at 10:24 -0400, David Edelsohn wrote: > Hi, Will
Hi, > > On Fri, Jun 12, 2020 at 12:22 AM will schmidt < > will_schm...@vnet.ibm.com> wrote: > > > > > > Hi, > > Fix codegen implementation for the builtin > > vec_pack_to_short_fp32. > > > > Regtests are underway against powerpc64 > > (power7be,power8le,power9le). > > (this is a power9 builtin, so should be a noop for most of > > those). > > OK for trunk and backports? > > > > Thanks > > -Will > > > > > > [gcc] > > target pr/94954 > > > > * config/rs6000/altivec.h > > (vec_pack_to_short_fp32): Update. > > * config/rs6000/altivec.md (UNSPEC_CONVERT_4F32_8F16): New > > unspec. > > (convert_4f32_8f16): New define_expand. > > * config/rs6000/rs6000-builtin.def (convert_4f32_8f16): New > > builtin define > > and overload. > > * config/rs6000/rs6000-call.c > > (P9V_BUILTIN_VEC_CONVERT_4F32_8F16): New > > overloaded builtin entry. > > * config/rs6000/vsx.md (UNSPEC_VSX_XVCVSPHP): New unspec. > > (vsx_xvcvsphp): New define_insn. > > > > [testsuite] > > * testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c: > > Update. > > > > diff --git a/gcc/config/rs6000/altivec.h > > b/gcc/config/rs6000/altivec.h > > index 0a7e8ab..ab10025 100644 > > --- a/gcc/config/rs6000/altivec.h > > +++ b/gcc/config/rs6000/altivec.h > > @@ -431,11 +431,11 @@ > > /* Vector additions added in ISA 3.0. */ > > #define vec_first_match_index __builtin_vec_first_match_index > > #define vec_first_match_or_eos_index > > __builtin_vec_first_match_or_eos_index > > #define vec_first_mismatch_index > > __builtin_vec_first_mismatch_index > > #define vec_first_mismatch_or_eos_index > > __builtin_vec_first_mismatch_or_eos_index > > -#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8i16 > > +#define vec_pack_to_short_fp32 __builtin_vec_convert_4f32_8f16 > > #define vec_parity_lsbb __builtin_vec_vparity_lsbb > > #define vec_vctz __builtin_vec_vctz > > #define vec_cnttz __builtin_vec_vctz > > #define vec_vctzb __builtin_vec_vctzb > > #define vec_vctzd __builtin_vec_vctzd > > diff --git a/gcc/config/rs6000/altivec.md > > b/gcc/config/rs6000/altivec.md > > index 159f24e..855e7cc 100644 > > --- a/gcc/config/rs6000/altivec.md > > +++ b/gcc/config/rs6000/altivec.md > > @@ -78,10 +78,11 @@ > > UNSPEC_VUNPACK_HI_SIGN_DIRECT > > UNSPEC_VUNPACK_LO_SIGN_DIRECT > > UNSPEC_VUPKHPX > > UNSPEC_VUPKLPX > > UNSPEC_CONVERT_4F32_8I16 > > + UNSPEC_CONVERT_4F32_8F16 > > UNSPEC_DST > > UNSPEC_DSTT > > UNSPEC_DSTST > > UNSPEC_DSTSTT > > UNSPEC_LVSL > > @@ -3215,10 +3216,32 @@ > > emit_insn (gen_altivec_vctuxs (rtx_tmp_lo, operands[2], > > const0_rtx)); > > emit_insn (gen_altivec_vpkswss (operands[0], rtx_tmp_hi, > > rtx_tmp_lo)); > > DONE; > > }) > > > > + > > +;; Convert two vector F32 to packed vector f16. > > Just for consistency, the comment above probably should be F16 > (capitalized) above. Ok. I've fixed that locally. > Why does this pattern use V8HI mode instead of V8HF if this is for > 8F16 in contrast to the existing 8I16 pattern? Other than that > (fundamental) question, the rest looks good. Yeah, so even though the contents of the returned vector consists of packed float data, the return type is still vector unsigned short. I can't speak to the reasoning behind why the builtin was defined that way. Thanks for the review, thanks, -Will > > Thanks, David > > > +(define_expand "convert_4f32_8f16" > > + [(set (match_operand:V8HI 0 "register_operand" "=v") > > + (unspec:V8HI [(match_operand:V4SF 1 "register_operand" "v") > > + (match_operand:V4SF 2 "register_operand" > > "v")] > > + UNSPEC_CONVERT_4F32_8F16))] > > + "TARGET_P9_VECTOR" > > +{ > > + rtx rtx_tmp_hi = gen_reg_rtx (V4SImode); > > + rtx rtx_tmp_lo = gen_reg_rtx (V4SImode); > > + > > + emit_insn (gen_vsx_xvcvsphp (rtx_tmp_hi, operands[1] )); > > + emit_insn (gen_vsx_xvcvsphp (rtx_tmp_lo, operands[2] )); > > + if (BYTES_BIG_ENDIAN) > > + emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_lo, > > rtx_tmp_hi)); > > + else > > + emit_insn (gen_altivec_vpkuwum (operands[0], rtx_tmp_hi, > > rtx_tmp_lo)); > > + DONE; > > +}) > > + > > + > > ;; Generate > > ;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0 > > ;; vsubu?m SCRATCH2,SCRATCH1,%1 > > ;; vmaxs? %0,%1,SCRATCH2" > > (define_expand "abs<mode>2" > > diff --git a/gcc/config/rs6000/rs6000-builtin.def > > b/gcc/config/rs6000/rs6000-builtin.def > > index 8b1ddb0..47e9137 100644 > > --- a/gcc/config/rs6000/rs6000-builtin.def > > +++ b/gcc/config/rs6000/rs6000-builtin.def > > @@ -2208,10 +2208,11 @@ BU_P8V_OVERLOAD_3 (VPERMXOR, "vpermxor") > > > > /* ISA 3.0 vector overloaded 2-argument functions. */ > > BU_P9V_AV_2 (VSLV, "vslv", CONST, > > vslv) > > BU_P9V_AV_2 (VSRV, "vsrv", CONST, > > vsrv) > > BU_P9V_AV_2 (CONVERT_4F32_8I16, "convert_4f32_8i16", CONST, > > convert_4f32_8i16) > > +BU_P9V_AV_2 (CONVERT_4F32_8F16, "convert_4f32_8f16", CONST, > > convert_4f32_8f16) > > > > BU_P9V_AV_2 (VFIRSTMATCHINDEX_V16QI, "first_match_index_v16qi", > > CONST, first_match_index_v16qi) > > BU_P9V_AV_2 (VFIRSTMATCHINDEX_V8HI, "first_match_index_v8hi", > > CONST, first_match_index_v8hi) > > @@ -2238,10 +2239,11 @@ BU_P9V_AV_2 (VFIRSTMISMATCHOREOSINDEX_V4SI, > > "first_mismatch_or_eos_index_v4si", > > > > /* ISA 3.0 vector overloaded 2-argument functions. */ > > BU_P9V_OVERLOAD_2 (VSLV, "vslv") > > BU_P9V_OVERLOAD_2 (VSRV, "vsrv") > > BU_P9V_OVERLOAD_2 (CONVERT_4F32_8I16, "convert_4f32_8i16") > > +BU_P9V_OVERLOAD_2 (CONVERT_4F32_8F16, "convert_4f32_8f16") > > > > /* 2 argument vector functions added in ISA 3.0 (power9). */ > > BU_P9V_AV_2 > > (VADUB, "vadub", CONST, vaduv16qi3) > > BU_P9V_AV_2 > > (VADUH, "vaduh", CONST, vaduv8hi3) > > BU_P9V_AV_2 > > (VADUW, "vaduw", CONST, vaduv4si3) > > diff --git a/gcc/config/rs6000/rs6000-call.c > > b/gcc/config/rs6000/rs6000-call.c > > index 0ac8054..9708d7e 100644 > > --- a/gcc/config/rs6000/rs6000-call.c > > +++ b/gcc/config/rs6000/rs6000-call.c > > @@ -1975,10 +1975,12 @@ const struct altivec_builtin_types > > altivec_overloaded_builtins[] = { > > { P8V_BUILTIN_VEC_NEG, P8V_BUILTIN_NEG_V2DF, > > RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 }, > > > > { P9V_BUILTIN_VEC_CONVERT_4F32_8I16, > > P9V_BUILTIN_CONVERT_4F32_8I16, > > RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 > > }, > > + { P9V_BUILTIN_VEC_CONVERT_4F32_8F16, > > P9V_BUILTIN_CONVERT_4F32_8F16, > > + RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 > > }, > > > > { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, > > P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI, > > RS6000_BTI_UINTSI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 }, > > { P9V_BUILTIN_VEC_VFIRSTMATCHINDEX, > > P9V_BUILTIN_VFIRSTMATCHINDEX_V16QI, > > RS6000_BTI_UINTSI, RS6000_BTI_unsigned_V16QI, > > RS6000_BTI_unsigned_V16QI, 0 }, > > diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md > > index 2a28215..38908a5 100644 > > --- a/gcc/config/rs6000/vsx.md > > +++ b/gcc/config/rs6000/vsx.md > > @@ -295,10 +295,11 @@ > > UNSPEC_VSX_DIVSD > > UNSPEC_VSX_DIVUD > > UNSPEC_VSX_MULSD > > UNSPEC_VSX_SIGN_EXTEND > > UNSPEC_VSX_XVCVSPSXDS > > + UNSPEC_VSX_XVCVSPHP > > UNSPEC_VSX_VSLO > > UNSPEC_VSX_EXTRACT > > UNSPEC_VSX_SXEXPDP > > UNSPEC_VSX_SXSIG > > UNSPEC_VSX_SIEXPDP > > @@ -2177,10 +2178,19 @@ > > UNSPEC_VSX_CVHPSP))] > > "TARGET_P9_VECTOR" > > "xvcvhpsp %x0,%x1" > > [(set_attr "type" "vecfloat")]) > > > > +;; Generate xvcvsphp > > +(define_insn "vsx_xvcvsphp" > > + [(set (match_operand:V4SI 0 "register_operand" "=wa") > > + (unspec:V4SI [(match_operand:V4SF 1 "vsx_register_operand" > > "wa")] > > + UNSPEC_VSX_XVCVSPHP))] > > + "TARGET_P9_VECTOR" > > + "xvcvsphp %x0,%x1" > > +[(set_attr "type" "vecfloat")]) > > + > > ;; xscvdpsp used for splat'ing a scalar to V4SF, knowing that the > > internal SF > > ;; format of scalars is actually DF. > > (define_insn "vsx_xscvdpsp_scalar" > > [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa") > > (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" > > "wa")] > > diff --git a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9- > > runnable.c b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9- > > runnable.c > > index 0e4ab48..038ba44 100644 > > --- a/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c > > +++ b/gcc/testsuite/gcc.target/powerpc/builtins-1-p9-runnable.c > > @@ -1,25 +1,37 @@ > > /* { dg-do run { target { powerpc*-*-linux* && { lp64 && > > p9vector_hw } } } } */ > > /* { dg-require-effective-target powerpc_p9vector_ok } */ > > /* { dg-options "-O2 -mdejagnu-cpu=power9" } */ > > > > #include <altivec.h> > > +#include <stdio.h> > > > > void abort (void); > > > > int main() { > > int i; > > vector float vfa, vfb; > > - vector unsigned short vur, vuexpt; > > + vector unsigned short vur, vresult; > > > > - vfa = (vector float){3.4, 5.0, 20.0, 50.9 }; > > - vfb = (vector float){10.0, 40.0, 70.0, 100.0 }; > > - vuexpt = (vector unsigned short){ 3, 5, 20, 50, > > - 10, 40, 70, 100}; > > + vfa = (vector float){0.4, 1.6, 20.0, 99.9 }; > > + vfb = (vector float){10.0, -2.0, 70.0, 999.0 }; > > > > + /* Results from converting those single-precion floats to half- > > precision packed values. */ > > + vresult = (vector unsigned short) { 0x3666, 0x3e66, 0x4d00, > > 0x563e, > > + 0x4900, 0xc000, 0x5460, > > 0x63ce }; > > + > > + /* convert from single-precion float to unsigned short and pack > > */ > > vur = vec_pack_to_short_fp32 (vfa, vfb); > > > > +#ifdef DEBUG > > + for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i,vfa[i]); } > > + for(i = 0; i< 4; i++) { printf("i=[%d] %f \n",i+4,vfb[i]); } > > + for(i = 0; i< 8; i++) { printf("i=[%d] %d \n",i,vur[i]); } > > +#endif > > + > > for(i = 0; i< 8; i++) { > > - if (vur[i] != vuexpt[i]) > > + if (vur[i] != vresult[i]) { > > + printf("i=[%d] 0x%x != 0x%x \n",i,vur[i],vresult[i]); > > abort(); > > + } > > } > > } > >