Christophe Lyon <christophe.l...@linaro.org> writes: > On Fri, 11 Jun 2021 at 18:25, Richard Sandiford > <richard.sandif...@arm.com> wrote: >> >> Christophe Lyon <christophe.l...@linaro.org> writes: >> > Thanks for the feedback. How about v2 attached? >> > Do you want me to merge neon_vec_unpack<US> and >> > mve_vec_unpack<US> and only have different assembly? >> > if (TARGET_HAVE_MVE) >> > return "vmovlb.<US>%#<V_sz_elem> %q0, %q1"; >> > else >> > return "vmovlb.<US>%#<V_sz_elem> %q0, %q1"; >> >> I think it'd be better to keep them separate, given that they have >> different attributes. >> >> > @@ -2199,10 +2222,23 @@ (define_insn "mve_vmovnbq_<supf><mode>" >> > [(set_attr "type" "mve_move") >> > ]) >> > >> > +;; vmovnb pattern used by the vec_pack_trunc expander to avoid the >> > +;; need for an extra register. >> >> “to avoid the need for an uninitailized input operand” might >> be clearer. >> > Done > >> > +(define_insn "@mve_vec_pack_trunc_lo_<mode>" >> > + [ >> > + (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w") >> > + (unspec:<V_narrow_pack> [(match_operand:MVE_5 1 "s_register_operand" >> > "w")] >> > + VMOVNBQ_S)) >> > + ] >> > + "TARGET_HAVE_MVE" >> > + "vmovnb.i%#<V_sz_elem> %q0, %q1" >> > + [(set_attr "type" "mve_move") >> > +]) >> > + >> > […] >> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md >> > index 430a92ce966..3afb3e1d891 100644 >> > --- a/gcc/config/arm/vec-common.md >> > +++ b/gcc/config/arm/vec-common.md >> > @@ -632,3 +632,91 @@ (define_expand "clz<mode>2" >> > "ARM_HAVE_<MODE>_ARITH >> > && !TARGET_REALLY_IWMMXT" >> > ) >> > + >> > +;; vmovl[tb] are not available for V4SI on MVE >> > +(define_expand "vec_unpack<US>_hi_<mode>" >> > + [(match_operand:<V_unpack> 0 "register_operand") >> > + (SE:<V_unpack> (match_operand:VU 1 "register_operand"))] >> > + "ARM_HAVE_<MODE>_ARITH >> > + && !TARGET_REALLY_IWMMXT >> > + && ! (<MODE>mode == V4SImode && TARGET_HAVE_MVE) >> > + && !BYTES_BIG_ENDIAN" >> > + { >> > + rtvec v = rtvec_alloc (<V_mode_nunits>/2); >> > + rtx t1; >> > + int i; >> > + for (i = 0; i < (<V_mode_nunits>/2); i++) >> > + RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); >> > + >> > + t1 = gen_rtx_PARALLEL (<MODE>mode, v); >> > + >> > + if (TARGET_NEON) >> > + emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0], >> > + operands[1], >> > + t1)); >> > + else >> > + emit_insn (gen_mve_vec_unpack<US>_hi_<mode> (operands[0], >> > + operands[1], >> > + t1)); >> > + DONE; >> > + } >> > +) >> >> Since the patterns are the same for both MVE and Neon (a good thing!), >> it would be simpler to write this as: >> >> (define_insn "mve_vec_unpack<US>_lo_<mode>" > I guess you mean define_expand?
Yes, sorry. >> [(set (match_operand:<V_unpack> 0 "register_operand" "=w") >> (SE:<V_unpack> (vec_select:<V_HALF> >> (match_operand:VU 1 "register_operand" "w") >> (match_dup 2))))] >> … >> >> and then set operands[2] to what is t1 above. There would then be >> no need to call emit_insn & DONE, and we could use the natural >> iterator (MVE_3) for the MVE patterns. >> >> Same idea for the lo version. >> >> > […] >> > +;; vmovn[tb] are not available for V2DI on MVE >> > +(define_expand "vec_pack_trunc_<mode>" >> > + [(set (match_operand:<V_narrow_pack> 0 "register_operand") >> > + (vec_concat:<V_narrow_pack> >> > + (truncate:<V_narrow> >> > + (match_operand:VN 1 "register_operand")) >> > + (truncate:<V_narrow> >> > + (match_operand:VN 2 "register_operand"))))] >> > + "ARM_HAVE_<MODE>_ARITH >> > + && !TARGET_REALLY_IWMMXT >> > + && ! (<MODE>mode == V2DImode && TARGET_HAVE_MVE) >> > + && !BYTES_BIG_ENDIAN" >> > + { >> > + if (TARGET_NEON) >> > + { >> > + emit_insn (gen_neon_quad_vec_pack_trunc_<mode> (operands[0], >> > operands[1], >> > + operands[2])); >> > + } >> > + else >> > + { >> > + rtx tmpreg = gen_reg_rtx (<V_narrow_pack>mode); >> > + emit_insn (gen_mve_vec_pack_trunc_lo (<MODE>mode, tmpreg, >> > operands[1])); >> > + emit_insn (gen_mve_vmovntq (VMOVNTQ_S, <MODE>mode, >> > + tmpreg, tmpreg, operands[2])); >> > + emit_move_insn (operands[0], tmpreg); >> >> vmovntq can assign directly to operands[0], without a separate move. >> >> OK with those changes, thanks. > > Just to be sure, here is v3 which hopefully matches what you meant. > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md > index 430a92ce966..1d6d4379855 100644 > --- a/gcc/config/arm/vec-common.md > +++ b/gcc/config/arm/vec-common.md > @@ -632,3 +632,78 @@ (define_expand "clz<mode>2" > "ARM_HAVE_<MODE>_ARITH > && !TARGET_REALLY_IWMMXT" > ) > + > +;; vmovl[tb] are not available for V4SI on MVE > +(define_expand "vec_unpack<US>_hi_<mode>" > + [(set (match_operand:<V_unpack> 0 "register_operand") > + (SE:<V_unpack> (vec_select:<V_HALF> > + (match_operand:VU 1 "register_operand") > + (match_dup 2))))] The formatting of the vec_unpack<US>_lo_<mode> pattern looks nicer. > + "ARM_HAVE_<MODE>_ARITH > + && !TARGET_REALLY_IWMMXT > + && ! (<MODE>mode == V4SImode && TARGET_HAVE_MVE) > + && !BYTES_BIG_ENDIAN" > + { > + rtvec v = rtvec_alloc (<V_mode_nunits>/2); > + rtx t1; > + int i; > + for (i = 0; i < (<V_mode_nunits>/2); i++) > + RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i); > + > + t1 = gen_rtx_PARALLEL (<MODE>mode, v); > + > + operands[2] = t1; t1 doesn't seem worth it now, might as well assign directly to operands[2]. > […] > + emit_insn (gen_mve_vmovntq (VMOVNTQ_S, <MODE>mode, > + operands[0], tmpreg, operands[2])); The indentation looks odd here (operands[0] should line up with VMOVNTQ_S), but that might be a mailer issue. > + } > + DONE; OK with those changes, thanks. Richard