unpack patterns

Richard Sandiford via Gcc-patches Mon, 14 Jun 2021 09:01:35 -0700

Christophe Lyon <christophe.l...@linaro.org> writes:
> On Fri, 11 Jun 2021 at 18:25, Richard Sandiford
> <richard.sandif...@arm.com> wrote:
>>
>> Christophe Lyon <christophe.l...@linaro.org> writes:
>> > Thanks for the feedback. How about v2 attached?
>> > Do you want me to merge neon_vec_unpack<US> and
>> > mve_vec_unpack<US>  and only have different assembly?
>> > if (TARGET_HAVE_MVE)
>> >   return "vmovlb.<US>%#<V_sz_elem> %q0, %q1";
>> > else
>> >   return "vmovlb.<US>%#<V_sz_elem> %q0, %q1";
>>
>> I think it'd be better to keep them separate, given that they have
>> different attributes.
>>
>> > @@ -2199,10 +2222,23 @@ (define_insn "mve_vmovnbq_<supf><mode>"
>> >    [(set_attr "type" "mve_move")
>> >  ])
>> >
>> > +;; vmovnb pattern used by the vec_pack_trunc expander to avoid the
>> > +;; need for an extra register.
>>
>> “to avoid the need for an uninitailized input operand” might
>> be clearer.
>>
> Done
>
>> > +(define_insn "@mve_vec_pack_trunc_lo_<mode>"
>> > +  [
>> > +   (set (match_operand:<V_narrow_pack> 0 "s_register_operand" "=w")
>> > +     (unspec:<V_narrow_pack> [(match_operand:MVE_5 1 "s_register_operand" 
>> > "w")]
>> > +      VMOVNBQ_S))
>> > +  ]
>> > +  "TARGET_HAVE_MVE"
>> > +  "vmovnb.i%#<V_sz_elem>     %q0, %q1"
>> > +  [(set_attr "type" "mve_move")
>> > +])
>> > +
>> > […]
>> > diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
>> > index 430a92ce966..3afb3e1d891 100644
>> > --- a/gcc/config/arm/vec-common.md
>> > +++ b/gcc/config/arm/vec-common.md
>> > @@ -632,3 +632,91 @@ (define_expand "clz<mode>2"
>> >    "ARM_HAVE_<MODE>_ARITH
>> >     && !TARGET_REALLY_IWMMXT"
>> >  )
>> > +
>> > +;; vmovl[tb] are not available for V4SI on MVE
>> > +(define_expand "vec_unpack<US>_hi_<mode>"
>> > +  [(match_operand:<V_unpack> 0 "register_operand")
>> > +   (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
>> > + "ARM_HAVE_<MODE>_ARITH
>> > +  && !TARGET_REALLY_IWMMXT
>> > +  && ! (<MODE>mode == V4SImode && TARGET_HAVE_MVE)
>> > +  && !BYTES_BIG_ENDIAN"
>> > +  {
>> > +    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
>> > +    rtx t1;
>> > +    int i;
>> > +    for (i = 0; i < (<V_mode_nunits>/2); i++)
>> > +      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
>> > +
>> > +    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
>> > +
>> > +    if (TARGET_NEON)
>> > +      emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
>> > +                                                 operands[1],
>> > +                                                 t1));
>> > +    else
>> > +      emit_insn (gen_mve_vec_unpack<US>_hi_<mode> (operands[0],
>> > +                                                operands[1],
>> > +                                                t1));
>> > +    DONE;
>> > +  }
>> > +)
>>
>> Since the patterns are the same for both MVE and Neon (a good thing!),
>> it would be simpler to write this as:
>>
>> (define_insn "mve_vec_unpack<US>_lo_<mode>"
> I guess you mean define_expand?


Yes, sorry.

>>   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
>>         (SE:<V_unpack> (vec_select:<V_HALF>
>>                           (match_operand:VU 1 "register_operand" "w")
>>                           (match_dup 2))))]
>>   …
>>
>> and then set operands[2] to what is t1 above.  There would then be
>> no need to call emit_insn & DONE, and we could use the natural
>> iterator (MVE_3) for the MVE patterns.
>>
>> Same idea for the lo version.
>>
>> > […]
>> > +;; vmovn[tb] are not available for V2DI on MVE
>> > +(define_expand "vec_pack_trunc_<mode>"
>> > + [(set (match_operand:<V_narrow_pack> 0 "register_operand")
>> > +       (vec_concat:<V_narrow_pack>
>> > +             (truncate:<V_narrow>
>> > +                     (match_operand:VN 1 "register_operand"))
>> > +             (truncate:<V_narrow>
>> > +                     (match_operand:VN 2 "register_operand"))))]
>> > + "ARM_HAVE_<MODE>_ARITH
>> > +  && !TARGET_REALLY_IWMMXT
>> > +  && ! (<MODE>mode == V2DImode && TARGET_HAVE_MVE)
>> > +  && !BYTES_BIG_ENDIAN"
>> > + {
>> > +   if (TARGET_NEON)
>> > +     {
>> > +       emit_insn (gen_neon_quad_vec_pack_trunc_<mode> (operands[0], 
>> > operands[1],
>> > +                                                    operands[2]));
>> > +     }
>> > +   else
>> > +     {
>> > +       rtx tmpreg = gen_reg_rtx (<V_narrow_pack>mode);
>> > +       emit_insn (gen_mve_vec_pack_trunc_lo (<MODE>mode, tmpreg, 
>> > operands[1]));
>> > +       emit_insn (gen_mve_vmovntq (VMOVNTQ_S, <MODE>mode,
>> > +                                        tmpreg, tmpreg, operands[2]));
>> > +       emit_move_insn (operands[0], tmpreg);
>>
>> vmovntq can assign directly to operands[0], without a separate move.
>>
>> OK with those changes, thanks.
>
> Just to be sure, here is v3 which hopefully matches what you meant.

> diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
> index 430a92ce966..1d6d4379855 100644
> --- a/gcc/config/arm/vec-common.md
> +++ b/gcc/config/arm/vec-common.md
> @@ -632,3 +632,78 @@ (define_expand "clz<mode>2"
>    "ARM_HAVE_<MODE>_ARITH
>     && !TARGET_REALLY_IWMMXT"
>  )
> +
> +;; vmovl[tb] are not available for V4SI on MVE
> +(define_expand "vec_unpack<US>_hi_<mode>"
> +  [(set (match_operand:<V_unpack> 0 "register_operand")
> +       (SE:<V_unpack> (vec_select:<V_HALF>
> +                             (match_operand:VU 1 "register_operand")
> +                     (match_dup 2))))]

The formatting of the vec_unpack<US>_lo_<mode> pattern looks nicer.

> + "ARM_HAVE_<MODE>_ARITH
> +  && !TARGET_REALLY_IWMMXT
> +  && ! (<MODE>mode == V4SImode && TARGET_HAVE_MVE)
> +  && !BYTES_BIG_ENDIAN"
> +  {
> +    rtvec v = rtvec_alloc (<V_mode_nunits>/2);
> +    rtx t1;
> +    int i;
> +    for (i = 0; i < (<V_mode_nunits>/2); i++)
> +      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
> +
> +    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
> +
> +    operands[2] = t1;

t1 doesn't seem worth it now, might as well assign directly to
operands[2].

> […]
> +       emit_insn (gen_mve_vmovntq (VMOVNTQ_S, <MODE>mode,
> +                                        operands[0], tmpreg, operands[2]));

The indentation looks odd here (operands[0] should line up with
VMOVNTQ_S), but that might be a mailer issue.

> +     }
> +   DONE;


OK with those changes, thanks.

Richard

Re: [PATCH 2/2] arm: Auto-vectorization for MVE: add pack/unpack patterns

Reply via email to