On Tue, Jun 29, 2021 at 6:16 AM H.J. Lu <hjl.to...@gmail.com> wrote:
>
> Add vec_duplicate<mode> expander for SSE2 if we can move from GPR to SSE
> register directly.
>
>         * config/i386/i386-expand.c (ix86_expand_vector_init_duplicate):
>         Make it global.
>         * config/i386/i386-protos.h (ix86_expand_vector_init_duplicate):
>         New prototype.
>         * config/i386/sse.md (INT_BROADCAST_MODE): New mode iterator.
>         (vec_duplicate<mode>): New expander.
> ---
>  gcc/config/i386/i386-expand.c |  5 +----
>  gcc/config/i386/i386-protos.h |  2 ++
>  gcc/config/i386/sse.md        | 31 +++++++++++++++++++++++++++++++
>  3 files changed, 34 insertions(+), 4 deletions(-)
>
> diff --git a/gcc/config/i386/i386-expand.c b/gcc/config/i386/i386-expand.c
> index e0e3ed4d8a4..e04019c4b79 100644
> --- a/gcc/config/i386/i386-expand.c
> +++ b/gcc/config/i386/i386-expand.c
> @@ -93,9 +93,6 @@ along with GCC; see the file COPYING3.  If not see
>  #include "i386-builtins.h"
>  #include "i386-expand.h"
>
> -static bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
> -                                              rtx);
> -
>  /* Split one or more double-mode RTL references into pairs of half-mode
>     references.  The RTL can be REG, offsettable MEM, integer constant, or
>     CONST_DOUBLE.  "operands" is a pointer to an array of double-mode RTLs to
> @@ -13909,7 +13906,7 @@ static bool expand_vec_perm_1 (struct expand_vec_perm_d *d);
>  /* A subroutine of ix86_expand_vector_init.  Store into TARGET a vector
>     with all elements equal to VAR.  Return true if successful.  */
>
> -static bool
> +bool
>  ix86_expand_vector_init_duplicate (bool mmx_ok, machine_mode mode,
>                                    rtx target, rtx val)
>  {
> diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
> index 71745b9a1ea..51376fcc454 100644
> --- a/gcc/config/i386/i386-protos.h
> +++ b/gcc/config/i386/i386-protos.h
> @@ -258,6 +258,8 @@ extern void ix86_expand_mul_widen_hilo (rtx, rtx, rtx, bool, bool);
>  extern void ix86_expand_sse2_mulv4si3 (rtx, rtx, rtx);
>  extern void ix86_expand_sse2_mulvxdi3 (rtx, rtx, rtx);
>  extern void ix86_expand_sse2_abs (rtx, rtx);
> +extern bool ix86_expand_vector_init_duplicate (bool, machine_mode, rtx,
> +                                              rtx);
>
>  /* In i386-c.c  */
>  extern void ix86_target_macros (void);
> diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
> index ffcc0c81964..5ededaedac7 100644
> --- a/gcc/config/i386/sse.md
> +++ b/gcc/config/i386/sse.md
> @@ -24814,3 +24814,34 @@ (define_insn "*aes<aeswideklvariant>u8"
>    "TARGET_WIDEKL"
>    "aes<aeswideklvariant>\t{%0}"
>    [(set_attr "type" "other")])
> +
> +;; Modes handled by broadcast patterns.  NB: Allow V64QI and V32HI with
> +;; TARGET_AVX512F since ix86_expand_vector_init_duplicate can expand
> +;; without TARGET_AVX512BW which is used by memset vector broadcast
> +;; expander to XI with:
> +;;     vmovd           %edi, %xmm15
> +;;     vpbroadcastb    %xmm15, %ymm15
> +;;     vinserti64x4    $0x1, %ymm15, %zmm15, %zmm15
> +
> +(define_mode_iterator INT_BROADCAST_MODE
> +  [(V64QI "TARGET_AVX512F") (V32QI "TARGET_AVX") V16QI
> +   (V32HI "TARGET_AVX512F") (V16HI "TARGET_AVX") V8HI
> +   (V16SI "TARGET_AVX512F") (V8SI "TARGET_AVX") V4SI
> +   (V8DI "TARGET_AVX512F && TARGET_64BIT")
> +   (V4DI "TARGET_AVX && TARGET_64BIT") (V2DI "TARGET_64BIT")])
> +
> +;; Broadcast from an integer.  NB: Enable broadcast only if we can move
> +;; from GPR to SSE register directly.
> +(define_expand "vec_duplicate<mode>"
> +  [(set (match_operand:INT_BROADCAST_MODE 0 "register_operand")
> +       (vec_duplicate:INT_BROADCAST_MODE
> +         (match_operand:<ssescalarmode> 1 "nonimmediate_operand")))]
> +  "TARGET_SSE2 && TARGET_INTER_UNIT_MOVES_TO_VEC"
Although SSE2 has no broadcast instructions,
ix86_expand_vector_init_duplicate can still handle these modes by
emitting an equivalent instruction sequence.
So the patch LGTM.
> +{
> +  if (!ix86_expand_vector_init_duplicate (false,
> +                                         GET_MODE (operands[0]),
> +                                         operands[0],
> +                                         operands[1]))
> +    gcc_unreachable ();
> +  DONE;
> +})
> --
> 2.31.1
>


-- 
BR,
Hongtao

Reply via email to