On Fri, Feb 18, 2022 at 10:01 PM Richard Biener via Gcc-patches
<gcc-patches@gcc.gnu.org> wrote:
>
> This uses the SLP node now passed to the vectorizer costing hook
> to adjust vector construction costs for the cost of moving an
> integer component from a GPR to a vector register when that's
> required for building a vector from components.  A crucial difference
> here is whether the component is loaded from memory or extracted
> from a vector register, as in those cases no intermediate GPR is involved.
>
> The pr99881.c testcase can be Un-XFAILed with this patch; the
> pr91446.c testcase now produces scalar code, which looks superior
> to me, so I've adjusted it as well.
>
> I'm currently re-bootstrapping and testing on x86_64-unknown-linux-gnu
> after adding the BIT_FIELD_REF vector extracting special casing.
Does the patch handle PR101929?
>
> I suppose we can let autotesters look for SPEC performance fallout.
>
> OK if testing succeeds?
>
> Thanks,
> Richard.
>
> 2022-02-18  Richard Biener  <rguent...@suse.de>
>
>         PR tree-optimization/104582
>         PR target/99881
>         * config/i386/i386.cc (ix86_vector_costs::add_stmt_cost):
>         Cost GPR to vector register moves for integer vector construction.
>
>         * gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c: New.
>         * gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c: Likewise.
>         * gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c: Likewise.
>         * gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c: Likewise.
>         * gcc.target/i386/pr99881.c: Un-XFAIL.
>         * gcc.target/i386/pr91446.c: Adjust to not expect vectorization.
> ---
>  gcc/config/i386/i386.cc                       | 45 ++++++++++++++++++-
>  .../costmodel/x86_64/costmodel-pr104582-1.c   | 15 +++++++
>  .../costmodel/x86_64/costmodel-pr104582-2.c   | 13 ++++++
>  .../costmodel/x86_64/costmodel-pr104582-3.c   | 13 ++++++
>  .../costmodel/x86_64/costmodel-pr104582-4.c   | 15 +++++++
>  gcc/testsuite/gcc.target/i386/pr91446.c       |  2 +-
>  gcc/testsuite/gcc.target/i386/pr99881.c       |  2 +-
>  7 files changed, 102 insertions(+), 3 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
>  create mode 100644 
> gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c
>
> diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
> index 0830dbd7dca..b2bf90576d5 100644
> --- a/gcc/config/i386/i386.cc
> +++ b/gcc/config/i386/i386.cc
> @@ -22997,7 +22997,7 @@ ix86_vectorize_create_costs (vec_info *vinfo, bool 
> costing_for_scalar)
>
>  unsigned
>  ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
> -                                 stmt_vec_info stmt_info, slp_tree,
> +                                 stmt_vec_info stmt_info, slp_tree node,
>                                   tree vectype, int misalign,
>                                   vect_cost_model_location where)
>  {
> @@ -23160,6 +23160,49 @@ ix86_vector_costs::add_stmt_cost (int count, 
> vect_cost_for_stmt kind,
>        stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
>        stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
>      }
> +  else if (kind == vec_construct
> +          && node
> +          && SLP_TREE_DEF_TYPE (node) == vect_external_def
> +          && INTEGRAL_TYPE_P (TREE_TYPE (vectype)))
> +    {
> +      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
> +      unsigned i;
> +      tree op;
> +      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
> +       if (TREE_CODE (op) == SSA_NAME)
> +         TREE_VISITED (op) = 0;
> +      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
> +       {
> +         if (TREE_CODE (op) != SSA_NAME
> +             || TREE_VISITED (op))
> +           continue;
> +         TREE_VISITED (op) = 1;
> +         gimple *def = SSA_NAME_DEF_STMT (op);
> +         tree tem;
> +         if (is_gimple_assign (def)
> +             && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (def))
> +             && ((tem = gimple_assign_rhs1 (def)), true)
> +             && TREE_CODE (tem) == SSA_NAME
> +             /* A sign-change expands to nothing.  */
> +             && tree_nop_conversion_p (TREE_TYPE (gimple_assign_lhs (def)),
> +                                       TREE_TYPE (tem)))
> +           def = SSA_NAME_DEF_STMT (tem);
> +         /* When the component is loaded from memory we can directly
> +            move it to a vector register, otherwise we have to go
> +            via a GPR or via vpinsr which involves similar cost.
> +            Likewise with a BIT_FIELD_REF extracting from a vector
> +            register we can hope to avoid using a GPR.  */
> +         if (!is_gimple_assign (def)
> +             || (!gimple_assign_load_p (def)
> +                 && (gimple_assign_rhs_code (def) != BIT_FIELD_REF
> +                     || !VECTOR_TYPE_P (TREE_TYPE
> +                               (TREE_OPERAND (gimple_assign_rhs1 (def), 
> 0))))))
> +           stmt_cost += ix86_cost->sse_to_integer;
> +       }
> +      FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_OPS (node), i, op)
> +       if (TREE_CODE (op) == SSA_NAME)
> +         TREE_VISITED (op) = 0;
> +    }
>    if (stmt_cost == -1)
>      stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
>
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
> new file mode 100644
> index 00000000000..992a845ad7a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-1.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
> +
> +struct S { unsigned long a, b; } s;
> +
> +void
> +foo (unsigned long *a, unsigned long *b)
> +{
> +  unsigned long a_ = *a;
> +  unsigned long b_ = *b;
> +  s.a = a_;
> +  s.b = b_;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
> new file mode 100644
> index 00000000000..7637cdb4a97
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-2.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
> +
> +struct S { unsigned long a, b; } s;
> +
> +void
> +foo (unsigned long a, unsigned long b)
> +{
> +  s.a = a;
> +  s.b = b;
> +}
> +
> +/* { dg-final { scan-tree-dump-not "basic block part vectorized" "slp2" } } 
> */
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
> new file mode 100644
> index 00000000000..999c4905708
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
> +
> +struct S { double a, b; } s;
> +
> +void
> +foo (double a, double b)
> +{
> +  s.a = a;
> +  s.b = b;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
> diff --git 
> a/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c 
> b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c
> new file mode 100644
> index 00000000000..cc471e1ed73
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/vect/costmodel/x86_64/costmodel-pr104582-4.c
> @@ -0,0 +1,15 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-msse -fdump-tree-slp2-details" } */
> +
> +struct S { unsigned long a, b; } s;
> +
> +void
> +foo (signed long *a, unsigned long *b)
> +{
> +  unsigned long a_ = *a;
> +  unsigned long b_ = *b;
> +  s.a = a_;
> +  s.b = b_;
> +}
> +
> +/* { dg-final { scan-tree-dump "basic block part vectorized" "slp2" } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr91446.c 
> b/gcc/testsuite/gcc.target/i386/pr91446.c
> index 0243ca3ea68..067bf43f698 100644
> --- a/gcc/testsuite/gcc.target/i386/pr91446.c
> +++ b/gcc/testsuite/gcc.target/i386/pr91446.c
> @@ -21,4 +21,4 @@ foo (unsigned long long width, unsigned long long height,
>    bar (&t);
>  }
>
> -/* { dg-final { scan-assembler-times "vmovdqa\[^\n\r\]*xmm\[0-9\]" 2 } } */
> +/* { dg-final { scan-assembler-times "xmm\[0-9\]" 0 } } */
> diff --git a/gcc/testsuite/gcc.target/i386/pr99881.c 
> b/gcc/testsuite/gcc.target/i386/pr99881.c
> index 3e087eb2ed7..a1ec1d1ba8a 100644
> --- a/gcc/testsuite/gcc.target/i386/pr99881.c
> +++ b/gcc/testsuite/gcc.target/i386/pr99881.c
> @@ -1,7 +1,7 @@
>  /* PR target/99881.  */
>  /* { dg-do compile { target { ! ia32 } } } */
>  /* { dg-options "-Ofast -march=skylake" } */
> -/* { dg-final { scan-assembler-not "xmm\[0-9\]" { xfail *-*-* } } } */
> +/* { dg-final { scan-assembler-not "xmm\[0-9\]" } } */
>
>  void
>  foo (int* __restrict a, int n, int c)
> --
> 2.34.1



-- 
BR,
Hongtao

Reply via email to