On November 12, 2021 6:53:04 PM GMT+01:00, Richard Sandiford via Gcc-patches 
<gcc-patches@gcc.gnu.org> wrote:
>vect_check_gather_scatter had a binary “does this target support
>internal gather/scatter functions” test.  This dates from the time when
>we only handled gathers and scatters via direct target support, with
>x86_64 using built-in functions and aarch64 using IFNs.  But now that we
>can emulate gathers, we need to check whether the gather for a particular
>mode is going to be emulated or not.
>
>Without this, enabling SVE regresses emulated Advanced SIMD gather
>sequences in cases where SVE isn't used.
>
>Livermore kernel 15 can now be vectorised with Advanced SIMD when
>SVE is enabled.
>
>Regstrapped on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?

Ok. 

Richard. 

>Richard
>
>
>gcc/
>       * genopinit.c (main): Turn supports_vec_gather_load and
>       supports_vec_scatter_store into signed char arrays and remove
>       supports_vec_gather_load_cached and supports_vec_scatter_store_cached.
>       * optabs-query.c (supports_vec_convert_optab_p): Add a mode parameter.
>       If the mode is not VOIDmode, test only for that mode.
>       (supports_vec_gather_load_p): Likewise.
>       (supports_vec_scatter_store_p): Likewise.
>       * optabs-query.h (supports_vec_gather_load_p): Likewise.
>       (supports_vec_scatter_store_p): Likewise.
>       * tree-vect-data-refs.c (vect_check_gather_scatter): Pass the
>       vector mode to supports_vec_gather_load_p and
>       supports_vec_scatter_store_p.
>
>gcc/testsuite/
>       * gfortran.dg/vect/vect-8.f90: Bump number of vectorized loops
>       to 25 for SVE.
>       * gcc.target/aarch64/sve/gather_load_10.c: New test.
>---
> gcc/genopinit.c                               | 11 ++--
> gcc/optabs-query.c                            | 55 +++++++++----------
> gcc/optabs-query.h                            |  4 +-
> .../gcc.target/aarch64/sve/gather_load_10.c   | 18 ++++++
> gcc/testsuite/gfortran.dg/vect/vect-8.f90     |  3 +-
> gcc/tree-vect-data-refs.c                     |  4 +-
> 6 files changed, 56 insertions(+), 39 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>
>diff --git a/gcc/genopinit.c b/gcc/genopinit.c
>index 195ddf74fa2..c6be748079d 100644
>--- a/gcc/genopinit.c
>+++ b/gcc/genopinit.c
>@@ -313,12 +313,11 @@ main (int argc, const char **argv)
>          "  /* Patterns that are used by optabs that are enabled for this 
> target.  */\n"
>          "  bool pat_enable[NUM_OPTAB_PATTERNS];\n"
>          "\n"
>-         "  /* Cache if the target supports vec_gather_load for at least one 
>vector\n"
>-         "     mode.  */\n"
>-         "  bool supports_vec_gather_load;\n"
>-         "  bool supports_vec_gather_load_cached;\n"
>-         "  bool supports_vec_scatter_store;\n"
>-         "  bool supports_vec_scatter_store_cached;\n"
>+         "  /* Index VOIDmode caches if the target supports vec_gather_load 
>for any\n"
>+         "     vector mode.  Every other index X caches specifically for mode 
>X.\n"
>+         "     1 means yes, -1 means no.  */\n"
>+         "  signed char supports_vec_gather_load[NUM_MACHINE_MODES];\n"
>+         "  signed char supports_vec_scatter_store[NUM_MACHINE_MODES];\n"
>          "};\n"
>          "extern void init_all_optabs (struct target_optabs *);\n"
>          "\n"
>diff --git a/gcc/optabs-query.c b/gcc/optabs-query.c
>index a6dd0fed610..1c0778cba55 100644
>--- a/gcc/optabs-query.c
>+++ b/gcc/optabs-query.c
>@@ -712,13 +712,16 @@ lshift_cheap_p (bool speed_p)
>   return cheap[speed_p];
> }
> 
>-/* Return true if vector conversion optab OP supports at least one mode,
>-   given that the second mode is always an integer vector.  */
>+/* If MODE is not VOIDmode, return true if vector conversion optab OP supports
>+   that mode, given that the second mode is always an integer vector.
>+   If MODE is VOIDmode, return true if OP supports any vector mode.  */
> 
> static bool
>-supports_vec_convert_optab_p (optab op)
>+supports_vec_convert_optab_p (optab op, machine_mode mode)
> {
>-  for (int i = 0; i < NUM_MACHINE_MODES; ++i)
>+  int start = mode == VOIDmode ? 0 : mode;
>+  int end = mode == VOIDmode ? MAX_MACHINE_MODE : mode;
>+  for (int i = start; i <= end; ++i)
>     if (VECTOR_MODE_P ((machine_mode) i))
>       for (int j = MIN_MODE_VECTOR_INT; j < MAX_MODE_VECTOR_INT; ++j)
>       if (convert_optab_handler (op, (machine_mode) i,
>@@ -728,39 +731,35 @@ supports_vec_convert_optab_p (optab op)
>   return false;
> }
> 
>-/* Return true if vec_gather_load is available for at least one vector
>-   mode.  */
>+/* If MODE is not VOIDmode, return true if vec_gather_load is available for
>+   that mode.  If MODE is VOIDmode, return true if vec_gather_load is
>+   available for at least one vector mode.  */
> 
> bool
>-supports_vec_gather_load_p ()
>+supports_vec_gather_load_p (machine_mode mode)
> {
>-  if (this_fn_optabs->supports_vec_gather_load_cached)
>-    return this_fn_optabs->supports_vec_gather_load;
>+  if (!this_fn_optabs->supports_vec_gather_load[mode])
>+    this_fn_optabs->supports_vec_gather_load[mode]
>+      = (supports_vec_convert_optab_p (gather_load_optab, mode)
>+       || supports_vec_convert_optab_p (mask_gather_load_optab, mode)
>+       ? 1 : -1);
> 
>-  this_fn_optabs->supports_vec_gather_load_cached = true;
>-
>-  this_fn_optabs->supports_vec_gather_load
>-    = (supports_vec_convert_optab_p (gather_load_optab)
>-       || supports_vec_convert_optab_p (mask_gather_load_optab));
>-
>-  return this_fn_optabs->supports_vec_gather_load;
>+  return this_fn_optabs->supports_vec_gather_load[mode] > 0;
> }
> 
>-/* Return true if vec_scatter_store is available for at least one vector
>-   mode.  */
>+/* If MODE is not VOIDmode, return true if vec_scatter_store is available for
>+   that mode.  If MODE is VOIDmode, return true if vec_scatter_store is
>+   available for at least one vector mode.  */
> 
> bool
>-supports_vec_scatter_store_p ()
>+supports_vec_scatter_store_p (machine_mode mode)
> {
>-  if (this_fn_optabs->supports_vec_scatter_store_cached)
>-    return this_fn_optabs->supports_vec_scatter_store;
>-
>-  this_fn_optabs->supports_vec_scatter_store_cached = true;
>-
>-  this_fn_optabs->supports_vec_scatter_store
>-    = (supports_vec_convert_optab_p (scatter_store_optab)
>-       || supports_vec_convert_optab_p (mask_scatter_store_optab));
>+  if (!this_fn_optabs->supports_vec_scatter_store[mode])
>+    this_fn_optabs->supports_vec_scatter_store[mode]
>+      = (supports_vec_convert_optab_p (scatter_store_optab, mode)
>+       || supports_vec_convert_optab_p (mask_scatter_store_optab, mode)
>+       ? 1 : -1);
> 
>-  return this_fn_optabs->supports_vec_scatter_store;
>+  return this_fn_optabs->supports_vec_scatter_store[mode] > 0;
> }
> 
>diff --git a/gcc/optabs-query.h b/gcc/optabs-query.h
>index 876a3a6f348..6b24eecba46 100644
>--- a/gcc/optabs-query.h
>+++ b/gcc/optabs-query.h
>@@ -193,8 +193,8 @@ bool can_compare_and_swap_p (machine_mode, bool);
> bool can_atomic_exchange_p (machine_mode, bool);
> bool can_atomic_load_p (machine_mode);
> bool lshift_cheap_p (bool);
>-bool supports_vec_gather_load_p ();
>-bool supports_vec_scatter_store_p ();
>+bool supports_vec_gather_load_p (machine_mode = E_VOIDmode);
>+bool supports_vec_scatter_store_p (machine_mode = E_VOIDmode);
> 
> /* Version of find_widening_optab_handler_and_mode that operates on
>    specific mode types.  */
>diff --git a/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c 
>b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>new file mode 100644
>index 00000000000..2a07c0be866
>--- /dev/null
>+++ b/gcc/testsuite/gcc.target/aarch64/sve/gather_load_10.c
>@@ -0,0 +1,18 @@
>+/* { dg-do compile } */
>+/* { dg-options "-O2 -fopenmp-simd -msve-vector-bits=256 
>-fno-vect-cost-model" } */
>+
>+#include <stdint.h>
>+
>+void
>+foo (uint64_t *restrict x, uint64_t *restrict y, uint64_t *restrict index)
>+{
>+#pragma omp for simd simdlen(2)
>+  for (int i = 0; i < 128; ++i)
>+    x[i] += y[index[i]];
>+}
>+
>+/* { dg-final { scan-assembler-times {\tldr\td[0-9]+, \[x[0-9]+, x[0-9]+, lsl 
>#?3\]} 2 } } */
>+/* { dg-final { scan-assembler-not {\tshl\tv[0-9]+\.2d,} } } */
>+/* { dg-final { scan-assembler-not {\tumov\t} } } */
>+/* { dg-final { scan-assembler {\tadd\tv[0-9]+\.2d,} } } */
>+/* { dg-final { scan-assembler {\tstr\tq[0-9]+,} } } */
>diff --git a/gcc/testsuite/gfortran.dg/vect/vect-8.f90 
>b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>index c8a7d896bac..ca72ddcffca 100644
>--- a/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>+++ b/gcc/testsuite/gfortran.dg/vect/vect-8.f90
>@@ -704,6 +704,7 @@ CALL track('KERNEL  ')
> RETURN
> END SUBROUTINE kernel
> 
>-! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target 
>aarch64*-*-* } } }
>+! { dg-final { scan-tree-dump-times "vectorized 25 loops" 1 "vect" { target 
>aarch64_sve } } }
>+! { dg-final { scan-tree-dump-times "vectorized 24 loops" 1 "vect" { target { 
>aarch64*-*-* && { ! aarch64_sve } } } } }
> ! { dg-final { scan-tree-dump-times "vectorized 2\[234\] loops" 1 "vect" { 
> target { vect_intdouble_cvt && { ! aarch64*-*-* } } } } }
> ! { dg-final { scan-tree-dump-times "vectorized 17 loops" 1 "vect" { target { 
> { ! vect_intdouble_cvt } && { ! aarch64*-*-* } } } } }
>diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
>index 2ea8e983fe6..f1d7f01a9ce 100644
>--- a/gcc/tree-vect-data-refs.c
>+++ b/gcc/tree-vect-data-refs.c
>@@ -3969,8 +3969,8 @@ vect_check_gather_scatter (stmt_vec_info stmt_info, 
>loop_vec_info loop_vinfo,
>   /* True if we should aim to use internal functions rather than
>      built-in functions.  */
>   bool use_ifn_p = (DR_IS_READ (dr)
>-                  ? supports_vec_gather_load_p ()
>-                  : supports_vec_scatter_store_p ());
>+                  ? supports_vec_gather_load_p (TYPE_MODE (vectype))
>+                  : supports_vec_scatter_store_p (TYPE_MODE (vectype)));
> 
>   base = DR_REF (dr);
>   /* For masked loads/stores, DR_REF (dr) is an artificial MEM_REF,

Reply via email to