Hi,
Inspired by the Power backend, this patch introduces
-munroll-only-small-loops for s390. With it, both -funroll-loops and
-munroll-only-small-loops are enabled by default at -O2 and above
(when optimizing for speed).
Bootstrapped and regtested.
This introduces one regression in gcc.dg/sms-compare-debug-1.c, but
the dumps for SMS are currently broken as well. The only difference is in
the location of some INSN_DELETED notes, so I would consider this a minor
issue.
Is it OK?
Regards
Robin
gcc/ChangeLog:
* common/config/s390/s390-common.cc: Enable -funroll-loops and
-munroll-only-small-loops for OPT_LEVELS_2_PLUS_SPEED_ONLY.
* config/s390/s390.cc (s390_loop_unroll_adjust): Do not unroll
loops larger than 12 instructions.
(s390_override_options_after_change): Set unroll options.
(s390_option_override_internal): Likewise.
* config/s390/s390.opt: Document munroll-only-small-loops.
gcc/testsuite/ChangeLog:
* gcc.target/s390/vector/vec-copysign.c: Do not unroll.
* gcc.target/s390/zvector/autovec-double-quiet-uneq.c: Ditto.
* gcc.target/s390/zvector/autovec-double-signaling-ltgt.c: Ditto.
* gcc.target/s390/zvector/autovec-float-quiet-uneq.c: Ditto.
* gcc.target/s390/zvector/autovec-float-signaling-ltgt.c: Ditto.
---
gcc/common/config/s390/s390-common.cc | 5 +++
gcc/config/s390/s390.cc | 31 +++
gcc/config/s390/s390.opt | 4 +++
.../gcc.target/s390/vector/vec-copysign.c | 2 +-
.../s390/zvector/autovec-double-quiet-uneq.c | 2 +-
.../zvector/autovec-double-signaling-ltgt.c | 2 +-
.../s390/zvector/autovec-float-quiet-uneq.c | 2 +-
.../zvector/autovec-float-signaling-ltgt.c| 2 +-
8 files changed, 45 insertions(+), 5 deletions(-)
diff --git a/gcc/common/config/s390/s390-common.cc
b/gcc/common/config/s390/s390-common.cc
index 72a5ef47eaac..be3e6f201429 100644
--- a/gcc/common/config/s390/s390-common.cc
+++ b/gcc/common/config/s390/s390-common.cc
@@ -64,6 +64,11 @@ static const struct default_options
s390_option_optimization_table[] =
/* Enable -fsched-pressure by default when optimizing. */
{ OPT_LEVELS_1_PLUS, OPT_fsched_pressure, NULL, 1 },
+/* Enable -munroll-only-small-loops with -funroll-loops to unroll small
+ loops at -O2 and above by default. */
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_funroll_loops, NULL, 1 },
+{ OPT_LEVELS_2_PLUS_SPEED_ONLY, OPT_munroll_only_small_loops, NULL,
1 },
+
/* ??? There are apparently still problems with -fcaller-saves. */
{ OPT_LEVELS_ALL, OPT_fcaller_saves, NULL, 0 },
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 5644600edf3d..ef38fbe68c84 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -15457,6 +15457,21 @@ s390_loop_unroll_adjust (unsigned nunroll,
struct loop *loop)
if (s390_tune < PROCESSOR_2097_Z10)
return nunroll;
+ if (unroll_only_small_loops)
+{
+ /* Only unroll loops smaller than or equal to 12 insns. */
+ const unsigned int small_threshold = 12;
+
+ if (loop->ninsns > small_threshold)
+ return 0;
+
+ /* ???: Make this dependent on the type of registers in
+the loop. Increase the limit for vector registers. */
+ const unsigned int max_insns = optimize >= 3 ? 36 : 24;
+
+ nunroll = MIN (nunroll, max_insns / loop->ninsns);
+}
+
/* Count the number of memory references within the loop body. */
bbs = get_loop_body (loop);
subrtx_iterator::array_type array;
@@ -15531,6 +15546,19 @@ static void
s390_override_options_after_change (void)
{
s390_default_align (&global_options);
+
+ /* Explicit -funroll-loops turns -munroll-only-small-loops off. */
+ if ((OPTION_SET_P (flag_unroll_loops) && flag_unroll_loops)
+ || (OPTION_SET_P (flag_unroll_all_loops)
+ && flag_unroll_all_loops))
+{
+ if (!OPTION_SET_P (unroll_only_small_loops))
+ unroll_only_small_loops = 0;
+ if (!OPTION_SET_P (flag_cunroll_grow_size))
+ flag_cunroll_grow_size = 1;
+}
+ else if (!OPTION_SET_P (flag_cunroll_grow_size))
+flag_cunroll_grow_size = flag_peel_loops || optimize >= 3;
}
static void
@@ -15740,6 +15768,9 @@ s390_option_override_internal (struct
gcc_options *opts,
/* Set the default alignment. */
s390_default_align (opts);
+ /* Set unroll options. */
+ s390_override_options_after_change ();
+
/* Call target specific restore function to do post-init work. At
the moment,
this just sets opts->x_s390_cost_pointer. */
s390_function_specific_restore (opts, opts_set, NULL);
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index 9e8d3bfd404c..c375b9c5f729 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -321,3 +321,7 @@ and the default behavior is to emit separate
multiplication and addition
instructions for long doubles in vector registers, because measurements
show
that this improves performance. This option allows