On Fri, Jun 26, 2015 at 06:10:00PM +0100, Jakub Jelinek wrote:
> On Fri, Jun 26, 2015 at 06:03:34PM +0100, James Greenhalgh wrote:
> > --- /dev/null
> > +++ b/gcc/testsuite/g++.dg/pr66119.C
>
> I think generally testcases shouldn't be added into g++.dg/ directly,
> but subdirectories. So g++.dg/opt/ ?
>
> > @@ -0,0 +1,69 @@
> > +/* PR66119 - MOVE_RATIO is not constant in a compiler run, so Scalar
> > + Reduction of Aggregates must ask the back-end more than once what
> > + the value of MOVE_RATIO now is. */
> > +
> > +/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
>
> In g++.dg/, dejagnu cycles through all 3 major -std=c* versions,
> thus using -std=c++11 is inappropriate.
> If the test requires c++11, instead you do
> // { dg-do compile { target { { i?86-*-* x86_64-*-* } && c++11 } } }
>
> > +/* { dg-options "-std=c++11 -O3 -mavx -fdump-tree-sra -march=slm" { target
> > avx_runtime } } */
>
> and remove -std=c++11 here. I don't see any point in guarding it with
> avx_runtime, after all, if not avx_runtime, the test will be compiled with
> -O0 and thus very likely fail the scan-tree-dump test.
>
> As it is dg-do compile test only, you have no dependency on assembler nor
> linker nor runtime.
> But I'd add -mtune=slm too.
Thanks, I'm used to the dance we try to do to get Neon enabled/disabled
correctly when testing multilib environments on ARM so tried to
overengineer things!
I've updated the testcase as you suggested, and moved it to g++.dg/opt.
OK?
Thanks,
James
---
gcc/
2015-06-30 James Greenhalgh <[email protected]>
PR tree-optimization/66119
* toplev.c (process_options): Don't set up default values for
the sra_max_scalarization_size_{speed,size} parameters.
* tree-sra (analyze_all_variable_accesses): If no values
have been set for the sra_max_scalarization_size_{speed,size}
parameters, call get_move_ratio to get target defaults.
gcc/testsuite/
2015-06-30 James Greenhalgh <[email protected]>
* g++.dg/opt/pr66119.C: New.
diff --git a/gcc/testsuite/g++.dg/opt/pr66119.C b/gcc/testsuite/g++.dg/opt/pr66119.C
new file mode 100644
index 0000000..5b420c2
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr66119.C
@@ -0,0 +1,69 @@
+/* PR66119 - MOVE_RATIO is not constant in a compiler run, so Scalar
+ Reduction of Aggregates must ask the back-end more than once what
+ the value of MOVE_RATIO now is. */
+
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && c++11 } } } */
+/* { dg-options "-O3 -mavx -fdump-tree-sra -march=slm -mtune=slm" } */
+
+#include <immintrin.h>
+
+class MyAVX
+{
+ __m256d data;
+public:
+ MyAVX () = default;
+ MyAVX (const MyAVX &) = default;
+ MyAVX (__m256d _data) : data(_data) { ; }
+
+ MyAVX & operator= (const MyAVX &) = default;
+
+ operator __m256d () const { return data; }
+ MyAVX operator+ (MyAVX s2) { return data+s2.data; }
+};
+
+template <typename T> class AVX_trait { ; };
+
+template <> class AVX_trait<double> {
+public:
+ typedef __m256d TSIMD;
+};
+
+
+template <typename T>
+class MyTSIMD
+{
+ typename AVX_trait<T>::TSIMD data;
+
+public:
+ MyTSIMD () = default;
+ MyTSIMD (const MyTSIMD &) = default;
+ // MyTSIMD (const MyTSIMD & s2) : data(s2.data) { ; }
+ MyTSIMD (typename AVX_trait<T>::TSIMD _data) : data(_data) { ; }
+
+ operator typename AVX_trait<T>::TSIMD() const { return data; }
+ MyTSIMD operator+ (MyTSIMD s2) { return data+s2.data; }
+};
+
+// using MyVec = MyAVX;
+using MyVec = MyTSIMD<double>;
+
+class Vec2
+{
+ MyVec a, b;
+public:
+ Vec2 (MyVec aa, MyVec ab) : a(aa), b(ab) { ; }
+ Vec2 operator+ (Vec2 v2) { return Vec2(a+v2.a, b+v2.b); }
+};
+
+inline __attribute__ ((__always_inline__))
+Vec2 ComputeSomething (Vec2 a, Vec2 b)
+{
+ return a+b;
+}
+
+Vec2 TestFunction (Vec2 a, Vec2 b)
+{
+ return ComputeSomething (a,b);
+}
+
+/* { dg-final { scan-tree-dump "Created a replacement for b" "sra" } } */
diff --git a/gcc/toplev.c b/gcc/toplev.c
index 573b144..d97d852 100644
--- a/gcc/toplev.c
+++ b/gcc/toplev.c
@@ -1299,20 +1299,6 @@ process_options (void)
so we can correctly initialize debug output. */
no_backend = lang_hooks.post_options (&main_input_filename);
- /* Set default values for parameters relation to the Scalar Reduction
- of Aggregates passes (SRA and IP-SRA). We must do this here, rather
- than in opts.c:default_options_optimization as historically these
- tuning heuristics have been based on MOVE_RATIO, which on some
- targets requires other symbols from the backend. */
- maybe_set_param_value
- (PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED,
- get_move_ratio (true) * UNITS_PER_WORD,
- global_options.x_param_values, global_options_set.x_param_values);
- maybe_set_param_value
- (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE,
- get_move_ratio (false) * UNITS_PER_WORD,
- global_options.x_param_values, global_options_set.x_param_values);
-
/* Some machines may reject certain combinations of options. */
targetm.target_option.override ();
diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index 7f242f7..e648061 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -2545,11 +2545,20 @@ analyze_all_variable_accesses (void)
bitmap tmp = BITMAP_ALLOC (NULL);
bitmap_iterator bi;
unsigned i;
- unsigned max_scalarization_size
- = (optimize_function_for_size_p (cfun)
- ? PARAM_VALUE (PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE)
- : PARAM_VALUE (PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED))
- * BITS_PER_UNIT;
+ bool optimize_speed_p = !optimize_function_for_size_p (cfun);
+
+ enum compiler_param param = optimize_speed_p
+ ? PARAM_SRA_MAX_SCALARIZATION_SIZE_SPEED
+ : PARAM_SRA_MAX_SCALARIZATION_SIZE_SIZE;
+
+ /* If the user didn't set PARAM_SRA_MAX_SCALARIZATION_SIZE_<...>,
+ fall back to a target default. */
+ unsigned HOST_WIDE_INT max_scalarization_size
+ = global_options_set.x_param_values[param]
+ ? PARAM_VALUE (param)
+ : get_move_ratio (optimize_speed_p) * UNITS_PER_WORD;
+
+ max_scalarization_size *= BITS_PER_UNIT;
EXECUTE_IF_SET_IN_BITMAP (candidate_bitmap, 0, i, bi)
if (bitmap_bit_p (should_scalarize_away_bitmap, i)