[committed] OpenMP declare variant template handling

2019-11-04 Thread Jakub Jelinek
Hi!

The following patch handles declare variant in templates, including
instantiating the "omp declare variant base" attribute value.

Bootstrapped/regtested on x86_64-linux and i686-linux, committed to trunk.

2019-11-05  Jakub Jelinek  

* decl.c (omp_declare_variant_finalize_one): Call
declare_simd_adjust_this not just on the context, but also on the
variant-id expression for methods.  Don't call
cp_get_callee_fndecl_nofold, call cp_get_callee and only if it is
safe cp_get_fndecl_from_callee.  Don't try to print as %qD
NULL in diagnostics.
* pt.c (tsubst_attribute): Handle "omp declare variant base"
attribute.
(tsubst_function_decl): Call omp_declare_variant_finalize
if there are any "omp declare variant base" attributes left.

* g++.dg/gomp/declare-variant-7.C: New test.
* g++.dg/gomp/declare-variant-8.C: New test.

--- gcc/cp/decl.c.jj2019-11-02 10:00:59.646252496 +0100
+++ gcc/cp/decl.c   2019-11-04 19:05:24.292235486 +0100
@@ -7099,8 +7099,12 @@ static bool
 omp_declare_variant_finalize_one (tree decl, tree attr)
 {
   if (TREE_CODE (TREE_TYPE (decl)) == METHOD_TYPE)
-walk_tree (_VALUE (TREE_VALUE (attr)), declare_simd_adjust_this,
-  DECL_ARGUMENTS (decl), NULL);
+{
+  walk_tree (_VALUE (TREE_VALUE (attr)), declare_simd_adjust_this,
+DECL_ARGUMENTS (decl), NULL);
+  walk_tree (_PURPOSE (TREE_VALUE (attr)), declare_simd_adjust_this,
+DECL_ARGUMENTS (decl), NULL);
+}
 
   tree ctx = TREE_VALUE (TREE_VALUE (attr));
   tree simd = omp_get_context_selector (ctx, "construct", "simd");
@@ -7179,7 +7183,16 @@ omp_declare_variant_finalize_one (tree d
   if (variant == error_mark_node && !processing_template_decl)
 return true;
 
-  variant = cp_get_callee_fndecl_nofold (variant);
+  variant = cp_get_callee (variant);
+  if (variant)
+{
+  if (TREE_CODE (variant) == FUNCTION_DECL)
+   ;
+  else if (TREE_TYPE (variant) && INDIRECT_TYPE_P (TREE_TYPE (variant)))
+   variant = cp_get_fndecl_from_callee (variant, false);
+  else
+   variant = NULL_TREE;
+}
 
   input_location = save_loc;
 
@@ -7211,7 +7224,7 @@ omp_declare_variant_finalize_one (tree d
 }
   else if (!processing_template_decl)
 {
-  error_at (varid_loc, "could not find variant %qD declaration", variant);
+  error_at (varid_loc, "could not find variant declaration");
   return true;
 }
 
--- gcc/cp/pt.c.jj  2019-10-31 08:08:42.883786288 +0100
+++ gcc/cp/pt.c 2019-11-04 19:29:54.115165644 +0100
@@ -9,6 +9,76 @@ tsubst_attribute (tree t, tree *decl_p,
   else
val = NULL_TREE;
 }
+  else if (flag_openmp
+  && is_attribute_p ("omp declare variant base",
+ get_attribute_name (t)))
+{
+  ++cp_unevaluated_operand;
+  tree varid
+   = tsubst_expr (TREE_PURPOSE (val), args, complain,
+  in_decl, /*integral_constant_expression_p=*/false);
+  --cp_unevaluated_operand;
+  tree chain = TREE_CHAIN (val);
+  location_t match_loc = cp_expr_loc_or_input_loc (TREE_PURPOSE (chain));
+  tree ctx = copy_list (TREE_VALUE (val));
+  tree simd = get_identifier ("simd");
+  tree score = get_identifier (" score");
+  tree condition = get_identifier ("condition");
+  for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
+   {
+ const char *set = IDENTIFIER_POINTER (TREE_PURPOSE (t1));
+ TREE_VALUE (t1) = copy_list (TREE_VALUE (t1));
+ for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
+   {
+ if (TREE_PURPOSE (t2) == simd && set[0] == 'c')
+   {
+ tree clauses = TREE_VALUE (t2);
+ clauses = tsubst_omp_clauses (clauses,
+   C_ORT_OMP_DECLARE_SIMD, args,
+   complain, in_decl);
+ c_omp_declare_simd_clauses_to_decls (*decl_p, clauses);
+ clauses = finish_omp_clauses (clauses, 
C_ORT_OMP_DECLARE_SIMD);
+ TREE_VALUE (t2) = clauses;
+   }
+ else
+   {
+ TREE_VALUE (t2) = copy_list (TREE_VALUE (t2));
+ for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
+   if (TREE_VALUE (t3))
+ {
+   bool allow_string
+ = ((TREE_PURPOSE (t2) != condition || set[0] != 'u')
+&& TREE_PURPOSE (t3) != score);
+   if (TREE_CODE (t3) == STRING_CST && allow_string)
+ continue;
+   tree v = TREE_VALUE (t3);
+   v = tsubst_expr (v, args, complain, in_decl, true);
+   v = fold_non_dependent_expr (v);
+   if (!INTEGRAL_TYPE_P 

GCC 7 branch is frozen, all commits require release manager approval

2019-11-04 Thread Richard Biener


The GCC 7 branch is now frozen for preparation of a release candidate
for the final release from the branch, GCC 7.5.  All commits require
release manager approval from now on, ideally there will be no
further commits to the branch since the branch will be closed after
the GCC 7.5 release.

Thanks for your understanding.


Previous Report
===

https://gcc.gnu.org/ml/gcc/2019-10/msg00142.html


[PATCH] Reject VLAs in inline asm operands that require registers (PR inline-asm/92352)

2019-11-04 Thread Jakub Jelinek
Hi!

On VLAs with register only constraints we ICE, because during gimplification
we try to create temporaries for them and force_constant_size aborts in that
case.

The following patch diagnoses those early, like we diagnose already C++
non-PODs.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-11-05  Jakub Jelinek  

PR inline-asm/92352
* gimplify.c (gimplify_asm_expr): Reject VLA in output or input
operands with non-memory constraints.

* c-c++-common/pr92352.c: New test.

--- gcc/gimplify.c.jj   2019-11-02 10:00:59.595253274 +0100
+++ gcc/gimplify.c  2019-11-05 00:21:01.585958514 +0100
@@ -6235,8 +6235,14 @@ gimplify_asm_expr (tree *expr_p, gimple_
  is_inout = false;
}
 
-  /* If we can't make copies, we can only accept memory.  */
-  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
+  /* If we can't make copies, we can only accept memory.
+Similarly for VLAs.  */
+  tree outtype = TREE_TYPE (TREE_VALUE (link));
+  if (outtype != error_mark_node
+ && (TREE_ADDRESSABLE (outtype)
+ || !COMPLETE_TYPE_P (outtype)
+ || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (outtype))
+ && max_int_size_in_bytes (outtype
{
  if (allows_mem)
allows_reg = 0;
@@ -6392,7 +6398,12 @@ gimplify_asm_expr (tree *expr_p, gimple_
  oconstraints, _mem, _reg);
 
   /* If we can't make copies, we can only accept memory.  */
-  if (TREE_ADDRESSABLE (TREE_TYPE (TREE_VALUE (link
+  tree intype = TREE_TYPE (TREE_VALUE (link));
+  if (intype != error_mark_node
+ && (TREE_ADDRESSABLE (intype)
+ || !COMPLETE_TYPE_P (intype)
+ || (!tree_fits_poly_uint64_p (TYPE_SIZE_UNIT (intype))
+ && max_int_size_in_bytes (intype
{
  if (allows_mem)
allows_reg = 0;
--- gcc/testsuite/c-c++-common/pr92352.c.jj 2019-11-04 14:03:18.725275255 
+0100
+++ gcc/testsuite/c-c++-common/pr92352.c2019-11-04 14:02:55.211629675 
+0100
@@ -0,0 +1,15 @@
+/* PR inline-asm/92352 */
+
+void
+foo (int x)
+{
+  int var[x];
+  asm volatile ("" : "+r" (var));  /* { dg-error "impossible constraint in 
'asm'" } */
+}  /* { dg-error "non-memory output 0 must 
stay in memory" "" { target *-*-* } .-1 } */
+
+void
+bar (int x)
+{
+  int var[x];
+  asm volatile ("" : "+m" (var));
+}

Jakub



[PATCH] Fix compute_objsize ICE on VLA ARRAY_REF (PR tree-optimization/91945)

2019-11-04 Thread Jakub Jelinek
Hi!

As the testcase shows, ARRAY_REF on an array with variable length element
doesn't have INTEGER_CST TYPE_SIZE_UNIT which the code was assuming.
The following patch punts in that case.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2019-11-05  Jakub Jelinek  

PR tree-optimization/91945
* builtins.c (compute_objsize): For ARRAY_REF, only multiply off
by tpsize if it is both non-NULL and INTEGER_CST, otherwise punt.
Formatting fix.

* gfortran.dg/pr91945.f90: New test.

--- gcc/builtins.c.jj   2019-10-10 01:33:20.0 +0200
+++ gcc/builtins.c  2019-11-04 10:09:21.200301352 +0100
@@ -3626,7 +3626,7 @@ compute_objsize (tree dest, int ostype,
}
}
  else if (TREE_CODE (off) == SSA_NAME
- && INTEGRAL_TYPE_P (TREE_TYPE (off)))
+  && INTEGRAL_TYPE_P (TREE_TYPE (off)))
{
  wide_int min, max;
  enum value_range_kind rng = get_range_info (off, , );
@@ -3680,7 +3680,8 @@ compute_objsize (tree dest, int ostype,
  if (TREE_CODE (dest) == ARRAY_REF)
{
  tree eltype = TREE_TYPE (dest);
- if (tree tpsize = TYPE_SIZE_UNIT (eltype))
+ tree tpsize = TYPE_SIZE_UNIT (eltype);
+ if (tpsize && TREE_CODE (tpsize) == INTEGER_CST)
off = fold_build2 (MULT_EXPR, size_type_node, off, tpsize);
  else
return NULL_TREE;
--- gcc/testsuite/gfortran.dg/pr91945.f90.jj2019-11-04 10:13:40.392378534 
+0100
+++ gcc/testsuite/gfortran.dg/pr91945.f90   2019-11-04 10:13:21.272667903 
+0100
@@ -0,0 +1,5 @@
+! PR tree-optimization/91945
+! { dg-do compile }
+! { dg-options "-O3 -fstack-arrays -fno-guess-branch-probability" }
+
+include 'result_in_spec_1.f90'

Jakub



[C++ PATCH] Allow [[likely]] and [[unlikely]] in constexpr functions (PR c++/92343)

2019-11-04 Thread Jakub Jelinek
Hi!

When Martin Liska added PREDICT_EXPR to potential_constant_expression_1,
it was with goto in mind and in that case goto isn't a potential
constant expression, but when the {,un}likely attributes are used on other
statements, they are valid.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk?

2019-11-05  Jakub Jelinek  

PR c++/92343
* constexpr.c (potential_constant_expression_1): Return true rather
than false for PREDICT_EXPR.

* g++.dg/cpp2a/attr-likely6.C: New test.

--- gcc/cp/constexpr.c.jj   2019-11-02 00:26:48.965846855 +0100
+++ gcc/cp/constexpr.c  2019-11-04 09:53:35.070621487 +0100
@@ -6493,6 +6493,7 @@ potential_constant_expression_1 (tree t,
 case LABEL_DECL:
 case LABEL_EXPR:
 case CASE_LABEL_EXPR:
+case PREDICT_EXPR:
 case CONST_DECL:
 case SIZEOF_EXPR:
 case ALIGNOF_EXPR:
@@ -7354,7 +7355,6 @@ potential_constant_expression_1 (tree t,
   return true;
 
 case EMPTY_CLASS_EXPR:
-case PREDICT_EXPR:
   return false;
 
 case GOTO_EXPR:
--- gcc/testsuite/g++.dg/cpp2a/attr-likely6.C.jj2019-11-04 
09:54:50.126485303 +0100
+++ gcc/testsuite/g++.dg/cpp2a/attr-likely6.C   2019-11-04 09:55:21.001017926 
+0100
@@ -0,0 +1,14 @@
+// PR c++/92343
+// { dg-do compile { target c++14 } }
+
+constexpr bool
+foo (bool x)
+{
+  if (x)
+[[unlikely]] return true;
+  else
+[[likely]] return false;
+}
+
+static_assert (foo (true), "");
+static_assert (!foo (false), "");

Jakub



Re: [PATCH][vect] Disable vectorization of epilogues for loops with SIMDUID set

2019-11-04 Thread Jakub Jelinek
On Tue, Nov 05, 2019 at 08:07:53AM +0100, Richard Biener wrote:
> > I was using loop->simdlen to detect whether it was a SIMD loop and I don't
> > believe that was correct, as can be witnessed by the mass failures in 
> > libgomp.
> > My apologies for not running this, didn't think of it!
> > 
> > I found that these were failing because we do not handle vectorization of
> > epilogues correctly when SIMDUID is set. For now Jakub and I agreed to 
> > disable
> > epilogue vectorization for loops where SIMDUID is set until we have fixed
> > this. See further comments inline.
> > 
> > I bootstrapped it on aarch64 and x86_64, ran libgomp on both.
> > 
> > This OK for trunk?
> 
> OK.  Can you remove the simdlen == 0 check as a followup?

Yeah, exactly, I wanted to ask what the point of the simdlen == 0 check is.
All a non-zero simdlen says is a user assertion that certain inter-loop
depencencies don't exist.

Jakub



Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-04 Thread Richard Biener
On Mon, 4 Nov 2019, Segher Boessenkool wrote:

> Hi!
> 
> On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
> > In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
> > target related hueristic adjustment in this hook. In this patch, small loops
> > is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
> > some improvement for spec2017.  This patch enhanced a little for [Patch V2] 
> > to
> > enable small loops unroll for O3 by default like O2.
> 
> > * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.
> 
> That's the declaration of a variable.  A command line flag is something
> like -munroll-small-loops.  Do we want a command line option like that?
> It makes testing simpler.
> 
> > -  /* unroll very small loops 2 time if no -funroll-loops.  */
> > +  /* If funroll-loops is not enabled explicitly, then enable small 
> > loops
> > +unrolling for -O2, and do not turn fweb or frename-registers on.  */
> >if (!global_options_set.x_flag_unroll_loops
> >   && !global_options_set.x_flag_unroll_all_loops)
> > {
> > - maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
> > -global_options.x_param_values,
> > -global_options_set.x_param_values);
> > -
> > - maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
> > -global_options.x_param_values,
> > -global_options_set.x_param_values);
> > + unroll_small_loops = optimize >= 2 ? 1 : 0;
> 
> That includes -Os?

It also re-introduces the exactly same issue as the --param with LTO.

> I think you shouldn't always set it to some value, only enable it where
> you want to enable it.  If you make a command line option for it this is
> especially simple (the table in common/config/rs6000/rs6000-common.c).
> 
> > +static unsigned
> > +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
> > +{
> > +  if (unroll_small_loops)
> > +{
> > +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> > +example we may want to unroll very small loops more times (4 perhaps).
> > +We also should use a PARAM for this.  */
> > +  if (loop->ninsns <= 10)
> > +   return MIN (2, nunroll);
> > +  else
> > +   return 0;
> > +}
> 
> (Add an empty line here?)
> 
> > +  return nunroll;
> > +}
> 
> Great :-)
> 
> > @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct 
> > cl_target_option *ptr,
> >  {
> >ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
> >ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
> > +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
> >  }
> 
> Yeah we shouldn't need to add that, this should all be automatic.
> 
> 
> Segher
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH][vect] Disable vectorization of epilogues for loops with SIMDUID set

2019-11-04 Thread Richard Biener
On Mon, 4 Nov 2019, Andre Vieira (lists) wrote:

> Hi,
> 
> I was using loop->simdlen to detect whether it was a SIMD loop and I don't
> believe that was correct, as can be witnessed by the mass failures in libgomp.
> My apologies for not running this, didn't think of it!
> 
> I found that these were failing because we do not handle vectorization of
> epilogues correctly when SIMDUID is set. For now Jakub and I agreed to disable
> epilogue vectorization for loops where SIMDUID is set until we have fixed
> this. See further comments inline.
> 
> I bootstrapped it on aarch64 and x86_64, ran libgomp on both.
> 
> This OK for trunk?

OK.  Can you remove the simdlen == 0 check as a followup?

Thanks,
Richard.

> Cheers,
> Andre
> 
> gcc/ChangeLog:
> 2019-11-04  Andre Vieira  
> 
> * tree-vect-loop.c (vect_analyze_loop): Disable epilogue
> vectorization if loop->simduid is non null.
> 
> On 31/10/2019 16:58, Jakub Jelinek wrote:
> 
> > FAIL: libgomp.c/../libgomp.c-c++-common/loop-1.c execution test
> > FAIL: libgomp.c/examples-4/simd-3.c execution test
> > FAIL: libgomp.c/pr58392.c execution test
> > FAIL: libgomp.c/scan-13.c execution test
> > FAIL: libgomp.c/scan-17.c execution test
> > FAIL: libgomp.c/scan-19.c execution test
> > FAIL: libgomp.c/scan-20.c execution test
> > FAIL: libgomp.c/simd-10.c execution test
> > FAIL: libgomp.c/simd-12.c execution test
> > FAIL: libgomp.c/simd-13.c execution test
> > FAIL: libgomp.c/simd-6.c execution test
> > FAIL: libgomp.c++/../libgomp.c-c++-common/loop-1.c execution test
> > FAIL: libgomp.c++/simd-8.C execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O1  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O2  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -fomit-frame-pointer
> > FAIL: -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
> > FAIL: test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -g  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -Os  execution test
> > FAIL: libgomp.fortran/nestedfn5.f90   -O1  execution test
> > FAIL: libgomp.fortran/nestedfn5.f90   -O2  execution test
> > FAIL: libgomp.fortran/nestedfn5.f90   -O3 -fomit-frame-pointer
> > FAIL: -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
> > FAIL: test
> > FAIL: libgomp.fortran/nestedfn5.f90   -O3 -g  execution test
> > FAIL: libgomp.fortran/nestedfn5.f90   -Os  execution test
> 
> These should go away now, but we should revisit SIMDUID and epilogue
> vectorization later.  I tried to look into it, but I am afraid I know very
> little about SIMD loops to figure out how to make this work.
> > 
> > On i686-linux, I also see newly
> > FAIL: gcc.dg/vect/vect-epilogues.c -flto -ffat-lto-objects  scan-tree-dump
> > FAIL: vect "LOOP EPILOGUE VECTORIZED"
> > FAIL: gcc.dg/vect/vect-epilogues.c scan-tree-dump vect "LOOP EPILOGUE
> > FAIL: VECTORIZED"
> > and in libgomp just
> 
> These, just like for arm should be skipped for i686, I suspect it doesn't know
> how to vectorize for lower VF's.  Could someone add the appropriate skip
> target triple for i686?
> 
> > FAIL: libgomp.c/examples-4/simd-3.c execution test
> > FAIL: libgomp.c/scan-13.c execution test
> > FAIL: libgomp.c/scan-17.c execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O1  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O2  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -fomit-frame-pointer
> > FAIL: -funroll-loops -fpeel-loops -ftracer -finline-functions  execution
> > FAIL: test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -g  execution test
> > FAIL: libgomp.fortran/examples-4/simd-3.f90   -Os  execution test
> 
> Same as the other libgomp tests. Should go away now.
> 
> 
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)

Re: [PATCH v2] PR92090: Fix testcase failures by r276469

2019-11-04 Thread Kewen.Lin
on 2019/11/5 上午6:57, Joseph Myers wrote:
> On Mon, 4 Nov 2019, luoxhu wrote:
> 
>> -finline-functions is enabled by default for O2 since r276469, update the
>> test cases with -fno-inline-functions.
>>
>> v2: disable inlining for the failed cases.  Add two more failed cases
>> not listed in BZ.  Tested on P8LE, P8BE and P9LE.
> 
> If inlining (or other interprocedural analysis) invalidates a test's 
> intent (e.g. all the code gets optimized away), our normal approach is to 
> use noinline etc. function attributes to prevent that inlining.
> 
> If you're adding such options to work around an ICE, which certainly 
> appears to be the case in the architecture-independent testcases here, you 
> should (a) have comments in the tests saying explicitly that the options 
> are there temporarily to work around the ICE in a bug whose number is 
> given in the comment, and (b) a remark in the open regression bug for the 
> ICE saying that those options have been added as a temporary workaround 
> and that a patch fixing the ICE should remove them again.  The commit 
> message also needs to make very clear that the commit is *not* a fix for 
> that bug and so it must *not* be closed as fixed until there is an actual 
> fix for the ICE.
> 

Hi Joseph,

Very good point!  Since gcc doesn't pursue 100% testsuite pass rate, I noticed
there are a few failures exposed/caused by some PRs all the time.  Could we
just leave the test case there without any pre workaround till the PR get fixed?
It seems more like what we did often.  Or the good thing with pre workaround
here is to make this case sensitive again for being used for other testing?

BR,
Kewen

> So I don't think this patch is OK without having such comments in the 
> tests to explain the issue and a carefully written commit message warning 
> that the patch is a workaround, not a fix and the bug in question must not 
> be closed simply because of the commit mentioning it.
> 



Re: PR92163

2019-11-04 Thread Prathamesh Kulkarni
On Mon, 4 Nov 2019 at 18:37, Christophe Lyon  wrote:
>
> On Mon, 28 Oct 2019 at 16:03, Prathamesh Kulkarni
>  wrote:
> >
> > On Mon, 28 Oct 2019 at 07:18, Richard Biener  
> > wrote:
> > >
> > > On Fri, Oct 25, 2019 at 9:58 PM Prathamesh Kulkarni
> > >  wrote:
> > > >
> > > > On Fri, 25 Oct 2019 at 13:19, Richard Biener 
> > > >  wrote:
> > > > >
> > > > > On Wed, Oct 23, 2019 at 11:45 PM Prathamesh Kulkarni
> > > > >  wrote:
> > > > > >
> > > > > > Hi,
> > > > > > The attached patch tries to fix PR92163 by calling
> > > > > > gimple_purge_dead_eh_edges from ifcvt_local_dce if we need eh 
> > > > > > cleanup.
> > > > > > Does it look OK ?
> > > > >
> > > > > Hmm.  I think it shows an issue with the return value of 
> > > > > remove_stmt_form_eh_lp
> > > > > which is true if the LP index is -1 (externally throwing).  We don't
> > > > > need to purge
> > > > > any edges in that case.  That is, if-conversion should never need to
> > > > > do EH purging
> > > > > since that would be wrong-code.
> > > > >
> > > > > As of the segfault can you please instead either pass down 
> > > > > need_eh_cleanup
> > > > > as function parameter (and NULL from ifcvt) or use the return value 
> > > > > in DSE
> > > > > to set the bit in the caller.
> > > > Hi Richard,
> > > > Thanks for the suggestions, does the attached patch look OK ?
> > > > Bootstrap+test in progress on x86_64-unknown-linux-gnu.
> > >
> > > OK.
> > Thanks, committed to trunk in r277525 after bootstrap+test on
> > x86_64-unknown-linux-gnu.
> >
>
> Hi Prathamesh,
>
> There's a problem with the new test you added: if uses -fopenacc which
> is not supported by arm-eabi or aarch64-elf targets for instance.
> You probably want to move the test to gcc.dg/goacc or add
> dg-require-effective-target fopenacc.
Oops, sorry about that. Could you please confirm if attached patch
fixes the issue ?
I added dg-require-effective-target fopenacc.

Thanks,
Prathamesh
>
> Thanks,
>
> Christophe
>
> > Thanks,
> > Prathamesh
> > >
> > > Richard.
> > >
> > > > Thanks,
> > > > Prathamesh
> > > > >
> > > > > Thanks,
> > > > > Richard.
> > > > >
> > > > > > Thanks,
> > > > > > Prathamesh
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr92163.c b/gcc/testsuite/gcc.dg/tree-ssa/pr92163.c
index 58f548fe76b..227c09255e4 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/pr92163.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr92163.c
@@ -1,4 +1,5 @@
 /* { dg-do "compile" } */
+/* { dg-require-effective-target fopenacc } */
 /* { dg-options "-O2 -fexceptions -fnon-call-exceptions -fopenacc" } */
 
 void


Re: [PATCH] handle constant size VLAs in -Warray-bounds (PR 82608, 92333)

2019-11-04 Thread Jeff Law
On 11/4/19 8:26 PM, Martin Sebor wrote:
> -Warray-bounds doesn't yet have the logic to detect out-of-bounds
> indices into dynamically allocated arrays like VLAs because it
> doesn't track allocation calls.  But the warning could detect
> such errors in accesses to VLAs with constant sizes even without
> such tracking.
> 
> The attached patch adds this capability.  Testing the warning
> revealed that the names and locations of VLAs with constant sizes
> are unavailable, making the warnings less useful than they could
> be otherwise.  So the other part of the patch also arranges for
> the arrays backing the constant size VLAs to have names that
> reflect those of the VLAs, and the same location.
> 
> Tested on x86_64-linux.
> 
> Martin
> 
> gcc-92333.diff
> 
> PR middle-end/92333 - missing variable name referencing VLA in warnings
> PR middle-end/82608 - missing -Warray-bounds on an out-of-bounds VLA index
> 
> gcc/testsuite/ChangeLog:
> 
>   PR middle-end/92333
>   PR middle-end/82608
>   * gcc.dg/Warray-bounds-53.c: New test.
> 
> gcc/ChangeLog:
> 
>   PR middle-end/92333
>   PR middle-end/82608
>   * tree-vrp.c (vrp_prop::check_array_ref): Handle VLAs with constant
>   size.
>   * tree-ssa-ccp.c (fold_builtin_alloca_with_align): Use a meaninful
>   name and location for a temporary variable.
OK
jeff



[committed] make vrp_bitmap_equal_p static

2019-11-04 Thread Aldy Hernandez
This function is only used once, so there's no need for it to be 
externally visible.


Committed as obvious.
commit 53363b9234db6e3b696abc53b24ea1e0d2547038
Author: Aldy Hernandez 
Date:   Tue Nov 5 05:10:41 2019 +0100

Move vrp_bitmap_equal_p above its only use and make it static.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 1c2fff16295..f492ea6da0c 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-11-05  Aldy Hernandez  
+
+	* tree-vrp.h (vrp_bitmap_equal_p): Remove.
+	* tree-vrp.c (vrp_bitmap_equal_p): Move before use and make
+	static.
+
 2019-11-05  Aldy Hernandez  
 
 	* tree-vrp.c (value_range_base::operator==): Use equal_p to
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index a6d44e9dc6d..e926670b962 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -254,6 +254,18 @@ value_range_base::equal_p (const value_range_base ) const
 	  && vrp_operand_equal_p (m_max, other.m_max));
 }
 
+/* Return true if the bitmaps B1 and B2 are equal.  */
+
+static bool
+vrp_bitmap_equal_p (const_bitmap b1, const_bitmap b2)
+{
+  return (b1 == b2
+	  || ((!b1 || bitmap_empty_p (b1))
+	  && (!b2 || bitmap_empty_p (b2)))
+	  || (b1 && b2
+	  && bitmap_equal_p (b1, b2)));
+}
+
 /* Returns TRUE if THIS == OTHER.  Ignores the equivalence bitmap if
IGNORE_EQUIVS is TRUE.  */
 
@@ -910,18 +922,6 @@ vrp_operand_equal_p (const_tree val1, const_tree val2)
   return true;
 }
 
-/* Return true, if the bitmaps B1 and B2 are equal.  */
-
-bool
-vrp_bitmap_equal_p (const_bitmap b1, const_bitmap b2)
-{
-  return (b1 == b2
-	  || ((!b1 || bitmap_empty_p (b1))
-	  && (!b2 || bitmap_empty_p (b2)))
-	  || (b1 && b2
-	  && bitmap_equal_p (b1, b2)));
-}
-
 static bool
 range_has_numeric_bounds_p (const value_range_base *vr)
 {
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index 5cd94733188..3861634fb7e 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -279,8 +279,6 @@ extern void register_edge_assert_for (tree, edge, enum tree_code,
 extern bool stmt_interesting_for_vrp (gimple *);
 extern bool infer_value_range (gimple *, tree, tree_code *, tree *);
 
-extern bool vrp_bitmap_equal_p (const_bitmap, const_bitmap);
-
 extern bool range_int_cst_p (const value_range_base *);
 
 extern int compare_values (tree, tree);


Re: [PATCH v3] PR92090: Fix testcase failures by r276469

2019-11-04 Thread luoxhu
Hi,

On 2019/11/5 06:57, Joseph Myers wrote:
> On Mon, 4 Nov 2019, luoxhu wrote:
> 
>> -finline-functions is enabled by default for O2 since r276469, update the
>> test cases with -fno-inline-functions.
>>
>> v2: disable inlining for the failed cases.  Add two more failed cases
>> not listed in BZ.  Tested on P8LE, P8BE and P9LE.
> 
> If inlining (or other interprocedural analysis) invalidates a test's
> intent (e.g. all the code gets optimized away), our normal approach is to
> use noinline etc. function attributes to prevent that inlining.
> 
> If you're adding such options to work around an ICE, which certainly
> appears to be the case in the architecture-independent testcases here, you
> should (a) have comments in the tests saying explicitly that the options
> are there temporarily to work around the ICE in a bug whose number is
> given in the comment, and (b) a remark in the open regression bug for the
> ICE saying that those options have been added as a temporary workaround
> and that a patch fixing the ICE should remove them again.  The commit
> message also needs to make very clear that the commit is *not* a fix for
> that bug and so it must *not* be closed as fixed until there is an actual
> fix for the ICE.
> 
> So I don't think this patch is OK without having such comments in the
> tests to explain the issue and a carefully written commit message warning
> that the patch is a workaround, not a fix and the bug in question must not
> be closed simply because of the commit mentioning it.
> 

Thanks. Updated the comments and message to mark the unsolved ICE issue as 
below,
will commit it soon:


-finline-functions is enabled by default for O2 since r276469, update the
test cases with -fno-inline-functions.
The options "-fno-inline-funtions --param max-inline-insns-single-O2=200"
for c11-atomic-exec-5.c is a temporary work around to the ICE of LRA on
BE systems in PR92090.  This commit is NOT a fix for the ICE bug and so it
must NOT be closed.

v2: Disable inlining for the failed cases.  Add two more failed cases
not listed in BZ.  Tested on P8LE, P8BE and P9LE.
v3: Update commit message and comments for atomic ICE.

gcc/testsuite/ChangeLog:

2019-10-30  Xiong Hu Luo  

PR92090
* g++.dg/tree-ssa/pr61034.C: Add -fno-inline-functions --param
max-inline-insns-single-O2=200.
* gcc.dg/atomic/c11-atomic-exec-5.c: Likewise.
* gcc.target/powerpc/pr72804.c: Likewise.
* gcc.target/powerpc/pr79439-1.c: Add -fno-inline-functions.
* gcc.target/powerpc/vsx-builtin-7.c: Likewise.
---
 gcc/testsuite/g++.dg/tree-ssa/pr61034.C  | 2 +-
 gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c  | 2 +-
 gcc/testsuite/gcc.target/powerpc/pr72804.c   | 2 +-
 gcc/testsuite/gcc.target/powerpc/pr79439-1.c | 2 +-
 gcc/testsuite/gcc.target/powerpc/vsx-builtin-7.c | 2 +-
 5 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C 
b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
index 2e3dfecacb4..338de1ebb13 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/pr61034.C
@@ -1,5 +1,5 @@
 // { dg-do compile }
-// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized 
-fdelete-null-pointer-checks --param early-inlining-insns-O2=14" }
+// { dg-options "-O2 -fdump-tree-fre3 -fdump-tree-optimized 
-fdelete-null-pointer-checks --param early-inlining-insns-O2=14 
-fno-inline-functions --param max-inline-insns-single-O2=200"  }
 
 #define assume(x) if(!(x))__builtin_unreachable()
 
diff --git a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c 
b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
index 692c64ad207..a3a5a05da30 100644
--- a/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
+++ b/gcc/testsuite/gcc.dg/atomic/c11-atomic-exec-5.c
@@ -3,7 +3,7 @@
iterations of the compare-and-exchange loop are needed, exceptions
get properly cleared).  */
 /* { dg-do run } */
-/* { dg-options "-std=c11 -pedantic-errors -pthread -U_POSIX_C_SOURCE 
-D_POSIX_C_SOURCE=200809L" } */
+/* { dg-options "-std=c11 -pedantic-errors -pthread -U_POSIX_C_SOURCE 
-D_POSIX_C_SOURCE=200809L -fno-inline-functions --param 
max-inline-insns-single-O2=200" } */  /* Disable inline here as "-Os" will hit 
ICE in LRA on Power8 BE, see PR92090.  */
 /* { dg-add-options ieee } */
 /* { dg-additional-options "-mfp-trap-mode=sui" { target alpha*-*-* } } */
 /* { dg-additional-options "-D_XOPEN_SOURCE=600" { target *-*-solaris2* } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/pr72804.c 
b/gcc/testsuite/gcc.target/powerpc/pr72804.c
index b83b6350d75..0fc3df1d89b 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr72804.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr72804.c
@@ -1,6 +1,6 @@
 /* { dg-do compile { target { lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
-/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-require-effective-target powerpc_vsx_ok  -fno-inline-functions --param 

Re: handle symbolics when comparing ranges

2019-11-04 Thread Aldy Hernandez



On 11/5/19 3:55 AM, Andrew MacLeod wrote:

On 11/4/19 6:05 PM, Aldy Hernandez wrote:



On 11/4/19 11:45 PM, Andrew MacLeod wrote:

On 11/4/19 5:23 PM, Aldy Hernandez wrote:
value_range_base::operator== was originally lifted from a world 
where symbolics didn't exist (the ranger branch), but symbolics do 
exist in mainline.


Although this isn't causing a problem yet, as soon as someone tries 
to compare non numeric ranges, we'll die.  Using operand_equal_p 
simplifies the comparison code drastically.


I suppose if/when we get multiple sub-ranges in value_range_base, 
we'll have to adapt this function again to compare things piece 
wise. For now, let's keep things simple.


OK pending tests?
Oh, we brought over the multiple sub-range bits to 
value_range_base... yeah we can remove that and just check for 
operand equality.  we'll deal with multiple subranges when thats a 
thing.



Is this any different than just calling value_range_base::equal_p()?


Ooops, indeed, that's the same thing.

Adjusted.

OK pending tests?

yeah, approved.


Final committed patch attached.  I had to remove the now unused 
range_compatible_p function, as it's no longer necessary.


Thanks.

commit 10eefa1934bb1833c85d1c445f4da01b933c0909
Author: Aldy Hernandez 
Date:   Mon Nov 4 21:20:26 2019 +0100

Use value_range_base::equal_p in value_range_base::operator== so we can handle
symbolics without dying.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index e726ff6d0a0..1c2fff16295 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-11-05  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::operator==): Use equal_p to
+	properly handle symbolics.
+	(range_compatible_p): Remove.
+
 2019-11-04  Kamlesh Kumar  
 
 	* common.opt (-fabi-version): Document =14.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 452895bfc24..a6d44e9dc6d 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6319,33 +6319,10 @@ value_range_base::intersect (const value_range_base )
 dump_flags |= TDF_DETAILS;
 }
 
-/* Return TRUE if two types are compatible for range operations.  */
-
-static bool
-range_compatible_p (tree t1, tree t2)
-{
-  if (POINTER_TYPE_P (t1) && POINTER_TYPE_P (t2))
-return true;
-
-  return types_compatible_p (t1, t2);
-}
-
 bool
 value_range_base::operator== (const value_range_base ) const
 {
-  if (undefined_p ())
-return r.undefined_p ();
-
-  if (num_pairs () != r.num_pairs ()
-  || !range_compatible_p (type (), r.type ()))
-return false;
-
-  for (unsigned p = 0; p < num_pairs (); p++)
-if (wi::ne_p (lower_bound (p), r.lower_bound (p))
-	|| wi::ne_p (upper_bound (p), r.upper_bound (p)))
-  return false;
-
-  return true;
+  return equal_p (r);
 }
 
 /* Visit all arguments for PHI node PHI that flow through executable


[PATCH] handle constant size VLAs in -Warray-bounds (PR 82608, 92333)

2019-11-04 Thread Martin Sebor

-Warray-bounds doesn't yet have the logic to detect out-of-bounds
indices into dynamically allocated arrays like VLAs because it
doesn't track allocation calls.  But the warning could detect
such errors in accesses to VLAs with constant sizes even without
such tracking.

The attached patch adds this capability.  Testing the warning
revealed that the names and locations of VLAs with constant sizes
are unavailable, making the warnings less useful than they could
be otherwise.  So the other part of the patch also arranges for
the arrays backing the constant size VLAs to have names that
reflect those of the VLAs, and the same location.

Tested on x86_64-linux.

Martin
PR middle-end/92333 - missing variable name referencing VLA in warnings
PR middle-end/82608 - missing -Warray-bounds on an out-of-bounds VLA index

gcc/testsuite/ChangeLog:

	PR middle-end/92333
	PR middle-end/82608
	* gcc.dg/Warray-bounds-53.c: New test.

gcc/ChangeLog:

	PR middle-end/92333
	PR middle-end/82608
	* tree-vrp.c (vrp_prop::check_array_ref): Handle VLAs with constant
	size.
	* tree-ssa-ccp.c (fold_builtin_alloca_with_align): Use a meaninful
	name and location for a temporary variable.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c	(revision 277806)
+++ gcc/tree-vrp.c	(working copy)
@@ -4154,6 +4154,9 @@ vrp_prop::check_array_ref (location_t location, tr
   low_sub = up_sub = TREE_OPERAND (ref, 1);
   up_bound = array_ref_up_bound (ref);
 
+  /* Referenced decl if one can be determined.  */
+  tree decl = NULL_TREE;
+
   /* Set for accesses to interior zero-length arrays.  */
   bool interior_zero_len = false;
 
@@ -4182,7 +4185,8 @@ vrp_prop::check_array_ref (location_t location, tr
 	  tree arg = TREE_OPERAND (ref, 0);
 	  poly_int64 off;
 
-	  if (TREE_CODE (arg) == COMPONENT_REF)
+	  const bool compref = TREE_CODE (arg) == COMPONENT_REF;
+	  if (compref)
 	{
 	  /* Try to determine the size of the trailing array from
 		 its initializer (if it has one).  */
@@ -4191,12 +4195,27 @@ vrp_prop::check_array_ref (location_t location, tr
 		  maxbound = refsize;
 	}
 
-	  if (maxbound == ptrdiff_max
-	  && get_addr_base_and_unit_offset (arg, )
-	  && known_gt (off, 0))
-	maxbound = wide_int_to_tree (sizetype,
-	 wi::sub (wi::to_wide (maxbound),
-		  off));
+	  if (maxbound == ptrdiff_max)
+	{
+	  /* Try to determine the size of the base object.  Avoid
+		 COMPONENT_REF already tried above.  Using its DECL_SIZE
+		 size wouldn't necessarily be correct if the reference is
+		 to its flexible array member initialized in a different
+		 translation unit.  */
+	  tree base = get_addr_base_and_unit_offset (arg, );
+	  if (!compref && base && DECL_P (base))
+		if (tree basesize = DECL_SIZE_UNIT (base))
+		  if (TREE_CODE (basesize) == INTEGER_CST)
+		{
+		  maxbound = basesize;
+		  decl = base;
+		}
+
+	  if (known_gt (off, 0))
+		maxbound = wide_int_to_tree (sizetype,
+	 wi::sub (wi::to_wide (maxbound),
+		  off));
+	}
 	  else
 	maxbound = fold_convert (sizetype, maxbound);
 
@@ -4281,7 +4300,7 @@ vrp_prop::check_array_ref (location_t location, tr
 	  fprintf (dump_file, "\n");
 	}
 
-  ref = TREE_OPERAND (ref, 0);
+  ref = decl ? decl : TREE_OPERAND (ref, 0);
 
   tree rec = NULL_TREE;
   if (TREE_CODE (ref) == COMPONENT_REF)
Index: gcc/tree-ssa-ccp.c
===
--- gcc/tree-ssa-ccp.c	(revision 277806)
+++ gcc/tree-ssa-ccp.c	(working copy)
@@ -,7 +,25 @@ fold_builtin_alloca_with_align (gimple *stmt)
   elem_type = build_nonstandard_integer_type (BITS_PER_UNIT, 1);
   n_elem = size * 8 / BITS_PER_UNIT;
   array_type = build_array_type_nelts (elem_type, n_elem);
-  var = create_tmp_var (array_type);
+
+  if (tree ssa_name = SSA_NAME_IDENTIFIER (lhs))
+{
+  /* Give the temporary a name derived from the name of the VLA
+	 declaration so it can be referenced in diagnostics.  */
+  const char *name = IDENTIFIER_POINTER (ssa_name);
+  var = create_tmp_var (array_type, name);
+}
+  else
+var = create_tmp_var (array_type);
+
+  if (gimple *lhsdef = SSA_NAME_DEF_STMT (lhs))
+{
+  /* Set the temporary's location to that of the VLA declaration
+	 so it can be pointed to in diagnostics.  */
+  location_t loc = gimple_location (lhsdef);
+  DECL_SOURCE_LOCATION (var) = loc;
+}
+
   SET_DECL_ALIGN (var, TREE_INT_CST_LOW (gimple_call_arg (stmt, 1)));
   if (uid != 0)
 SET_DECL_PT_UID (var, uid);
Index: gcc/testsuite/gcc.dg/Warray-bounds-53.c
===
--- gcc/testsuite/gcc.dg/Warray-bounds-53.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/Warray-bounds-53.c	(working copy)
@@ -0,0 +1,50 @@
+/* PR middle-end/92333 - missing variable name referencing VLA in warnings
+   PR middle-end/82608 - missing -Warray-bounds on an 

Re: [PATCH 3/3] [ARC] Don't split ior/mov predicated insns.

2019-11-04 Thread Jeff Law
On 10/22/19 2:21 AM, Claudiu Zissulescu wrote:
> Do not split immediate constants for predicated instructions.
> 
> gcc/
> -xx-xx  Claudiu Zissulescu  
> 
>   * config/arc/arc.c (arc_split_ior): Add asserts.
>   (arc_split_mov_const): Likewise.
>   (arc_check_ior_const): Do not match known short immediate values.
>   * config/arc/arc.md (movsi): Don't split predicated instructions.
>   (iorsi): Likewise.
> 
> testsuite/
> -xx-xx  Claudiu Zissulescu  
>   Sahahb Vahedi  
>   Cupertino Miranda  
> 
>   * gcc.target/arc/or-cnst-size2.c: New test.
OK
jeff



Re: [PATCH 2/3] [ARC] Update mea option documentation

2019-11-04 Thread Jeff Law
On 10/22/19 2:21 AM, Claudiu Zissulescu wrote:
> Update -mea option documentation.
> 
> gcc/
> -xx-xx  Claudiu Zissulescu  
> 
>   * config/arc/arc.opt (mea): Update help string.
>   * doc/invoke.texi(ARC): Update mea option info.
OK
jeff



Re: [PATCH 1/3] [ARC] Cleanup sign/zero extend patterns

2019-11-04 Thread Jeff Law
On 10/22/19 2:21 AM, Claudiu Zissulescu wrote:
> Cleanup sign/zero extend patterns (corrects the asm output string and 
> constraint letters).
> 
> gcc/
> -xx-xx  Claudiu Zissulescu  
> 
>   * config/arc/arc.md (zero_extendqihi2_i): Cleanup pattern.
>   (zero_extendqisi2_ac): Likewise.
>   (zero_extendhisi2_i): Likewise.
>   (extendqihi2_i): Likewise.
>   (extendqisi2_ac): Likewise.
>   (extendhisi2_i): Likewise.
OK.

Jeff



Re: handle symbolics when comparing ranges

2019-11-04 Thread Andrew MacLeod

On 11/4/19 6:05 PM, Aldy Hernandez wrote:



On 11/4/19 11:45 PM, Andrew MacLeod wrote:

On 11/4/19 5:23 PM, Aldy Hernandez wrote:
value_range_base::operator== was originally lifted from a world 
where symbolics didn't exist (the ranger branch), but symbolics do 
exist in mainline.


Although this isn't causing a problem yet, as soon as someone tries 
to compare non numeric ranges, we'll die.  Using operand_equal_p 
simplifies the comparison code drastically.


I suppose if/when we get multiple sub-ranges in value_range_base, 
we'll have to adapt this function again to compare things piece 
wise. For now, let's keep things simple.


OK pending tests?
Oh, we brought over the multiple sub-range bits to 
value_range_base... yeah we can remove that and just check for 
operand equality.  we'll deal with multiple subranges when thats a 
thing.



Is this any different than just calling value_range_base::equal_p()?


Ooops, indeed, that's the same thing.

Adjusted.

OK pending tests?

yeah, approved.

Andrew



Re: [PATCH, rs6000] Make load cost more in vectorization cost for P8/P9

2019-11-04 Thread Kewen.Lin
Hi Segher,

Thanks for the comments!

on 2019/11/5 上午4:21, Segher Boessenkool wrote:
> Hi!
> 
> On Mon, Nov 04, 2019 at 03:16:06PM +0800, Kewen.Lin wrote:
>> To align with rs6000_insn_cost costing more for load type insns,
> 
> (Which itself has history in rs6000_rtx_costs).
> 
>> this patch is to make load insns cost more in vectorization cost
>> function.  Considering that the result of load usually is used
>> somehow later (true-dep) but store won't, we keep the store as
>> before.
> 
> The latency of load insns is about twice that of "simple" instructions;
> 2 vs. 1 on older cores, and 4 (or so) vs. 2 on newer cores.
> 

Yes, for latest Power9, general load takes 4, vsx load takes 5.

>> The SPEC2017 performance evaluation on Power8 shows 525.x264_r
>> +9.56%, 511.povray_r +2.08%, 527.cam4_r 1.16% gains, no 
>> significant degradation, SPECINT geomean +0.88%, SPECFP geomean
>> +0.26%.
> 
> Nice :-)
> 
>> The SPEC2017 performance evaluation on Power9 shows no significant
>> improvement or degradation, SPECINT geomean +0.04%, SPECFP geomean
>> +0.04%.
>>
>> The SPEC2006 performance evaluation on Power8 shows 454.calculix
>> +4.41% gain but 416.gamess -1.19% and 453.povray -3.83% degradation.
>> I looked into the two degradation bmks, the degradation were NOT
>> due to hotspot changes by vectorization, were all side effects.
>> SPECINT geomean +0.10%, SPECFP geomean no changed considering
>> the degradation.
> 
> Also nice.
> 
>> --- a/gcc/config/rs6000/rs6000.c
>> +++ b/gcc/config/rs6000/rs6000.c
>> @@ -4763,15 +4763,22 @@ rs6000_builtin_vectorization_cost (enum 
>> vect_cost_for_stmt type_of_cost,
>>switch (type_of_cost)
>>  {
>>case scalar_stmt:
>> -  case scalar_load:
>>case scalar_store:
>>case vector_stmt:
>> -  case vector_load:
>>case vector_store:
>>case vec_to_scalar:
>>case scalar_to_vec:
>>case cond_branch_not_taken:
>>  return 1;
>> +  case scalar_load:
>> +  case vector_load:
>> +/* Like rs6000_insn_cost, make load insns cost a bit more. FIXME: the
> 
> (two spaces after full stop).
> 

Good catch!  Will fix it (and the others).

>> +   benefits were observed on Power8 and up, we can unify it if similar
>> +   profits are measured on Power6 and Power7.  */
>> +if (TARGET_P8_VECTOR)
>> +  return 2;
>> +else
>> +  return 1;
> 
> Hrm, but you showed benchmark improvements for p9 as well?
> 

No significant gains but no degradation as well, so I thought it's fine to align
it together.  Does it make sense?

> What happens if you enable this for everything as well?
> 

My concern was that if we enable it for everything, it's possible to introduce
degradation for some benchmarks on P6 or P7 where we didn't evaluate the
performance impact.  Although it's reasonable from the point view of load 
latency,
it's possible to get worse result in the actual benchmarks based on my fine 
grain
cost adjustment experiment before.  

Or do you suggest enabling it everywhere and solve the degradation issue if 
exposed?
I'm also fine with that.  :)


BR,
Kewen



Re: Ping: [PATCH V5] Extend IPA-CP to support arithmetically-computed value-passing on by-ref argument (PR ipa/91682)

2019-11-04 Thread Martin Jambor
Hi,

On Mon, Nov 04 2019, Feng Xue OS wrote:
> Hi, Honza & Martin,
>
> This is a new patch merged with the newest IPA changes. Would you
> please take a look at the patch?  Together with the other patch on
> recursive function versioning, we can find more than 30% performance
> boost on exchange2 in spec2017. So, it will be good if two patches can
> enter the gcc 10 release, though time schedule seems to be somewhat
> urgent. Thanks.

Sorry that it took so long.  Next time, please consider making the
review a bit easier by writing a ChangeLog (yes, I usually read them and
you'll have to write one anyway).

The good news is that I found no real objection to the patch, I'd only
like to request a few minor changes.  Thanks a lot for working on this,
this extension of aggregate jump functions is really appreciated.  But
it also shows that we really need to beat a bit more sense into the
various involved data structures now since they grew a bit out of hand.
Nevertheless, that is something for next stage1 and should not block
this.

Anyway, comments inline, there is really just a few of them.  Honza,
please have a final look but overall I like the patch.

>
> Feng
>
> From 2dfa5e8b5a828ad8d46c2af5f66ee97fb04ebc16 Mon Sep 17 00:00:00 2001
> From: Feng Xue 
> Date: Thu, 15 Aug 2019 15:47:14 +0800
> Subject: [PATCH 1/2] temp
>
> ---
>  gcc/ipa-cp.c   | 498 --
>  gcc/ipa-fnsummary.c|  48 +--
>  gcc/ipa-fnsummary.h|   8 +-
>  gcc/ipa-inline-analysis.c  |   6 +-
>  gcc/ipa-prop.c | 569 +
>  gcc/ipa-prop.h | 182 ++--
>  gcc/testsuite/gcc.dg/ipa/ipcp-agg-10.c |   8 +-
>  gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c |  77 
>  8 files changed, 1105 insertions(+), 291 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/ipa/ipcp-agg-11.c
>
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index 8a5f8d362f6..e100c5f3426 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -1459,6 +1473,146 @@ ipa_context_from_jfunc (ipa_node_params *info, 
> cgraph_edge *cs, int csidx,
>return ctx;
>  }
>  
> +/* See if NODE is a clone with a known aggregate value at a given OFFSET of a
> +   parameter with the given INDEX.  */
> +
> +static tree
> +get_clone_agg_value (struct cgraph_node *node, HOST_WIDE_INT offset,
> +  int index)
> +{
> +  struct ipa_agg_replacement_value *aggval;
> +
> +  aggval = ipa_get_agg_replacements_for_node (node);
> +  while (aggval)
> +{
> +  if (aggval->offset == offset
> +   && aggval->index == index)
> + return aggval->value;
> +  aggval = aggval->next;
> +}
> +  return NULL_TREE;
> +}
> +
> +/* Determine whether ITEM, jump function for an aggregate part, evaluates to 
> a
> +   single known constant value and if so, return it.  Otherwise return NULL.
> +   NODE and INFO describes the caller node or the one it is inlined to, and
> +   its related info.  */
> +
> +static tree
> +ipa_agg_value_from_node (class ipa_node_params *info,
> +  struct cgraph_node *node,
> +  struct ipa_agg_jf_item *item)
> +{
> +  tree value = NULL_TREE;
> +  int src_idx;
> +
> +  if (item->offset < 0 || item->jftype == IPA_JF_UNKNOWN)
> +return NULL_TREE;
> +
> +  if (item->jftype == IPA_JF_CONST)
> +return item->value.constant;
> +
> +  gcc_checking_assert (item->jftype == IPA_JF_PASS_THROUGH
> +|| item->jftype == IPA_JF_LOAD_AGG);
> +
> +  src_idx = item->value.pass_through.formal_id;
> +
> +  if (info->ipcp_orig_node)
> +{
> +  if (item->jftype == IPA_JF_PASS_THROUGH)
> + value = info->known_csts[src_idx];
> +  else
> + value = get_clone_agg_value (node, item->value.load_agg.offset,
> +  src_idx);
> +}
> +  else if (info->lattices)
> +{
> +  class ipcp_param_lattices *src_plats
> + = ipa_get_parm_lattices (info, src_idx);

Wrong indentation for GNU coding standard.

> +
> +  if (item->jftype == IPA_JF_PASS_THROUGH)
> + {
> +   struct ipcp_lattice *lat = _plats->itself;
> +
> +   if (!lat->is_single_const ())
> + return NULL_TREE;
> +
> +   value = lat->values->value;
> + }
> +  else if (src_plats->aggs
> +&& !src_plats->aggs_bottom
> +&& !src_plats->aggs_contain_variable
> +&& src_plats->aggs_by_ref == item->value.load_agg.by_ref)
> + {
> +   struct ipcp_agg_lattice *aglat;
> +
> +   for (aglat = src_plats->aggs; aglat; aglat = aglat->next)
> + {
> +   if (aglat->offset > item->value.load_agg.offset)
> + break;
> +
> +   if (aglat->offset == item->value.load_agg.offset)
> + {
> +   if (aglat->is_single_const ())
> + value = aglat->values->value;
> +   break;
> + }
> + }
> + }
> +}

Re: [PATCH] Allow libcalls for complex memcpy when optimizing for size.

2019-11-04 Thread Jeff Law
On 10/31/19 5:41 PM, Jim Wilson wrote:
> The RISC-V backend wants to use a libcall when optimizing for size if
> more than 6 instructions are needed.  Emit_move_complex asks for no
> libcalls.  This case requires 8 insns for rv64 and 16 insns for rv32,
> so we get fallback code that emits a loop.  Commit_one_edge_insertion
> doesn't allow code inserted for a phi node on an edge to end with a
> branch, and so this triggers an assertion.  This problem goes away if
> we allow libcalls when optimizing for size, which gives the code the
> RISC-V backend wants, and avoids triggering the assert.
> 
>   gcc/
>   PR middle-end/92263
>   * expr.c (emit_move_complex): Only use BLOCK_OP_NO_LIBCALL when
>   optimize_insn_for_speed_p is true.
> 
>   gcc/testsuite/
>   PR middle-end/92263
>   * gcc.dg/pr92263.c: New.
OK.

jeff



Re: [PATCH v3] Updated the Fix:

2019-11-04 Thread Jason Merrill

On 11/3/19 6:42 AM, Kamlesh Kumar wrote:

ChangeLog Entries:

gcc/cp
--
2019-11-02  Kamlesh Kumar  

 PR c++/91979 - mangling nullptr expression
 * cp/mangle.c (write_template_arg_literal): Handle nullptr
 mangling.

gcc
--
2019-11-02  Kamlesh Kumar  

 * common.opt (-fabi-version): Added Description.
 * testsuite/g++.dg/cpp0x/nullptr27.C: Modify Test.
 * testsuite/g++.dg/cpp0x/nullptr43.C: New Test.
 * testsuite/g++.dg/cpp0x/nullptr44.C: New Test.

libiberty
---
2019-11-02  Kamlesh Kumar  

 * cp-demangle.c (d_expr_primary): Handle
 nullptr demangling.
 * testsuite/demangle-expected: Added test.

gcc/c-family
--
2019-11-02  Kamlesh Kumar  

 * c-opts.c (c_common_post_options): Updated
 latest_abi_version.


Applied, thanks.  I also added the -fabi-version=14 to doc/invoke.texi.

Any word on your copyright assignment?


---
  gcc/c-family/c-opts.c  |  2 +-
  gcc/common.opt |  2 ++
  gcc/cp/mangle.c|  4 +++-
  gcc/testsuite/g++.dg/cpp0x/nullptr27.C |  2 +-
  gcc/testsuite/g++.dg/cpp0x/nullptr43.C |  9 +
  gcc/testsuite/g++.dg/cpp0x/nullptr44.C | 15 +++
  libiberty/cp-demangle.c| 11 +++
  libiberty/testsuite/demangle-expected  |  4 
  8 files changed, 46 insertions(+), 3 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/nullptr43.C
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/nullptr44.C

diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 0fffe60b140..d4c77be5cd5 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -937,7 +937,7 @@ c_common_post_options (const char **pfilename)
  
/* Change flag_abi_version to be the actual current ABI level, for the

   benefit of c_cpp_builtins, and to make comparison simpler.  */
-  const int latest_abi_version = 13;
+  const int latest_abi_version = 14;
/* Generate compatibility aliases for ABI v11 (7.1) by default.  */
const int abi_compat_default = 11;
  
diff --git a/gcc/common.opt b/gcc/common.opt

index cc279f411d7..fdd923e3c35 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -951,6 +951,8 @@ Driver Undocumented
  ; 13: Fixes the accidental change in 12 to the calling convention for classes
  ; with deleted copy constructor and trivial move constructor.
  ; Default in G++ 8.2.
+; 14: Corrects the mangling of nullptr expression.
+; Default in G++ 10.
  ;
  ; Additional positive integers will be assigned as new versions of
  ; the ABI become the default version of the ABI.
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index a9333b84349..a32ff2a2210 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -3400,7 +3400,9 @@ write_template_arg_literal (const tree value)
case INTEGER_CST:
gcc_assert (!same_type_p (TREE_TYPE (value), boolean_type_node)
|| integer_zerop (value) || integer_onep (value));
-   write_integer_cst (value);
+   if (!(abi_version_at_least (14)
+   && NULLPTR_TYPE_P (TREE_TYPE (value
+ write_integer_cst (value);
break;
  
case REAL_CST:

diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
index 2510dc80634..edd11606266 100644
--- a/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr27.C
@@ -1,7 +1,7 @@
  // PR c++/52706
  // { dg-do compile { target c++11 } }
  // { dg-options "-fabi-version=0" }
-// { dg-final { scan-assembler "_Z1fIDnLDn0EEiT_" } }
+// { dg-final { scan-assembler "_Z1fIDnLDnEEiT_" } }
  
  template

  int f(T);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr43.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr43.C
new file mode 100644
index 000..fbdb6cd8e9d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr43.C
@@ -0,0 +1,9 @@
+// PR c++/91979
+// { dg-do compile { target c++11 } }
+// { dg-options "-fabi-version=13" }
+// { dg-final { scan-assembler "_Z1fIDnLDn0EEiT_" } }
+
+template
+int f(T);
+
+int i2 = f(nullptr);
diff --git a/gcc/testsuite/g++.dg/cpp0x/nullptr44.C 
b/gcc/testsuite/g++.dg/cpp0x/nullptr44.C
new file mode 100644
index 000..9ceba14fc98
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/nullptr44.C
@@ -0,0 +1,15 @@
+// PR c++/91979
+// { dg-do compile { target c++11 } }
+// { dg-final { scan-assembler "_Z3fooILPv0EEvPN9enable_ifIXeqT_LDnEEvE4typeE" 
} }
+
+template 
+struct enable_if {};
+
+template 
+struct enable_if { typedef T type; };
+
+template 
+void foo(typename enable_if::type* = 0) {}
+
+template void foo<(void *)0>(void *);
+
diff --git a/libiberty/cp-demangle.c b/libiberty/cp-demangle.c
index 5b674d7d93c..3150efff80d 100644
--- a/libiberty/cp-demangle.c
+++ b/libiberty/cp-demangle.c
@@ -3577,6 +3577,17 @@ d_expr_primary (struct d_info *di)
  && type->u.s_builtin.type->print != D_PRINT_DEFAULT)
di->expansion -= 

Re: [PR47785] COLLECT_AS_OPTIONS

2019-11-04 Thread Kugan Vivekanandarajah
Hi,
Thanks for the review.

On Tue, 5 Nov 2019 at 03:57, H.J. Lu  wrote:
>
> On Sun, Nov 3, 2019 at 6:45 PM Kugan Vivekanandarajah
>  wrote:
> >
> > Thanks for the reviews.
> >
> >
> > On Sat, 2 Nov 2019 at 02:49, H.J. Lu  wrote:
> > >
> > > On Thu, Oct 31, 2019 at 6:33 PM Kugan Vivekanandarajah
> > >  wrote:
> > > >
> > > > On Wed, 30 Oct 2019 at 03:11, H.J. Lu  wrote:
> > > > >
> > > > > On Sun, Oct 27, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > > >  wrote:
> > > > > >
> > > > > > Hi Richard,
> > > > > >
> > > > > > Thanks for the review.
> > > > > >
> > > > > > On Wed, 23 Oct 2019 at 23:07, Richard Biener 
> > > > > >  wrote:
> > > > > > >
> > > > > > > On Mon, Oct 21, 2019 at 10:04 AM Kugan Vivekanandarajah
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > Hi Richard,
> > > > > > > >
> > > > > > > > Thanks for the pointers.
> > > > > > > >
> > > > > > > >
> > > > > > > >
> > > > > > > > On Fri, 11 Oct 2019 at 22:33, Richard Biener 
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > On Fri, Oct 11, 2019 at 6:15 AM Kugan Vivekanandarajah
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > Hi Richard,
> > > > > > > > > > Thanks for the review.
> > > > > > > > > >
> > > > > > > > > > On Wed, 2 Oct 2019 at 20:41, Richard Biener 
> > > > > > > > > >  wrote:
> > > > > > > > > > >
> > > > > > > > > > > On Wed, Oct 2, 2019 at 10:39 AM Kugan Vivekanandarajah
> > > > > > > > > > >  wrote:
> > > > > > > > > > > >
> > > > > > > > > > > > Hi,
> > > > > > > > > > > >
> > > > > > > > > > > > As mentioned in the PR, attached patch adds 
> > > > > > > > > > > > COLLECT_AS_OPTIONS for
> > > > > > > > > > > > passing assembler options specified with -Wa, to the 
> > > > > > > > > > > > link-time driver.
> > > > > > > > > > > >
> > > > > > > > > > > > The proposed solution only works for uniform -Wa 
> > > > > > > > > > > > options across all
> > > > > > > > > > > > TUs. As mentioned by Richard Biener, supporting 
> > > > > > > > > > > > non-uniform -Wa flags
> > > > > > > > > > > > would require either adjusting partitioning according 
> > > > > > > > > > > > to flags or
> > > > > > > > > > > > emitting multiple object files  from a single LTRANS 
> > > > > > > > > > > > CU. We could
> > > > > > > > > > > > consider this as a follow up.
> > > > > > > > > > > >
> > > > > > > > > > > > Bootstrapped and regression tests on  arm-linux-gcc. Is 
> > > > > > > > > > > > this OK for trunk?
> > > > > > > > > > >
> > > > > > > > > > > While it works for your simple cases it is unlikely to 
> > > > > > > > > > > work in practice since
> > > > > > > > > > > your implementation needs the assembler options be 
> > > > > > > > > > > present at the link
> > > > > > > > > > > command line.  I agree that this might be the way for 
> > > > > > > > > > > people to go when
> > > > > > > > > > > they face the issue but then it needs to be documented 
> > > > > > > > > > > somewhere
> > > > > > > > > > > in the manual.
> > > > > > > > > > >
> > > > > > > > > > > That is, with COLLECT_AS_OPTION (why singular?  I'd 
> > > > > > > > > > > expected
> > > > > > > > > > > COLLECT_AS_OPTIONS) available to cc1 we could stream this 
> > > > > > > > > > > string
> > > > > > > > > > > to lto_options and re-materialize it at link time (and 
> > > > > > > > > > > diagnose mismatches
> > > > > > > > > > > even if we like).
> > > > > > > > > > OK. I will try to implement this. So the idea is if we 
> > > > > > > > > > provide
> > > > > > > > > > -Wa,options as part of the lto compile, this should be 
> > > > > > > > > > available
> > > > > > > > > > during link time. Like in:
> > > > > > > > > >
> > > > > > > > > > arm-linux-gnueabihf-gcc -march=armv7-a -mthumb -O2 -flto
> > > > > > > > > > -Wa,-mimplicit-it=always,-mthumb -c test.c
> > > > > > > > > > arm-linux-gnueabihf-gcc  -flto  test.o
> > > > > > > > > >
> > > > > > > > > > I am not sure where should we stream this. Currently, 
> > > > > > > > > > cl_optimization
> > > > > > > > > > has all the optimization flag provided for compiler and it 
> > > > > > > > > > is
> > > > > > > > > > autogenerated and all the flags are integer values. Do you 
> > > > > > > > > > have any
> > > > > > > > > > preference or example where this should be done.
> > > > > > > > >
> > > > > > > > > In lto_write_options, I'd simply append the contents of 
> > > > > > > > > COLLECT_AS_OPTIONS
> > > > > > > > > (with -Wa, prepended to each of them), then recover them in 
> > > > > > > > > lto-wrapper
> > > > > > > > > for each TU and pass them down to the LTRANS compiles (if 
> > > > > > > > > they agree
> > > > > > > > > for all TUs, otherwise I'd warn and drop them).
> > > > > > > >
> > > > > > > > Attached patch streams it and also make sure that the options 
> > > > > > > > are the
> > > > > > > > same for all the TUs. Maybe it is a bit restrictive.
> > > > > > > >
> > > > > > > > What is the best place to document COLLECT_AS_OPTIONS. We don't 
> > > > > > > > seem
> > > > > > > > to document 

Re: handle symbolics when comparing ranges

2019-11-04 Thread Aldy Hernandez



On 11/4/19 11:45 PM, Andrew MacLeod wrote:

On 11/4/19 5:23 PM, Aldy Hernandez wrote:
value_range_base::operator== was originally lifted from a world where 
symbolics didn't exist (the ranger branch), but symbolics do exist in 
mainline.


Although this isn't causing a problem yet, as soon as someone tries to 
compare non numeric ranges, we'll die.  Using operand_equal_p 
simplifies the comparison code drastically.


I suppose if/when we get multiple sub-ranges in value_range_base, 
we'll have to adapt this function again to compare things piece wise. 
For now, let's keep things simple.


OK pending tests?
Oh, we brought over the multiple sub-range bits to value_range_base... 
yeah we can remove that and just check for operand equality.  we'll deal 
with multiple subranges when thats a thing.



Is this any different than just calling value_range_base::equal_p()?


Ooops, indeed, that's the same thing.

Adjusted.

OK pending tests?
commit eb3ca5b2eeb84233a485981e140c6f910deb4bcc
Author: Aldy Hernandez 
Date:   Mon Nov 4 21:20:26 2019 +0100

Use value_range_base::equal_p in value_range_base::operator== so we can handle
symbolics without dying.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 254b3950a8e..b9f61791270 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::operator==): Use equal_p to
+	properly handle symbolics.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (value_range_base::set): Do not special case pointers.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 452895bfc24..9d6a4ebc7fe 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6333,19 +6333,7 @@ range_compatible_p (tree t1, tree t2)
 bool
 value_range_base::operator== (const value_range_base ) const
 {
-  if (undefined_p ())
-return r.undefined_p ();
-
-  if (num_pairs () != r.num_pairs ()
-  || !range_compatible_p (type (), r.type ()))
-return false;
-
-  for (unsigned p = 0; p < num_pairs (); p++)
-if (wi::ne_p (lower_bound (p), r.lower_bound (p))
-	|| wi::ne_p (upper_bound (p), r.upper_bound (p)))
-  return false;
-
-  return true;
+  return equal_p (r);
 }
 
 /* Visit all arguments for PHI node PHI that flow through executable


Re: [PATCH v2] PR92090: Fix testcase failures by r276469

2019-11-04 Thread Joseph Myers
On Mon, 4 Nov 2019, luoxhu wrote:

> -finline-functions is enabled by default for O2 since r276469, update the
> test cases with -fno-inline-functions.
> 
> v2: disable inlining for the failed cases.  Add two more failed cases
> not listed in BZ.  Tested on P8LE, P8BE and P9LE.

If inlining (or other interprocedural analysis) invalidates a test's 
intent (e.g. all the code gets optimized away), our normal approach is to 
use noinline etc. function attributes to prevent that inlining.

If you're adding such options to work around an ICE, which certainly 
appears to be the case in the architecture-independent testcases here, you 
should (a) have comments in the tests saying explicitly that the options 
are there temporarily to work around the ICE in a bug whose number is 
given in the comment, and (b) a remark in the open regression bug for the 
ICE saying that those options have been added as a temporary workaround 
and that a patch fixing the ICE should remove them again.  The commit 
message also needs to make very clear that the commit is *not* a fix for 
that bug and so it must *not* be closed as fixed until there is an actual 
fix for the ICE.

So I don't think this patch is OK without having such comments in the 
tests to explain the issue and a carefully written commit message warning 
that the patch is a workaround, not a fix and the bug in question must not 
be closed simply because of the commit mentioning it.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: handle symbolics when comparing ranges

2019-11-04 Thread Andrew MacLeod

On 11/4/19 5:23 PM, Aldy Hernandez wrote:
value_range_base::operator== was originally lifted from a world where 
symbolics didn't exist (the ranger branch), but symbolics do exist in 
mainline.


Although this isn't causing a problem yet, as soon as someone tries to 
compare non numeric ranges, we'll die.  Using operand_equal_p 
simplifies the comparison code drastically.


I suppose if/when we get multiple sub-ranges in value_range_base, 
we'll have to adapt this function again to compare things piece wise.  
For now, let's keep things simple.


OK pending tests?
Oh, we brought over the multiple sub-range bits to value_range_base... 
yeah we can remove that and just check for operand equality.  we'll deal 
with multiple subranges when thats a thing.



Is this any different than just calling value_range_base::equal_p()?

Andrew








Re: [PATCH v2] PR92090: Fix testcase failures by r276469

2019-11-04 Thread Segher Boessenkool
Hi!

On Mon, Nov 04, 2019 at 11:29:56AM +0800, luoxhu wrote:
> -finline-functions is enabled by default for O2 since r276469, update the
> test cases with -fno-inline-functions.

Jeff already okay'ed it; just some changelog nits:

>   2019-10-30  Xiong Hu Luo  
> 
>   PR92090

PR other/92090

>   * gcc.target/powerpc/pr72804.c: Likewie.

"Likewise."

Thanks!


Segher


[PATCH][C++] * typeck.c (check_return_expr): Avoid redundant error.

2019-11-04 Thread Jason Merrill
I noticed that when returning an ambiguous call in a void function, we then
uselessly gave another error about returning a non-void expression.

Tested x86_64-pc-linux-gnu, applying to trunk.


---
 gcc/cp/typeck.c  | 2 +-
 gcc/testsuite/g++.dg/other/return2.C | 7 +++
 2 files changed, 8 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/other/return2.C

diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 29a2942dcaf..27d97859cb3 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -9729,7 +9729,7 @@ check_return_expr (tree retval, bool *no_warning)
   type.  In that case, we have to evaluate the expression for
   its side-effects.  */
finish_expr_stmt (retval);
-  else
+  else if (retval != error_mark_node)
permerror (input_location,
   "return-statement with a value, in function "
   "returning %qT", valtype);
diff --git a/gcc/testsuite/g++.dg/other/return2.C 
b/gcc/testsuite/g++.dg/other/return2.C
new file mode 100644
index 000..b328fa6b5f7
--- /dev/null
+++ b/gcc/testsuite/g++.dg/other/return2.C
@@ -0,0 +1,7 @@
+void f(long);
+void f(char);
+
+void g()
+{
+  return f(42);// { dg-error "ambiguous" }
+}  // { dg-bogus "void" "" { target *-*-* } .-1 }

base-commit: e55fdf0aaa09abf207b37e2e6a52136f3f5b153d
-- 
2.18.1



Re: do not special case pointers while canonicalizing ranges

2019-11-04 Thread Andrew MacLeod

On 11/4/19 5:17 PM, Aldy Hernandez wrote:

And now with patch!

On 11/4/19 11:15 PM, Aldy Hernandez wrote:
There's no need to special case pointers when converting suitable 
VR_ANTI_RANGE's into VR_RANGE's now that vrp_val*{min, max} handle 
pointers by default.


OK?

this seems fine, and skirts very close to what I'd call obvious :-)

Approved.



Re: [PATCH] avoid folding of invalid indices to compound literals (PR 92341)

2019-11-04 Thread Jeff Law
On 11/4/19 3:05 PM, Martin Sebor wrote:
> While testing some other changes I noticed that -Warray-bounds
> fails to detect out-of-bounds indices to compound literals such
> as in:
> 
>   int *p = (int[]){ 1, 2, 3 };
>   // ...
>   p[3] = 7;
> 
> This is because SRA transforms such references into accesses to
> uninitialized scalar variables and also sets the TREE_NO_WARNING
> bit for the replacement variables.  This prevents -Wuninitialized
> from detecting such bugs, although that wouldn't be the right
> warning to issue in these cases).
> 
> The attached patch tweaks SRA to avoid this transformation when
> the access is out of the bounds of the referenced variable.  That
> in turn lets -Warray-bounds diagnose these invalid accesses.
> 
> The patch also adjusts -Warray-bounds to reference to correct
> index and message and issue the warning even for zero-length
> compound literal arrays.  This was exposed and the fix is relied
> on by the test I wrote for the compound literals.
> 
> Finally, the change also corrects an oversight of mine from some
> time ago in failing to handle out-of-bounds indices relative to
> addresses of function parameters.  This is a trivial one-line
> tweak that could be submitted separately but it doesn't seem
> worth the overhead.
> 
> Tested on x86_64-linux.
> 
> Martin
> 
> gcc-92341.diff
> 
> PR middle-end/92341 - missing -Warray-bounds indexing past the end of a 
> compound literal
> PR middle-end/82612 - missing -Warray-bounds on a non-zero offset from the 
> address of a non-array object
> 
> gcc/testsuite/ChangeLog:
> 
>   PR middle-end/92341
>   PR middle-end/82612
>   * gcc.dg/Warray-bounds-50.c: New test.
>   * gcc.dg/Warray-bounds-51.c: New test.
> 
> gcc/ChangeLog:
> 
>   PR middle-end/92341
>   PR middle-end/82612
>   * tree-sra.c (get_access_for_expr): Fail for out-of-bounds offsets.
>   * tree-vrp.c (vrp_prop::check_array_ref): Correct index and text
>   of message printed in a warning for empty arrays.
>   (vrp_prop::check_mem_ref): Also handle function parameters and
>   empty arrays.
OK
jeff



handle symbolics when comparing ranges

2019-11-04 Thread Aldy Hernandez
value_range_base::operator== was originally lifted from a world where 
symbolics didn't exist (the ranger branch), but symbolics do exist in 
mainline.


Although this isn't causing a problem yet, as soon as someone tries to 
compare non numeric ranges, we'll die.  Using operand_equal_p simplifies 
the comparison code drastically.


I suppose if/when we get multiple sub-ranges in value_range_base, we'll 
have to adapt this function again to compare things piece wise.  For 
now, let's keep things simple.


OK pending tests?
commit d1177659bd26ac8122410dee092f5ca427b4558b
Author: Aldy Hernandez 
Date:   Mon Nov 4 21:20:26 2019 +0100

Use operand_equal_p in value_range_base::operator== so we can handle
symbolics without dying.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 6fbbf87e294..3ebe7fd4348 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::operator==): Use operand_equal_p
+	instead of wide-int's, to properly handle symbolics.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (value_range_base::set): Do not special case pointers.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 452895bfc24..e683339f3cd 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6336,16 +6336,12 @@ value_range_base::operator== (const value_range_base ) const
   if (undefined_p ())
 return r.undefined_p ();
 
-  if (num_pairs () != r.num_pairs ()
-  || !range_compatible_p (type (), r.type ()))
+  if (!range_compatible_p (type (), r.type ()))
 return false;
 
-  for (unsigned p = 0; p < num_pairs (); p++)
-if (wi::ne_p (lower_bound (p), r.lower_bound (p))
-	|| wi::ne_p (upper_bound (p), r.upper_bound (p)))
-  return false;
-
-  return true;
+  return (m_kind == r.m_kind
+	  && operand_equal_p (m_min, r.m_min, 0)
+	  && operand_equal_p (m_max, r.m_max, 0));
 }
 
 /* Visit all arguments for PHI node PHI that flow through executable


Re: do not special case pointers while canonicalizing ranges

2019-11-04 Thread Aldy Hernandez

And now with patch!

On 11/4/19 11:15 PM, Aldy Hernandez wrote:
There's no need to special case pointers when converting suitable 
VR_ANTI_RANGE's into VR_RANGE's now that vrp_val*{min, max} handle 
pointers by default.


OK?
commit 2a8a783d542158405d2b90b5361669a8aa56ea83
Author: Aldy Hernandez 
Date:   Mon Nov 4 21:19:36 2019 +0100

Do not special case pointers in value_range_base::set.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 32af8bb3e9a..6fbbf87e294 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::set): Do not special case pointers.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.h (vrp_val_min): Remove handle_pointers argument.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 0a0d7d760a7..452895bfc24 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -793,10 +793,8 @@ value_range_base::set (enum value_range_kind kind, tree min, tree max)
 {
   /* For -fstrict-enums we may receive out-of-range ranges so consider
  values < -INF and values > INF as -INF/INF as well.  */
-  bool is_min = (INTEGRAL_TYPE_P (type)
-		 && tree_int_cst_compare (min, TYPE_MIN_VALUE (type)) <= 0);
-  bool is_max = (INTEGRAL_TYPE_P (type)
-		 && tree_int_cst_compare (max, TYPE_MAX_VALUE (type)) >= 0);
+  bool is_min = vrp_val_is_min (min);
+  bool is_max = vrp_val_is_max (max);
 
   if (is_min && is_max)
 	{
@@ -816,10 +814,7 @@ value_range_base::set (enum value_range_kind kind, tree min, tree max)
 	min = max = vrp_val_min (TREE_TYPE (min));
 	  kind = VR_RANGE;
 	}
-  else if (is_min
-	   /* Allow non-zero pointers to be normalized to [1,MAX].  */
-	   || (POINTER_TYPE_P (TREE_TYPE (min))
-		   && integer_zerop (min)))
+  else if (is_min)
 {
 	  tree one = build_int_cst (TREE_TYPE (max), 1);
 	  min = int_const_binop (PLUS_EXPR, max, one);


do not special case pointers while canonicalizing ranges

2019-11-04 Thread Aldy Hernandez
There's no need to special case pointers when converting suitable 
VR_ANTI_RANGE's into VR_RANGE's now that vrp_val*{min, max} handle 
pointers by default.


OK?


[PATCH] avoid folding of invalid indices to compound literals (PR 92341)

2019-11-04 Thread Martin Sebor

While testing some other changes I noticed that -Warray-bounds
fails to detect out-of-bounds indices to compound literals such
as in:

  int *p = (int[]){ 1, 2, 3 };
  // ...
  p[3] = 7;

This is because SRA transforms such references into accesses to
uninitialized scalar variables and also sets the TREE_NO_WARNING
bit for the replacement variables.  This prevents -Wuninitialized
from detecting such bugs, although that wouldn't be the right
warning to issue in these cases).

The attached patch tweaks SRA to avoid this transformation when
the access is out of the bounds of the referenced variable.  That
in turn lets -Warray-bounds diagnose these invalid accesses.

The patch also adjusts -Warray-bounds to reference to correct
index and message and issue the warning even for zero-length
compound literal arrays.  This was exposed and the fix is relied
on by the test I wrote for the compound literals.

Finally, the change also corrects an oversight of mine from some
time ago in failing to handle out-of-bounds indices relative to
addresses of function parameters.  This is a trivial one-line
tweak that could be submitted separately but it doesn't seem
worth the overhead.

Tested on x86_64-linux.

Martin
PR middle-end/92341 - missing -Warray-bounds indexing past the end of a compound literal
PR middle-end/82612 - missing -Warray-bounds on a non-zero offset from the address of a non-array object

gcc/testsuite/ChangeLog:

	PR middle-end/92341
	PR middle-end/82612
	* gcc.dg/Warray-bounds-50.c: New test.
	* gcc.dg/Warray-bounds-51.c: New test.

gcc/ChangeLog:

	PR middle-end/92341
	PR middle-end/82612
	* tree-sra.c (get_access_for_expr): Fail for out-of-bounds offsets.
	* tree-vrp.c (vrp_prop::check_array_ref): Correct index and text
	of message printed in a warning for empty arrays.
	(vrp_prop::check_mem_ref): Also handle function parameters and
	empty arrays.

diff --git a/gcc/testsuite/gcc.dg/Warray-bounds-50.c b/gcc/testsuite/gcc.dg/Warray-bounds-50.c
new file mode 100644
index 000..c27e6a494f3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/Warray-bounds-50.c
@@ -0,0 +1,96 @@
+/* PR middle-end/92341 - missing -Warray-bounds indexing past the end
+   of a compound literal
+   { dg-do compile }
+   { dg-options "-O2 -Wall -ftrack-macro-expansion=0" } */
+
+#include "range.h"
+
+#define INT_MAX__INT_MAX__
+#define INT_MIN(-__INT_MAX__ - 1)
+
+void sink (int, ...);
+
+
+#define T(...) sink (__LINE__, (__VA_ARGS__))
+
+
+void direct_idx_cst (void)
+{
+  T ((int[]){ }[-1]);   // { dg-warning "array subscript -1 is outside array bounds of 'int\\\[0]'" }
+  T ((int[]){ }[0]);// { dg-warning "array subscript 0 is outside array bounds of 'int\\\[0]'" }
+  T ((int[]){ }[1]);// { dg-warning "array subscript 1 is outside array bounds of 'int\\\[0]'" }
+
+  T ((int[]){ 1 }[-1]); // { dg-warning "array subscript -1 is below array bounds of 'int\\\[1]'" }
+  T ((int[]){ 1 }[0]);
+  T ((int[]){ 1 }[1]);  // { dg-warning "array subscript 1 is above array bounds of 'int\\\[1]'" }
+  T ((int[]){ 1 }[INT_MIN]);// { dg-warning "array subscript -\[0-9\]+ is below array bounds of 'int\\\[1]'" }
+  T ((int[]){ 1 }[INT_MAX]);// { dg-warning "array subscript \[0-9\]+ is above array bounds of 'int\\\[1]'" }
+  T ((int[]){ 1 }[SIZE_MAX]);   // { dg-warning "array subscript \[0-9\]+ is above array bounds of 'int\\\[1]'" }
+}
+
+
+void direct_idx_var (int i)
+{
+  T ((char[]){ }[i]);   // { dg-warning "array subscript i is outside array bounds of 'char\\\[0]'" }
+  T ((int[]){ }[i]);// { dg-warning "array subscript i is outside array bounds of 'int\\\[0]'" }
+}
+
+
+void direct_idx_range (void)
+{
+  ptrdiff_t i = SR (-2, -1);
+
+  T ((int[]){ 1 }[i]);  // { dg-warning "array subscript \[ \n\r]+ is outside array bounds of 'int\\\[0]'" "pr?" { xfail *-*-* } }
+}
+
+
+#undef T
+#define T(idx, ...) do {			\
+int *p = (__VA_ARGS__);			\
+sink (p[idx]);\
+  } while (0)
+
+void ptr_idx_cst (void)
+{
+  T (-1, (int[]){ });   // { dg-warning "array subscript -1 is outside array bounds of 'int\\\[0]'" }
+  T ( 0, (int[]){ });   // { dg-warning "array subscript 0 is outside array bounds of 'int\\\[0]'" }
+  T (+1, (int[]){ });   // { dg-warning "array subscript 1 is outside array bounds of 'int\\\[0]'" }
+
+  T (-1, (int[]){ 1 }); // { dg-warning "array subscript -1 is outside array bounds of 'int\\\[1]'" }
+  T ( 0, (int[]){ 1 });
+  T (+1, (int[]){ 1 }); // { dg-warning "array subscript 1 is outside array bounds of 'int\\\[1]'" }
+  T (INT_MIN, (int[]){ 1 });// { dg-warning "array subscript -\[0-9\]+ is outside array bounds of 'int\\\[1]'" }
+  T (INT_MAX, (int[]){ 1 });// { dg-warning "array subscript \[0-9\]+ is outside array bounds of 'int\\\[1]'" }
+  T (SIZE_MAX, (int[]){ 1 });   // { dg-warning "array subscript -?\[0-9\]+ is outside array bounds of 'int\\\[1]'" }
+}
+
+
+void ptr_idx_var 

Re: always handle pointers in vrp_val*{min,max}

2019-11-04 Thread Andrew MacLeod

On 11/4/19 3:03 PM, Aldy Hernandez wrote:
When I added the range-ops code, I had to teach vrp_val_is*{min,max} 
about pointers, since it would just ignore them and return NULL. I was 
overly cautious about changing existing behavior and decided to add a 
handle_pointers argument, with a default value of FALSE, and just 
changed the needed callers on a need-to basis.


Little by little we've been changing every caller to TRUE, and it's 
becoming increasingly obvious that we should always handle pointers.  
I can't think of a reason why we wouldn't want to handle them, and if 
there is ever one, surely the caller can avoid calling these functions.


OK for trunk?
I thought it was weird initially anyway.. why wouldn't we handle 
pointers :-)   Furthermore, if we didn't want to handle them for some 
reason, the call site would be the place to deal with it.


Approved assuming no regressions.

Andrew



Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Georg-Johann Lay

Kwok Cheung Yeung schrieb:
The AMD GCN architecture uses 64-bit pointers, but the scalar registers 
are 32-bit wide, so pointers must reside in a pair of registers.


The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.


GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only set 
for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers that 
may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, and 
sets the required variables for HARD_FRAME_POINTER_REGNUM and however 
many adjacent registers are needed (which on most architectures should 
be zero).


Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any 
other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?


If 16-bit pointers with 8-bit GPRs is similar enough: The avr port.

Johann

I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.


Okay for trunk?

Kwok Yeung


Initialize global.statics_read in ipa-reference

2019-11-04 Thread Jan Hubicka
Hi,
while working on allocator reorg for summaries I noticed that
we never initialize statics_read in ipa-reference which is later used to
bookkeep the strongly connected components.  I am bit surprised that
this ever worked w/o crashing, but it seems to be the case.

Bootstrapped/regteted x86_64-linux, comitted.

Index: ipa-reference.c
===
--- ipa-reference.c (revision 277780)
+++ ipa-reference.c (working copy)
@@ -492,6 +493,7 @@ init_function_info (struct cgraph_node *
 
   info->local.statics_read = BITMAP_ALLOC (_info_obstack);
   info->local.statics_written = BITMAP_ALLOC (_info_obstack);
+  info->global.statics_read = NULL;
 
   return >local;
 }


[committed] do not create non-canonical ranges in value_range_base::invert

2019-11-04 Thread Aldy Hernandez
value_range_base::invert is twiddling the range kind in place.  You 
can't do that, because you may create non-canonical ranges.  Fixed by 
using the canonicalizing constructor.


Committed as obvious.
commit 7a1b10ff2446e3e7800a19e8970bfe57f894cda9
Author: Aldy Hernandez 
Date:   Mon Nov 4 21:15:47 2019 +0100

Use the value_range_base constructors in value_range_base::invert to
make sure we build canonically correct ranges.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c585360b537..6bdd5ffddbd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::invert): Use constructors to build
+	range.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (range_int_cst_singleton_p): Remove.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 070db903147..085308e519f 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6286,10 +6286,12 @@ value_range_base::contains_p (tree cst) const
 void
 value_range_base::invert ()
 {
+  /* We can't just invert VR_RANGE and VR_ANTI_RANGE because we may
+ create non-canonical ranges.  Use the constructors instead.  */
   if (m_kind == VR_RANGE)
-m_kind = VR_ANTI_RANGE;
+*this = value_range_base (VR_ANTI_RANGE, m_min, m_max);
   else if (m_kind == VR_ANTI_RANGE)
-m_kind = VR_RANGE;
+*this = value_range_base (VR_RANGE, m_min, m_max);
   else
 gcc_unreachable ();
 }


Re: [PATCH V3] rs6000: Refine small loop unroll in loop_unroll_adjust hook

2019-11-04 Thread Segher Boessenkool
Hi!

On Mon, Nov 04, 2019 at 02:31:43PM +0800, Jiufu Guo wrote:
> In this patch, loop unroll adjust hook is introduced for powerpc.  We can do
> target related hueristic adjustment in this hook. In this patch, small loops
> is unrolled 2 times for O2 and O3 by default.  With this patch, we can see
> some improvement for spec2017.  This patch enhanced a little for [Patch V2] to
> enable small loops unroll for O3 by default like O2.

>   * gcc/config/rs6000/rs6000.opt (unroll_small_loops): New internal flag.

That's the declaration of a variable.  A command line flag is something
like -munroll-small-loops.  Do we want a command line option like that?
It makes testing simpler.

> -  /* unroll very small loops 2 time if no -funroll-loops.  */
> +  /* If funroll-loops is not enabled explicitly, then enable small loops
> +  unrolling for -O2, and do not turn fweb or frename-registers on.  */
>if (!global_options_set.x_flag_unroll_loops
> && !global_options_set.x_flag_unroll_all_loops)
>   {
> -   maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 2,
> -  global_options.x_param_values,
> -  global_options_set.x_param_values);
> -
> -   maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 20,
> -  global_options.x_param_values,
> -  global_options_set.x_param_values);
> +   unroll_small_loops = optimize >= 2 ? 1 : 0;

That includes -Os?

I think you shouldn't always set it to some value, only enable it where
you want to enable it.  If you make a command line option for it this is
especially simple (the table in common/config/rs6000/rs6000-common.c).

> +static unsigned
> +rs6000_loop_unroll_adjust (unsigned nunroll, struct loop * loop)
> +{
> +  if (unroll_small_loops)
> +{
> +  /* TODO: This is hardcoded to 10 right now.  It can be refined, for
> +  example we may want to unroll very small loops more times (4 perhaps).
> +  We also should use a PARAM for this.  */
> +  if (loop->ninsns <= 10)
> + return MIN (2, nunroll);
> +  else
> + return 0;
> +}

(Add an empty line here?)

> +  return nunroll;
> +}

Great :-)

> @@ -23472,6 +23488,7 @@ rs6000_function_specific_save (struct 
> cl_target_option *ptr,
>  {
>ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
>ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
> +  ptr->x_unroll_small_loops = opts->x_unroll_small_loops;
>  }

Yeah we shouldn't need to add that, this should all be automatic.


Segher


Re: [PATCH, rs6000] Make load cost more in vectorization cost for P8/P9

2019-11-04 Thread Segher Boessenkool
Hi!

On Mon, Nov 04, 2019 at 03:16:06PM +0800, Kewen.Lin wrote:
> To align with rs6000_insn_cost costing more for load type insns,

(Which itself has history in rs6000_rtx_costs).

> this patch is to make load insns cost more in vectorization cost
> function.  Considering that the result of load usually is used
> somehow later (true-dep) but store won't, we keep the store as
> before.

The latency of load insns is about twice that of "simple" instructions;
2 vs. 1 on older cores, and 4 (or so) vs. 2 on newer cores.

> The SPEC2017 performance evaluation on Power8 shows 525.x264_r
> +9.56%, 511.povray_r +2.08%, 527.cam4_r 1.16% gains, no 
> significant degradation, SPECINT geomean +0.88%, SPECFP geomean
> +0.26%.

Nice :-)

> The SPEC2017 performance evaluation on Power9 shows no significant
> improvement or degradation, SPECINT geomean +0.04%, SPECFP geomean
> +0.04%.
> 
> The SPEC2006 performance evaluation on Power8 shows 454.calculix
> +4.41% gain but 416.gamess -1.19% and 453.povray -3.83% degradation.
> I looked into the two degradation bmks, the degradation were NOT
> due to hotspot changes by vectorization, were all side effects.
> SPECINT geomean +0.10%, SPECFP geomean no changed considering
> the degradation.

Also nice.

> --- a/gcc/config/rs6000/rs6000.c
> +++ b/gcc/config/rs6000/rs6000.c
> @@ -4763,15 +4763,22 @@ rs6000_builtin_vectorization_cost (enum 
> vect_cost_for_stmt type_of_cost,
>switch (type_of_cost)
>  {
>case scalar_stmt:
> -  case scalar_load:
>case scalar_store:
>case vector_stmt:
> -  case vector_load:
>case vector_store:
>case vec_to_scalar:
>case scalar_to_vec:
>case cond_branch_not_taken:
>  return 1;
> +  case scalar_load:
> +  case vector_load:
> + /* Like rs6000_insn_cost, make load insns cost a bit more. FIXME: the

(two spaces after full stop).

> +benefits were observed on Power8 and up, we can unify it if similar
> +profits are measured on Power6 and Power7.  */
> + if (TARGET_P8_VECTOR)
> +   return 2;
> + else
> +   return 1;

Hrm, but you showed benchmark improvements for p9 as well?

What happens if you enable this for everything as well?

> -return 2;
> + /* Like rs6000_insn_cost, make load insns cost a bit more. FIXME: the
> +benefits were observed on Power8 and up, we can unify it if similar
> +profits are measured on Power6 and Power7.  */
> + if (TARGET_P8_VECTOR)
> +   return 4;
> + else
> +   return 2;

And this.


Segher


always handle pointers in vrp_val*{min,max}

2019-11-04 Thread Aldy Hernandez
When I added the range-ops code, I had to teach vrp_val_is*{min,max} 
about pointers, since it would just ignore them and return NULL.  I was 
overly cautious about changing existing behavior and decided to add a 
handle_pointers argument, with a default value of FALSE, and just 
changed the needed callers on a need-to basis.


Little by little we've been changing every caller to TRUE, and it's 
becoming increasingly obvious that we should always handle pointers.  I 
can't think of a reason why we wouldn't want to handle them, and if 
there is ever one, surely the caller can avoid calling these functions.


OK for trunk?
commit 4c4ce7228754d847daa3b99e4ee0d4c466512d1a
Author: Aldy Hernandez 
Date:   Mon Nov 4 17:55:57 2019 +0100

Remove handle_pointers argument from all the vrp_val*{min,max} functions.  Always
assume pointers should be handled.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c585360b537..a9870475ea7 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,26 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.h (vrp_val_min): Remove handle_pointers argument.
+	(vrp_val_max): Same.
+	(vrp_val_is_min): Same.
+	(vrp_val_is_max): Same.
+	(value_range_base::nonzero_p): Remove last argument to
+	vrp_val_is_max.
+	* tree-vrp.c (vrp_val_min): Remove handle_pointers argument.
+	(vrp_val_max): Same.
+	(vrp_val_is_min): Same.
+	(vrp_val_is_max): Same.
+	(value_range_base::set_varying): Remove last argument to vrp_val*.
+	(value_range_base::dump): Same.
+	(value_range_base::set): Same.
+	(value_range_base::normalize_symbolics): Same.
+	(value_range_base::num_pairs): Same.
+	(value_range_base::lower_bound): Same.
+	(value_range_base::upper_bound): Same.
+	(ranges_from_anti_range): Remove handle_pointers argument.
+	(value_range_base::singleton_p): Remove last argument to
+	ranges_from_anti_range.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (range_int_cst_singleton_p): Remove.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 070db903147..2d3e76af2a8 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -71,8 +71,7 @@ along with GCC; see the file COPYING3.  If not see
 
 static bool
 ranges_from_anti_range (const value_range_base *ar,
-			value_range_base *vr0, value_range_base *vr1,
-			bool handle_pointers = false);
+			value_range_base *vr0, value_range_base *vr1);
 
 /* Set of SSA names found live during the RPO traversal of the function
for still active basic-blocks.  */
@@ -310,8 +309,8 @@ value_range_base::set_varying (tree type)
   m_kind = VR_VARYING;
   if (supports_type_p (type))
 {
-  m_min = vrp_val_min (type, true);
-  m_max = vrp_val_max (type, true);
+  m_min = vrp_val_min (type);
+  m_max = vrp_val_max (type);
 }
   else
 /* We can't do anything range-wise with these types.  */
@@ -382,7 +381,7 @@ value_range_base::singleton_p (tree *result) const
   if (num_pairs () == 1)
 	{
 	  value_range_base vr0, vr1;
-	  ranges_from_anti_range (this, , , true);
+	  ranges_from_anti_range (this, , );
 	  return vr0.singleton_p (result);
 	}
 }
@@ -429,7 +428,7 @@ value_range_base::dump (FILE *file) const
   fprintf (file, ", ");
 
   if (supports_type_p (ttype)
-	  && vrp_val_is_max (max (), true)
+	  && vrp_val_is_max (max ())
 	  && TYPE_PRECISION (ttype) != 1)
 	fprintf (file, "+INF");
   else
@@ -574,11 +573,11 @@ static assert_locus **asserts_for;
 /* Return the maximum value for TYPE.  */
 
 tree
-vrp_val_max (const_tree type, bool handle_pointers)
+vrp_val_max (const_tree type)
 {
   if (INTEGRAL_TYPE_P (type))
 return TYPE_MAX_VALUE (type);
-  if (POINTER_TYPE_P (type) && handle_pointers)
+  if (POINTER_TYPE_P (type))
 {
   wide_int max = wi::max_value (TYPE_PRECISION (type), TYPE_SIGN (type));
   return wide_int_to_tree (const_cast (type), max);
@@ -589,11 +588,11 @@ vrp_val_max (const_tree type, bool handle_pointers)
 /* Return the minimum value for TYPE.  */
 
 tree
-vrp_val_min (const_tree type, bool handle_pointers)
+vrp_val_min (const_tree type)
 {
   if (INTEGRAL_TYPE_P (type))
 return TYPE_MIN_VALUE (type);
-  if (POINTER_TYPE_P (type) && handle_pointers)
+  if (POINTER_TYPE_P (type))
 return build_zero_cst (const_cast (type));
   return NULL_TREE;
 }
@@ -604,9 +603,9 @@ vrp_val_min (const_tree type, bool handle_pointers)
is not == to the integer constant with the same value in the type.  */
 
 bool
-vrp_val_is_max (const_tree val, bool handle_pointers)
+vrp_val_is_max (const_tree val)
 {
-  tree type_max = vrp_val_max (TREE_TYPE (val), handle_pointers);
+  tree type_max = vrp_val_max (TREE_TYPE (val));
   return (val == type_max
 	  || (type_max != NULL_TREE
 	  && operand_equal_p (val, type_max, 0)));
@@ -615,9 +614,9 @@ vrp_val_is_max (const_tree val, bool handle_pointers)
 /* Return whether VAL is equal to the minimum value of its type.  */
 
 bool
-vrp_val_is_min (const_tree val, bool handle_pointers)
+vrp_val_is_min (const_tree val)
 {
-  tree type_min = vrp_val_min (TREE_TYPE 

Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Dimitar Dimitrov
On Sat, 2 Nov 2019, 19:28:38 EET Kwok Cheung Yeung wrote:
> The AMD GCN architecture uses 64-bit pointers, but the scalar registers
> are 32-bit wide, so pointers must reside in a pair of registers.
...
> Bootstrapped on x86_64 and tested with no regressions, which is not
> surprising as nothing different happens when the FP fits into a single
> register. I believe this is true for the 64-bit variants of the more
> popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there any
> other architectures similar to GCN (i.e. 64-bit pointers with 32-bit GPRs)?
Yes. PRU uses four 8-bit HW registers to hold 32-bit pointers. 

> 
...
> diff --git a/gcc/ira.c b/gcc/ira.c
> index 9f8da67..25e9359 100644
> --- a/gcc/ira.c
> +++ b/gcc/ira.c
> @@ -515,7 +515,13 @@ setup_alloc_regs (bool use_hard_frame_p)
>   #endif
> no_unit_alloc_regs = fixed_nonglobal_reg_set;
> if (! use_hard_frame_p)
> -SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
> +{
> +  int fp_reg_count = hard_regno_nregs (HARD_FRAME_POINTER_REGNUM,
> Pmode);
> +  for (int reg = HARD_FRAME_POINTER_REGNUM;
> +reg < HARD_FRAME_POINTER_REGNUM + fp_reg_count;
> +reg++)
> + SET_HARD_REG_BIT (no_unit_alloc_regs, reg);
> +}
Please consider using the existing helper function instead:
   add_to_hard_reg_set (_unit_alloc_regs, Pmode, reg);


Regards,
Dimitar




[committed] remove unused range_int_cst_singleton_p

2019-11-04 Thread Aldy Hernandez
This function is no longer being called.  The behavior in it is handled 
by the API (value_range_base::singleton_p).


Committed as obvious.

commit 9d22f224ec66efb0abf1394bff7e3d0080498417
Author: Aldy Hernandez 
Date:   Mon Nov 4 12:46:00 2019 +0100

Remove unused range_int_cst_singleton_p.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 407470f0ed6..548f2fca970 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (range_int_cst_singleton_p): Remove.
+	* tree-vrp.h (range_int_cst_singleton_p): Remove.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (value_range_base::normalize_addresses): Handle
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index c0c1e87a259..070db903147 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -945,15 +945,6 @@ range_int_cst_p (const value_range_base *vr)
   return (vr->kind () == VR_RANGE && range_has_numeric_bounds_p (vr));
 }
 
-/* Return true if VR is a INTEGER_CST singleton.  */
-
-bool
-range_int_cst_singleton_p (const value_range_base *vr)
-{
-  return (range_int_cst_p (vr)
-	  && tree_int_cst_equal (vr->min (), vr->max ()));
-}
-
 /* Return the single symbol (an SSA_NAME) contained in T if any, or NULL_TREE
otherwise.  We only handle additive operations and set NEG to true if the
symbol is negated and INV to the invariant part, if any.  */
diff --git a/gcc/tree-vrp.h b/gcc/tree-vrp.h
index 4bfdfeb8f79..1fde88fe0fe 100644
--- a/gcc/tree-vrp.h
+++ b/gcc/tree-vrp.h
@@ -282,7 +282,6 @@ extern bool infer_value_range (gimple *, tree, tree_code *, tree *);
 extern bool vrp_bitmap_equal_p (const_bitmap, const_bitmap);
 
 extern bool range_int_cst_p (const value_range_base *);
-extern bool range_int_cst_singleton_p (const value_range_base *);
 
 extern int compare_values (tree, tree);
 extern int compare_values_warnv (tree, tree, bool *);


[committed] handle VR_UNDEFINED in normalize_addresses

2019-11-04 Thread Aldy Hernandez
We are currently ICEing when calling normalize_address on an undefined 
range because undefines do not have types.


Committed as obvious.
commit 12cdfbe483c8981ba724582c1faa1432e762b549
Author: Aldy Hernandez 
Date:   Mon Nov 4 12:16:37 2019 +0100

Handle VR_UNDEFINED in value_range_base::normalize_addresses().

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index d00348891bd..407470f0ed6 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (value_range_base::normalize_addresses): Handle
+	VR_UNDEFINED.
+
 2019-11-04  Aldy Hernandez  
 
 	* tree-vrp.c (dump_assert_info): New.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 7c35802dacc..c0c1e87a259 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -6130,6 +6130,9 @@ value_range::union_ (const value_range *other)
 value_range_base
 value_range_base::normalize_addresses () const
 {
+  if (undefined_p ())
+return *this;
+
   if (!POINTER_TYPE_P (type ()) || range_has_numeric_bounds_p (this))
 return *this;
 


[committed] add debugging routines for assert_info structure

2019-11-04 Thread Aldy Hernandez
We have a way of dumping the asserts in the asserts_for on-the-side 
structure, but we have no way of dumping the assert_info structure that 
evrp uses.  Being able to dump these are quite useful in debugging.


Committed as obvious.
commit 9244efff3f7ccce390c70c83d9fc46cef6152bb4
Author: Aldy Hernandez 
Date:   Mon Nov 4 12:14:56 2019 +0100

Implement debugging functions for assert_info's.

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index f29939a0dd8..d00348891bd 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,8 @@
+2019-11-04  Aldy Hernandez  
+
+	* tree-vrp.c (dump_assert_info): New.
+	(dump_asserts_info): New.
+
 2019-11-04  Tamar Christina  
 
 	* tree-vect-slp.c (vectorize_slp_instance_root_stmt): Initialize rstmt.
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index da6b6151b4a..7c35802dacc 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -2114,6 +2114,45 @@ debug_all_asserts (void)
   dump_all_asserts (stderr);
 }
 
+/* Dump assert_info structure.  */
+
+void
+dump_assert_info (FILE *file, const assert_info )
+{
+  fprintf (file, "Assert for: ");
+  print_generic_expr (file, assert.name);
+  fprintf (file, "\n\tPREDICATE: expr=[");
+  print_generic_expr (file, assert.expr);
+  fprintf (file, "] %s ", get_tree_code_name (assert.comp_code));
+  fprintf (file, "val=[");
+  print_generic_expr (file, assert.val);
+  fprintf (file, "]\n\n");
+}
+
+DEBUG_FUNCTION void
+debug (const assert_info )
+{
+  dump_assert_info (stderr, assert);
+}
+
+/* Dump a vector of assert_info's.  */
+
+void
+dump_asserts_info (FILE *file, const vec )
+{
+  for (unsigned i = 0; i < asserts.length (); ++i)
+{
+  dump_assert_info (file, asserts[i]);
+  fprintf (file, "\n");
+}
+}
+
+DEBUG_FUNCTION void
+debug (const vec )
+{
+  dump_asserts_info (stderr, asserts);
+}
+
 /* Push the assert info for NAME, EXPR, COMP_CODE and VAL to ASSERTS.  */
 
 static void


Avoid trashing of polymorphic call cache during inlining

2019-11-04 Thread Jan Hubicka
Hi,
I am not really pround of this implementation (and will think of better
interface), but this patch saves about 10% of WPA time by avoiding
unnecesary invalidations of the polymorphic call target hash during
inlining.

ipa-devirt register node removal hook to invalidate cache when one of
functions in it gets removed.  Now inliner often decides to inline into
a thunk. In order to get costs right it turns the thunk into a gimple
functions and re-inserts it into the summaries (so the summaries gets
computed for the actual thunk body). 

Bootstrapped/regtested x86_64-linux, comitted.

* ipa-inline-transform.c: Include ipa-utils.h
(inline_call): Set thunk_expansion flag.
* ipa-utils.h (thunk_expansion): Declare.
* ipa-devirt.c (thunk_expansion): New global var.
(devirt_node_removal_hook): Do not invalidate cache while
doing thunk expansion.
Index: ipa-inline-transform.c
===
--- ipa-inline-transform.c  (revision 277780)
+++ ipa-inline-transform.c  (working copy)
@@ -47,6 +47,7 @@ along with GCC; see the file COPYING3.
 #include "function.h"
 #include "cfg.h"
 #include "basic-block.h"
+#include "ipa-utils.h"
 
 int ncalls_inlined;
 int nfunctions_inlined;
@@ -352,6 +353,7 @@ inline_call (struct cgraph_edge *e, bool
   if (to->thunk.thunk_p)
 {
   struct cgraph_node *target = to->callees->callee;
+  thunk_expansion = true;
   symtab->call_cgraph_removal_hooks (to);
   if (in_lto_p)
to->get_untransformed_body ();
@@ -360,6 +362,7 @@ inline_call (struct cgraph_edge *e, bool
   for (e = to->callees; e && e->callee != target; e = e->next_callee)
;
   symtab->call_cgraph_insertion_hooks (to);
+  thunk_expansion = false;
   gcc_assert (e);
 }
 
Index: ipa-utils.h
===
--- ipa-utils.h (revision 277780)
+++ ipa-utils.h (working copy)
@@ -47,6 +47,9 @@ void ipa_merge_profiles (struct cgraph_n
 struct cgraph_node *src, bool preserve_body = false);
 bool recursive_call_p (tree, tree);
 
+/* In ipa-prop.c  */
+void ipa_remove_useless_jump_functions ();
+
 /* In ipa-profile.c  */
 bool ipa_propagate_frequency (struct cgraph_node *node);
 
@@ -54,6 +57,7 @@ bool ipa_propagate_frequency (struct cgr
 
 struct odr_type_d;
 typedef odr_type_d *odr_type;
+extern bool thunk_expansion;
 void build_type_inheritance_graph (void);
 void rebuild_type_inheritance_graph (void);
 void update_type_inheritance_graph (void);
Index: ipa-devirt.c
===
--- ipa-devirt.c(revision 277780)
+++ ipa-devirt.c(working copy)
@@ -172,6 +172,11 @@ struct default_hash_traits 
 }
 };
 
+/* HACK alert: this is used to communicate with ipa-inline-transform that
+   thunk is being expanded and there is no need to clear the polymorphic
+   call target cache.  */
+bool thunk_expansion;
+
 static bool odr_types_equivalent_p (tree, tree, bool, bool *,
hash_set *,
location_t, location_t);
@@ -2747,6 +2752,7 @@ static void
 devirt_node_removal_hook (struct cgraph_node *n, void *d ATTRIBUTE_UNUSED)
 {
   if (cached_polymorphic_call_targets
+  && !thunk_expansion
   && cached_polymorphic_call_targets->contains (n))
 free_polymorphic_call_targets_hash ();
 }


[committed][middle-end][SLP] Initialize variable to fix bootstrap after r277784.

2019-11-04 Thread Tamar Christina
Hi All,

This initializes the rstmt variable with NULL and adds an assert to
check that a value has been given by one of the if cases before use.

This fixes the bootstrap failure due to -Werror.

Bootstrapped on aarch64-none-linux-gnu and no issues.

Committed under the gcc obvious rule.

gcc/ChangeLog:

2019-11-04  Tamar Christina  

* tree-vect-slp.c (vectorize_slp_instance_root_stmt): Initialize rstmt.

-- 
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 5549d053bf3cd102a4c4fcdc2e890c596927bd55..f4b445ac1ef9cff8280964dcc8937b3b74fe2a7c 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -4149,7 +4149,7 @@ vect_remove_slp_scalar_calls (slp_tree node)
 void
 vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
 {
-  gassign *rstmt;
+  gassign *rstmt = NULL;
 
   if (SLP_TREE_NUMBER_OF_VEC_STMTS (node) == 1)
 {
@@ -4183,6 +4183,9 @@ vectorize_slp_instance_root_stmt (slp_tree node, slp_instance instance)
   tree r_constructor = build_constructor (rtype, v);
   rstmt = gimple_build_assign (lhs, r_constructor);
 }
+
+gcc_assert (rstmt);
+
 gimple_stmt_iterator rgsi = gsi_for_stmt (instance->root_stmt->stmt);
 gsi_replace (, rstmt, true);
 }



Re: Optimize handling of inline summaries

2019-11-04 Thread Jan Hubicka
> On 11/4/19 3:12 PM, Jan Hubicka wrote:
> > Martin, do you know why this flag was introduced?
> 
> Hi.
> 
> The flag is used in IPA CP:
> 
> call_summary 
> 
> class edge_clone_summary
> {
> ...
>   cgraph_edge *prev_clone;
>   cgraph_edge *next_clone;
> }

I see, so it is there to collect chains of duplications. I suppose it
makes sense even though it is bit unexpected use of summaries (I suppose
I approved it :)

In this case we want to more know that something was duplicated and
trigger creation. There are other cases where we do not want to
duplicate in all siutations (like when inline clone is created).
I was wondering about adding duplicate_p function which will by default
return true if source summary exists and which one can overwrite with
different behaviour.  What do you think?

Honza


Fix PR testsuite/92302

2019-11-04 Thread Eric Botcazou
This makes the test more robust by relaxing the regexp.

Tested on SPARC/Solaris 11, applied on the mainline.


2019-11-04  Eric Botcazou  

PR testsuite/92302
* gcc.target/sparc/sparc-ret-3.c: Accept more registers in address.

-- 
Eric BotcazouIndex: gcc.target/sparc/sparc-ret-3.c
===
--- gcc.target/sparc/sparc-ret-3.c	(revision 277436)
+++ gcc.target/sparc/sparc-ret-3.c	(working copy)
@@ -50,4 +50,4 @@ unsigned int bug(unsigned int crc, const
 
 	return *ctx;
 }
-/* { dg-final { scan-assembler "ld\[ \t\]*\\\[%i5\\+8\\\], %i0\n\[^\n\]*return\[ \t\]*%i7\\+8" } } */
+/* { dg-final { scan-assembler "ld\[ \t\]*\\\[%\[a-z0-9\]*\\+\[0-9\]*\\\], %i0\n\[^\n\]*return\[ \t\]*%i7\\+8" } } */


[PATCH] avoid assuming struct, memebers have constant sizes (PR 92349)

2019-11-04 Thread Martin Sebor

The recent -Warray-bounds enhancement to detect past-the-end
accesses to struct members has introduced the assumption that
struct members have constant sizes.  That assumption is not
safe for the GCC VLA member extension.  The attached change
removes the assumption without attempting to handle past-
the-end accesses to such VLA members.

Tested on x86_64-linux and committed in r277786.

Martin
PR tree-optimization/92349 - ICE in -Warray-bounds of a VLA member

gcc/testsuite/ChangeLog:

	PR tree-optimization/92349
	* gcc.dg/Warray-bounds-50.c: New test.

gcc/ChangeLog:

	PR tree-optimization/92349
	* tree-vrp.c (vrp_prop::check_array_ref): Avoid assuming struct
	memebers have constant sizes.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c	(revision 277783)
+++ gcc/tree-vrp.c	(working copy)
@@ -4164,7 +4164,8 @@ vrp_prop::check_array_ref (location_t location, tr
 	  /* Try to determine the size of the trailing array from
 		 its initializer (if it has one).  */
 	  if (tree refsize = component_ref_size (arg, _zero_len))
-		maxbound = refsize;
+		if (TREE_CODE (refsize) == INTEGER_CST)
+		  maxbound = refsize;
 	}
 
 	  if (maxbound == ptrdiff_max
Index: gcc/testsuite/gcc.dg/Warray-bounds-50.c
===
--- gcc/testsuite/gcc.dg/Warray-bounds-50.c	(nonexistent)
+++ gcc/testsuite/gcc.dg/Warray-bounds-50.c	(working copy)
@@ -0,0 +1,114 @@
+/* PR middle-end/92349 - ICE in -Warray-bounds on a VLA member
+   { dg-do compile }
+   { dg-options "-O2 -Wall" } */
+
+typedef __SIZE_TYPE__ size_t;
+
+void sink (void*, ...);
+
+void mem_vla_cst_store_idx (void)
+{
+  int n = 3;
+
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  s.a[0] = 0;
+  s.b = 0;
+
+  *++p = 1;
+  *++p = 2;
+
+  sink (, p);
+}
+
+void mem_vla_range_store_idx (int n)
+{
+  if (n < 3 || 4 < n)
+n = 3;
+
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  s.a[0] = 0;
+  s.b = 0;
+
+  *++p = 1;
+  *++p = 2;
+
+  sink (, p);
+}
+
+void mem_vla_var_store_idx (size_t n)
+{
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  s.a[0] = 0;
+  s.b = 0;
+
+  *++p = 1;
+  *++p = 2;
+
+  sink (, p);
+}
+
+
+void mem_vla_cst_store_ptr (void)
+{
+  int n = 3;
+
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+
+  sink (, p);
+}
+
+void mem_vla_range_store_ptr (int n)
+{
+  if (n < 3 || 4 < n)
+n = 3;
+
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+
+  sink (, p);
+}
+
+void mem_vla_var_store_ptr (size_t n)
+{
+  struct {
+char a[n], b;
+  } s;
+
+  char *p = s.a;
+
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+  *p++ = __LINE__;
+
+  sink (, p);
+}


[PATCH][vect] Disable vectorization of epilogues for loops with SIMDUID set

2019-11-04 Thread Andre Vieira (lists)

Hi,

I was using loop->simdlen to detect whether it was a SIMD loop and I 
don't believe that was correct, as can be witnessed by the mass failures 
in libgomp. My apologies for not running this, didn't think of it!


I found that these were failing because we do not handle vectorization 
of epilogues correctly when SIMDUID is set. For now Jakub and I agreed 
to disable epilogue vectorization for loops where SIMDUID is set until 
we have fixed this. See further comments inline.


I bootstrapped it on aarch64 and x86_64, ran libgomp on both.

This OK for trunk?

Cheers,
Andre

gcc/ChangeLog:
2019-11-04  Andre Vieira  

* tree-vect-loop.c (vect_analyze_loop): Disable epilogue
vectorization if loop->simduid is non null.

On 31/10/2019 16:58, Jakub Jelinek wrote:


FAIL: libgomp.c/../libgomp.c-c++-common/loop-1.c execution test
FAIL: libgomp.c/examples-4/simd-3.c execution test
FAIL: libgomp.c/pr58392.c execution test
FAIL: libgomp.c/scan-13.c execution test
FAIL: libgomp.c/scan-17.c execution test
FAIL: libgomp.c/scan-19.c execution test
FAIL: libgomp.c/scan-20.c execution test
FAIL: libgomp.c/simd-10.c execution test
FAIL: libgomp.c/simd-12.c execution test
FAIL: libgomp.c/simd-13.c execution test
FAIL: libgomp.c/simd-6.c execution test
FAIL: libgomp.c++/../libgomp.c-c++-common/loop-1.c execution test
FAIL: libgomp.c++/simd-8.C execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O1  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O2  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -g  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -Os  execution test
FAIL: libgomp.fortran/nestedfn5.f90   -O1  execution test
FAIL: libgomp.fortran/nestedfn5.f90   -O2  execution test
FAIL: libgomp.fortran/nestedfn5.f90   -O3 -fomit-frame-pointer -funroll-loops 
-fpeel-loops -ftracer -finline-functions  execution test
FAIL: libgomp.fortran/nestedfn5.f90   -O3 -g  execution test
FAIL: libgomp.fortran/nestedfn5.f90   -Os  execution test


These should go away now, but we should revisit SIMDUID and epilogue 
vectorization later.  I tried to look into it, but I am afraid I know 
very little about SIMD loops to figure out how to make this work.


On i686-linux, I also see newly
FAIL: gcc.dg/vect/vect-epilogues.c -flto -ffat-lto-objects  scan-tree-dump vect 
"LOOP EPILOGUE VECTORIZED"
FAIL: gcc.dg/vect/vect-epilogues.c scan-tree-dump vect "LOOP EPILOGUE 
VECTORIZED"
and in libgomp just


These, just like for arm should be skipped for i686, I suspect it 
doesn't know how to vectorize for lower VF's.  Could someone add the 
appropriate skip target triple for i686?



FAIL: libgomp.c/examples-4/simd-3.c execution test
FAIL: libgomp.c/scan-13.c execution test
FAIL: libgomp.c/scan-17.c execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O1  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O2  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -fomit-frame-pointer 
-funroll-loops -fpeel-loops -ftracer -finline-functions  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -O3 -g  execution test
FAIL: libgomp.fortran/examples-4/simd-3.f90   -Os  execution test


Same as the other libgomp tests. Should go away now.

diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index fa873e9b435037e5a81dda6615cab809d2d4de48..d3a0fa015332dbcccf84bc68531dc1e49550cc19 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -2383,10 +2383,11 @@ vect_analyze_loop (class loop *loop, loop_vec_info orig_loop_vinfo,
   poly_uint64 lowest_th = 0;
   unsigned vectorized_loops = 0;
 
-  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is enabled, this
- is not a simd loop and it is the most inner loop.  */
+  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is enabled,
+ SIMDLEN is not set, SIMDUID is not set and this is the most inner loop.
+ TODO: Enable epilogue vectorization for loops with SIMDUID set.  */
   bool vect_epilogues
-= !loop->simdlen && loop->inner == NULL
+= loop->simdlen == 0 && loop->simduid == NULL_TREE && loop->inner == NULL
   && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK);
   while (1)
 {


Re: [PR47785] COLLECT_AS_OPTIONS

2019-11-04 Thread H.J. Lu
On Sun, Nov 3, 2019 at 6:45 PM Kugan Vivekanandarajah
 wrote:
>
> Thanks for the reviews.
>
>
> On Sat, 2 Nov 2019 at 02:49, H.J. Lu  wrote:
> >
> > On Thu, Oct 31, 2019 at 6:33 PM Kugan Vivekanandarajah
> >  wrote:
> > >
> > > On Wed, 30 Oct 2019 at 03:11, H.J. Lu  wrote:
> > > >
> > > > On Sun, Oct 27, 2019 at 6:33 PM Kugan Vivekanandarajah
> > > >  wrote:
> > > > >
> > > > > Hi Richard,
> > > > >
> > > > > Thanks for the review.
> > > > >
> > > > > On Wed, 23 Oct 2019 at 23:07, Richard Biener 
> > > > >  wrote:
> > > > > >
> > > > > > On Mon, Oct 21, 2019 at 10:04 AM Kugan Vivekanandarajah
> > > > > >  wrote:
> > > > > > >
> > > > > > > Hi Richard,
> > > > > > >
> > > > > > > Thanks for the pointers.
> > > > > > >
> > > > > > >
> > > > > > >
> > > > > > > On Fri, 11 Oct 2019 at 22:33, Richard Biener 
> > > > > > >  wrote:
> > > > > > > >
> > > > > > > > On Fri, Oct 11, 2019 at 6:15 AM Kugan Vivekanandarajah
> > > > > > > >  wrote:
> > > > > > > > >
> > > > > > > > > Hi Richard,
> > > > > > > > > Thanks for the review.
> > > > > > > > >
> > > > > > > > > On Wed, 2 Oct 2019 at 20:41, Richard Biener 
> > > > > > > > >  wrote:
> > > > > > > > > >
> > > > > > > > > > On Wed, Oct 2, 2019 at 10:39 AM Kugan Vivekanandarajah
> > > > > > > > > >  wrote:
> > > > > > > > > > >
> > > > > > > > > > > Hi,
> > > > > > > > > > >
> > > > > > > > > > > As mentioned in the PR, attached patch adds 
> > > > > > > > > > > COLLECT_AS_OPTIONS for
> > > > > > > > > > > passing assembler options specified with -Wa, to the 
> > > > > > > > > > > link-time driver.
> > > > > > > > > > >
> > > > > > > > > > > The proposed solution only works for uniform -Wa options 
> > > > > > > > > > > across all
> > > > > > > > > > > TUs. As mentioned by Richard Biener, supporting 
> > > > > > > > > > > non-uniform -Wa flags
> > > > > > > > > > > would require either adjusting partitioning according to 
> > > > > > > > > > > flags or
> > > > > > > > > > > emitting multiple object files  from a single LTRANS CU. 
> > > > > > > > > > > We could
> > > > > > > > > > > consider this as a follow up.
> > > > > > > > > > >
> > > > > > > > > > > Bootstrapped and regression tests on  arm-linux-gcc. Is 
> > > > > > > > > > > this OK for trunk?
> > > > > > > > > >
> > > > > > > > > > While it works for your simple cases it is unlikely to work 
> > > > > > > > > > in practice since
> > > > > > > > > > your implementation needs the assembler options be present 
> > > > > > > > > > at the link
> > > > > > > > > > command line.  I agree that this might be the way for 
> > > > > > > > > > people to go when
> > > > > > > > > > they face the issue but then it needs to be documented 
> > > > > > > > > > somewhere
> > > > > > > > > > in the manual.
> > > > > > > > > >
> > > > > > > > > > That is, with COLLECT_AS_OPTION (why singular?  I'd expected
> > > > > > > > > > COLLECT_AS_OPTIONS) available to cc1 we could stream this 
> > > > > > > > > > string
> > > > > > > > > > to lto_options and re-materialize it at link time (and 
> > > > > > > > > > diagnose mismatches
> > > > > > > > > > even if we like).
> > > > > > > > > OK. I will try to implement this. So the idea is if we provide
> > > > > > > > > -Wa,options as part of the lto compile, this should be 
> > > > > > > > > available
> > > > > > > > > during link time. Like in:
> > > > > > > > >
> > > > > > > > > arm-linux-gnueabihf-gcc -march=armv7-a -mthumb -O2 -flto
> > > > > > > > > -Wa,-mimplicit-it=always,-mthumb -c test.c
> > > > > > > > > arm-linux-gnueabihf-gcc  -flto  test.o
> > > > > > > > >
> > > > > > > > > I am not sure where should we stream this. Currently, 
> > > > > > > > > cl_optimization
> > > > > > > > > has all the optimization flag provided for compiler and it is
> > > > > > > > > autogenerated and all the flags are integer values. Do you 
> > > > > > > > > have any
> > > > > > > > > preference or example where this should be done.
> > > > > > > >
> > > > > > > > In lto_write_options, I'd simply append the contents of 
> > > > > > > > COLLECT_AS_OPTIONS
> > > > > > > > (with -Wa, prepended to each of them), then recover them in 
> > > > > > > > lto-wrapper
> > > > > > > > for each TU and pass them down to the LTRANS compiles (if they 
> > > > > > > > agree
> > > > > > > > for all TUs, otherwise I'd warn and drop them).
> > > > > > >
> > > > > > > Attached patch streams it and also make sure that the options are 
> > > > > > > the
> > > > > > > same for all the TUs. Maybe it is a bit restrictive.
> > > > > > >
> > > > > > > What is the best place to document COLLECT_AS_OPTIONS. We don't 
> > > > > > > seem
> > > > > > > to document COLLECT_GCC_OPTIONS anywhere ?
> > > > > >
> > > > > > Nowhere, it's an implementation detail then.
> > > > > >
> > > > > > > Attached patch passes regression and also fixes the original ARM
> > > > > > > kernel build issue with tumb2.
> > > > > >
> > > > > > Did you try this with multiple assembler options?  I see you stream
> > > > > > them as -Wa,-mfpu=xyz,-mthumb 

Re: [PATCH 1/2] [ARM,testsuite] Skip tests incompatible with -mpure-code

2019-11-04 Thread Kyrill Tkachov

Hi Christophe,

On 10/18/19 2:18 PM, Christophe Lyon wrote:

Hi,

All these tests fail when using -mpure-code:
* some force A or R profile
* some use Neon
* some use -fpic/-fPIC
all of which are not supported by this option.

OK?



Hmm... I'm tempted to ask if it would be simpler to add a check for 
-mpure-code in the effective target checks for the above features but a 
lot of those tests don't use effective target checks consistently anyway...


So this is ok, though I'm not a big fan of adding so many dg-skip-if 
directives.


Thanks,

Kyrill




Thanks,

Christophe


Re: [PATCH, GCC/ARM, 2/10] Add command line support for Armv8.1-M Mainline

2019-11-04 Thread Kyrill Tkachov

Hi Mihail,

On 10/23/19 10:26 AM, Mihail Ionescu wrote:

[PATCH, GCC/ARM, 2/10] Add command line support

Hi,

=== Context ===

This patch is part of a patch series to add support for Armv8.1-M
Mainline Security Extensions architecture. Its purpose is to add
command-line support for that new architecture.

=== Patch description ===

Besides the expected enabling of the new value for the -march
command-line option (-march=armv8.1-m.main) and its extensions (see
below), this patch disables support of the Security Extensions for this
newly added architecture. This is done both by not including the cmse
bit in the architecture description and by throwing an error message
when user request Armv8.1-M Mainline Security Extensions. Note that
Armv8-M Baseline and Mainline Security Extensions are still enabled.

Only extensions for already supported instructions are implemented in
this patch. Other extensions (MVE integer and float) will be added in
separate patches. The following configurations are allowed for Armv8.1-M
Mainline with regards to FPU and implemented in this patch:
+ no FPU (+nofp)
+ single precision VFPv5 with FP16 (+fp)
+ double precision VFPv5 with FP16 (+fp.dp)

ChangeLog entry are as follow:

*** gcc/ChangeLog ***

2019-10-23  Mihail-Calin Ionescu 
2019-10-23  Thomas Preud'homme 

    * config/arm/arm-cpus.in (armv8_1m_main): New feature.
    (ARMv4, ARMv4t, ARMv5t, ARMv5te, ARMv5tej, ARMv6, ARMv6j, ARMv6k,
    ARMv6z, ARMv6kz, ARMv6zk, ARMv6t2, ARMv6m, ARMv7, ARMv7a, ARMv7ve,
    ARMv7r, ARMv7m, ARMv7em, ARMv8a, ARMv8_1a, ARMv8_2a, ARMv8_3a,
    ARMv8_4a, ARMv8_5a, ARMv8m_base, ARMv8m_main, ARMv8r): Reindent.
    (ARMv8_1m_main): New feature group.
    (armv8.1-m.main): New architecture.
    * config/arm/arm-tables.opt: Regenerate.
    * config/arm/arm.c (arm_arch8_1m_main): Define and default 
initialize.

    (arm_option_reconfigure_globals): Initialize arm_arch8_1m_main.
    (arm_options_perform_arch_sanity_checks): Error out when targeting
    Armv8.1-M Mainline Security Extensions.
    * config/arm/arm.h (arm_arch8_1m_main): Declare.

*** gcc/testsuite/ChangeLog ***

2019-10-23  Mihail-Calin Ionescu 
2019-10-23  Thomas Preud'homme 

    * lib/target-supports.exp
    (check_effective_target_arm_arch_v8_1m_main_ok): Define.
    (add_options_for_arm_arch_v8_1m_main): Likewise.
(check_effective_target_arm_arch_v8_1m_main_multilib): Likewise.

Testing: bootstrapped on arm-linux-gnueabihf and arm-none-eabi; testsuite
shows no regression.

Is this ok for trunk?


Ok.

Thanks,

Kyrill



Best regards,

Mihail


### Attachment also inlined for ease of reply    
###



diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in
index 
f8a3b3db67a537163bfe787d78c8f2edc4253ab3..652f2a4be9388fd7a74f0ec4615a292fd1cfcd36 
100644

--- a/gcc/config/arm/arm-cpus.in
+++ b/gcc/config/arm/arm-cpus.in
@@ -126,6 +126,9 @@ define feature armv8_5
 # M-Profile security extensions.
 define feature cmse

+# Architecture rel 8.1-M.
+define feature armv8_1m_main
+
 # Floating point and Neon extensions.
 # VFPv1 is not supported in GCC.

@@ -223,21 +226,21 @@ define fgroup ALL_FPU_INTERNAL vfpv2 vfpv3 vfpv4 
fpv5 fp16conv fp_dbl ALL_SIMD_I

 # -mfpu support.
 define fgroup ALL_FP    fp16 ALL_FPU_INTERNAL

-define fgroup ARMv4   armv4 notm
-define fgroup ARMv4t  ARMv4 thumb
-define fgroup ARMv5t  ARMv4t armv5t
-define fgroup ARMv5te ARMv5t armv5te
-define fgroup ARMv5tej    ARMv5te
-define fgroup ARMv6   ARMv5te armv6 be8
-define fgroup ARMv6j  ARMv6
-define fgroup ARMv6k  ARMv6 armv6k
-define fgroup ARMv6z  ARMv6
-define fgroup ARMv6kz ARMv6k quirk_armv6kz
-define fgroup ARMv6zk ARMv6k
-define fgroup ARMv6t2 ARMv6 thumb2
+define fgroup ARMv4 armv4 notm
+define fgroup ARMv4t    ARMv4 thumb
+define fgroup ARMv5t    ARMv4t armv5t
+define fgroup ARMv5te   ARMv5t armv5te
+define fgroup ARMv5tej  ARMv5te
+define fgroup ARMv6 ARMv5te armv6 be8
+define fgroup ARMv6j    ARMv6
+define fgroup ARMv6k    ARMv6 armv6k
+define fgroup ARMv6z    ARMv6
+define fgroup ARMv6kz   ARMv6k quirk_armv6kz
+define fgroup ARMv6zk   ARMv6k
+define fgroup ARMv6t2   ARMv6 thumb2
 # This is suspect.  ARMv6-m doesn't really pull in any useful features
 # from ARMv5* or ARMv6.
-define fgroup ARMv6m  armv4 thumb armv5t armv5te armv6 be8
+define fgroup ARMv6m    armv4 thumb armv5t armv5te armv6 be8
 # This is suspect, the 'common' ARMv7 subset excludes the thumb2 
'DSP' and

 # integer SIMD instructions that are in ARMv6T2.  */
 define fgroup ARMv7   ARMv6m thumb2 armv7
@@ -256,6 +259,10 @@ define fgroup ARMv8_5a    ARMv8_4a armv8_5 sb predres
 define fgroup ARMv8m_base ARMv6m armv8 cmse tdiv
 define fgroup ARMv8m_main ARMv7m armv8 cmse
 define fgroup ARMv8r  ARMv8a
+# Feature cmse is omitted to disable Security Extensions support 
while 

Re: [PATCH, GCC/ARM, 1/10] Fix -mcmse check in libgcc

2019-11-04 Thread Kyrill Tkachov

Hi Mihail,

On 10/23/19 10:26 AM, Mihail Ionescu wrote:

[PATCH, GCC/ARM, 1/10] Fix -mcmse check in libgcc

Hi,

=== Context ===

This patch is part of a patch series to add support for Armv8.1-M
Mainline Security Extensions architecture. Its purpose is to fix the
check to determine whether -mcmse is supported by the host compiler.

=== Patch description ===

Code to detect whether cmse.c can be buit with -mcmse checks the output
of host GCC when invoked with -mcmse. However, an error from the
compiler does not prevent some minimal output so this always holds true.

This does not affect currently supported architectures since the test is
guarded by __ARM_FEATURE_CMSE which is only defined for Armv8-M Baseline
and Mainline and these two architectures accept -mcmse.

However, in the intermediate patches adding support for Armv8.1-M
Mainline, support for Security Extensions is disabled until fully
implemented. This leads to libgcc/config/arm/cmse.c being built with
-mcmse due to the broken test which fails in the intermediate commits.

This patch instead change the test to look at the return value of the
host gcc when invoked with -mcmse.


ChangeLog entry is as follows:

*** libgcc/ChangeLog ***

2019-10-23  Mihail-Calin Ionescu 
2019-10-23  Thomas Preud'homme 

    * config/arm/t-arm: Check return value of gcc rather than lack of
    output.

Testing: Bootstrapped and tested on arm-none-eabi.
Without this patch, GCC stops building after the second patch
of this series.

Is this ok for trunk?


This looks ok to me and safe enough to go in on its own.

Thanks,

Kyrill



Best regards,

Mihail


### Attachment also inlined for ease of reply    
###



diff --git a/libgcc/config/arm/t-arm b/libgcc/config/arm/t-arm
index 
274bf2a8ef33c5e8a8ee2b246aba92d30297abe1..f2b927f3686a8c0a8e37abfe2d7768f2050d4fb3 
100644

--- a/libgcc/config/arm/t-arm
+++ b/libgcc/config/arm/t-arm
@@ -3,7 +3,7 @@ LIB1ASMFUNCS = _thumb1_case_sqi _thumb1_case_uqi 
_thumb1_case_shi \

 _thumb1_case_uhi _thumb1_case_si _speculation_barrier

 HAVE_CMSE:=$(findstring __ARM_FEATURE_CMSE,$(shell 
$(gcc_compile_bare) -dM -E - 
-ifneq ($(shell $(gcc_compile_bare) -E -mcmse - /dev/null),)
+ifeq ($(shell $(gcc_compile_bare) -E -mcmse - /dev/null 
2>/dev/null; echo $?),0)

 CMSE_OPTS:=-mcmse
 endif




Re: [PATCH][AArch64] Implement Armv8.5-A memory tagging (MTE) intrinsics

2019-11-04 Thread Kyrill Tkachov

Hi Dennis,

On 10/17/19 11:03 AM, Dennis Zhang wrote:

Hi,

Arm Memory Tagging Extension (MTE) is published with Armv8.5-A.
It can be used for spatial and temporal memory safety detection and
lightweight lock and key system.

This patch enables new intrinsics leveraging MTE instructions to
implement functionalities of creating tags, setting tags, reading tags,
and manipulating tags.
The intrinsics are part of Arm ACLE extension:
https://developer.arm.com/docs/101028/latest/memory-tagging-intrinsics
The MTE ISA specification can be found at
https://developer.arm.com/docs/ddi0487/latest chapter D6.

Bootstraped and regtested for aarch64-none-linux-gnu.

Please help to check if it's OK for trunk.



This looks mostly ok to me but for further review this needs to be 
rebased on top of current trunk as there are some conflicts with the SVE 
ACLE changes that recently went in. Most conflicts looks trivial to 
resolve but one that needs more attention is the definition of the 
TARGET_RESOLVE_OVERLOADED_BUILTIN hook.


Thanks,

Kyrill


Many Thanks
Dennis

gcc/ChangeLog:

2019-10-16  Dennis Zhang  

    * config/aarch64/aarch64-builtins.c (enum aarch64_builtins): Add
    AARCH64_MEMTAG_BUILTIN_START, AARCH64_MEMTAG_BUILTIN_IRG,
    AARCH64_MEMTAG_BUILTIN_GMI, AARCH64_MEMTAG_BUILTIN_SUBP,
    AARCH64_MEMTAG_BUILTIN_INC_TAG, AARCH64_MEMTAG_BUILTIN_SET_TAG,
    AARCH64_MEMTAG_BUILTIN_GET_TAG, and AARCH64_MEMTAG_BUILTIN_END.
    (aarch64_init_memtag_builtins): New.
    (AARCH64_INIT_MEMTAG_BUILTINS_DECL): New macro.
    (aarch64_general_init_builtins): Call 
aarch64_init_memtag_builtins.

    (aarch64_expand_builtin_memtag): New.
    (aarch64_general_expand_builtin): Call 
aarch64_expand_builtin_memtag.

    (AARCH64_BUILTIN_SUBCODE): New macro.
    (aarch64_resolve_overloaded_memtag): New.
    (aarch64_resolve_overloaded_builtin): New hook. Call
    aarch64_resolve_overloaded_memtag to handle overloaded MTE 
builtins.

    * config/aarch64/aarch64-c.c (aarch64_update_cpp_builtins): Define
    __ARM_FEATURE_MEMORY_TAGGING when enabled.
    * config/aarch64/aarch64-protos.h 
(aarch64_resolve_overloaded_builtin):

    Add declaration.
    * config/aarch64/aarch64.c (TARGET_RESOLVE_OVERLOADED_BUILTIN):
    New hook.
    * config/aarch64/aarch64.h (AARCH64_ISA_MEMTAG): New macro.
    (TARGET_MEMTAG): Likewise.
    * config/aarch64/aarch64.md (define_c_enum "unspec"): Add
    UNSPEC_GEN_TAG, UNSPEC_GEN_TAG_RND, and UNSPEC_TAG_SPACE.
    (irg, gmi, subp, addg, ldg, stg): New instructions.
    * config/aarch64/arm_acle.h (__arm_mte_create_random_tag): New 
macro.

    (__arm_mte_exclude_tag, __arm_mte_increment_tag): Likewise.
    (__arm_mte_ptrdiff, __arm_mte_set_tag, __arm_mte_get_tag): 
Likewise.

    * config/aarch64/predicates.md (aarch64_memtag_tag_offset): New.
    (aarch64_granule16_uimm6, aarch64_granule16_simm9): New.
    * config/arm/types.md (memtag): New.
    * doc/invoke.texi (-memtag): Update description.

gcc/testsuite/ChangeLog:

2019-10-16  Dennis Zhang  

    * gcc.target/aarch64/acle/memtag_1.c: New test.
    * gcc.target/aarch64/acle/memtag_2.c: New test.
    * gcc.target/aarch64/acle/memtag_3.c: New test.


Re: [Patch][AMD GCN][OpenMP] Add gcc/config/gcn/t-omp-device for OpenMP declare variant kind/arch/isa

2019-11-04 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 04:33:27PM +, Andrew Stubbs wrote:
> On 04/11/2019 15:37, Jakub Jelinek wrote:
> > My preference would be that arch on amdgcn is something like amdgcn or gcn.
> > I hope the general distinction between arch and isa will be something that
> > will be discussed next Tuesday on the language committee, so hopefully we'll
> > know more afterwards and can tweak afterwards depending on the outcome.
> > 
> > Ok with that change.
> 
> I'm fine with this, too. The OpenMP support should be posted Real Soon Now.
> 
> We've not been terribly consistent with "gcn" vs. "amdgcn", which is
> unfortunate, but we are where we are. Most of the API enums have "GCN", so
> let's use that, for now.

With the way it is defined in the OpenMP spec, there can be actually aliases, so
having both gcn and amdgcn as supported arch is fine too.

Jakub



Re: [Patch][AMD GCN][OpenMP] Add gcc/config/gcn/t-omp-device for OpenMP declare variant kind/arch/isa

2019-11-04 Thread Andrew Stubbs

On 04/11/2019 15:37, Jakub Jelinek wrote:

My preference would be that arch on amdgcn is something like amdgcn or gcn.
I hope the general distinction between arch and isa will be something that
will be discussed next Tuesday on the language committee, so hopefully we'll
know more afterwards and can tweak afterwards depending on the outcome.

Ok with that change.


I'm fine with this, too. The OpenMP support should be posted Real Soon Now.

We've not been terribly consistent with "gcn" vs. "amdgcn", which is 
unfortunate, but we are where we are. Most of the API enums have "GCN", 
so let's use that, for now.


Andrew


Re: [SLP] SLP vectorization: vectorize vector constructors

2019-11-04 Thread Richard Biener
On November 4, 2019 4:30:57 PM GMT+01:00, Joel Hutton  
wrote:
>First copy bounced from mailing list.
> > > On 30/10/2019 13:49, Richard Biener wrote:
> > >  >>
>> >  >>  * expr.c (store_constructor): Add case for constructor
>of
> > > vectors.
> > >  > Why do you need this?  The vectorizer already creates such 
>CTORs.  Any
> > >  > testcase that you can show?
> > >  >
> > >  > typedef long v2di __attribute__((vector_size(16)));
> > >  > v2di v;
> > >  > void
> > >  > foo()
> > >  > {
> > >  >    v = (v2di){v[1], v[0]};
> > >  > }
>> > There is a special case for single element vectors, where the
>vector
> > > mode and
> > > the element mode are the same.
>> > I've changed this slightly, as your solution of using VECTOR_MODE_P
>
>didn't
> > > work for my case where:
> > >    emode = E_DImode
> > >    eltmode = E_DImode
> > > On aarch64. As E_DImode is not a vector mode.
>> > Based on some checks I found in verify_gimple, I set the type of
>the
> > > replacement
>> > constructor to the same as the original constructor, as
>verify_gimple
> > > seems to
>> > expect flattened types. i.e. a vector of vectors of ints should
>have
> > > type vector
> > > of ints.
> >
> > Huh.  On aarch64 for the above testcase I see mode V2DImode and
> > emode = eltmode = DImode.  That's just a regular CTOR with
> > non-vector elements so not sure why you need any adjustment at all
> > here?
> >
> > It looks like your vectorization of the CTOR introduces a
> > V2DImode CTOR of vector(1) long elements which incidentially
> > have DImode.  That's because somehow V2DI vectorization isn't
> > profitable but V1DI one is.  In the end it's a noop transform
> > but the testcase shows that for V2DI vectorization we fail
> > to cost the CTOR in the scalar cost computation (you have to
> > include the pattern root there I guess).
> >
>Yes, sorry, I was unclear about this, it's the new constructor that the
>
>change is needed for.
>
> > Technically we feed valid GIMPLE to the expander so we indeed
> > shouldn't ICE.  So I'd like to have the earlier keying on
> > vec_vec_init_p match the later assert we run into, thus
> > sth like
> >
> > Index: gcc/expr.c
> > ===
> > --- gcc/expr.c  (revision 277765)
> > +++ gcc/expr.c  (working copy)
> > @@ -6809,6 +6809,7 @@ store_constructor (tree exp, rtx target,
> > && n_elts.is_constant (_n_elts))
> >   {
> > machine_mode emode = eltmode;
> > +   bool vector_typed_elts_p = false;
> >
> > if (CONSTRUCTOR_NELTS (exp)
> > && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp,
> > 0)->value))
> > @@ -6819,13 +6820,14 @@ store_constructor (tree exp, rtx target,
>>   * TYPE_VECTOR_SUBPARTS (etype),
> >   n_elts));
> > emode = TYPE_MODE (etype);
> > +   vector_typed_elts_p = true;
> >   }
>> icode = convert_optab_handler (vec_init_optab, mode,
>emode);
> > if (icode != CODE_FOR_nothing)
> >   {
> > unsigned int n = const_n_elts;
> >
> > -   if (emode != eltmode)
> > +   if (vector_typed_elts_p)
> >   {
> > n = CONSTRUCTOR_NELTS (exp);
> > vec_vec_init_p = true;
> >
> >
>I've used this, thank you.
>
> > >  > @@ -4061,15 +4201,42 @@ vect_schedule_slp (vec_info *vinfo)
> > >  >    if (is_a  (vinfo))
> > >  >
>> >  > the ChangeLog doesn't mention this.  I guess this isn't
>necessary
> > >  > unless you fail vectorizing late which you shouldn't.
> > >  >
> > > It's necessary to avoid:
> > >
> > >  removing stmts twice for constructors of the form 
>{_1,_1,_1,_1}
>> >  removing stmts that define ssa names used elsewhere (which
>> > previously wasn't a consideration because the scalar_stmts were
>stores
> > > to memory, not assignments to ssa names)
> >
> > OK, but the code you are patching is just supposed to remove stores
> > from the final scalar stmt seeds - it doesn't expect any loads there
> > which is probably what happens.  I'd instead add a
> >
> >    /* Do not CSE the stmts feeding a CTOR, they might have uses
> >   outside of the vectorized stmts.  */
> >    if (SLP_INSTANCE_ROOT_STMT (instance))
> >  continue;
> >
> > before the loop over SLP_TREE_SCALAR_STMTS.
>Done.
>
> > + if (!subparts.is_constant () || !(subparts.to_constant ()
>> +   == CONSTRUCTOR_NELTS
>(rhs)))
> > +   continue;
> >
> > can be better written as if (maybe_ne (subparts, CONSTRUCTOR_NELTS 
>(rhs)))
>Done.
>
> > +/* Vectorize the instance root.  Return success or failure. */
> > +
> > +void
> >
> > since it's now void the comment has to be adjusted.
>Done.
>
> > +  vect_schedule_slp_instance (node,
> >   instance, bst_map);
> >
> > this now 

Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Georg-Johann Lay

Am 04.11.19 um 16:22 schrieb Vladimir Makarov:


On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote:
The AMD GCN architecture uses 64-bit pointers, but the scalar 
registers are 32-bit wide, so pointers must reside in a pair of 
registers.


The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.


GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only 
set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers 
that may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, 
and sets the required variables for HARD_FRAME_POINTER_REGNUM and 
however many adjacent registers are needed (which on most 
architectures should be zero).


Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there 
any other architectures similar to GCN (i.e. 64-bit pointers with 
32-bit GPRs)?


I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.


Okay for trunk?



Yes.  You can commit the patch to the trunk.

Thank you.


The avr port already uses 2 hard-reg frame pointer ever since...

Does this patch has an impact on the avr port and its handling of
the frame pointer?

Johann




Re: [PATCH v2] PR92090: Fix testcase failures by r276469

2019-11-04 Thread Jeff Law
On 11/3/19 8:29 PM, luoxhu wrote:
> -finline-functions is enabled by default for O2 since r276469, update the
> test cases with -fno-inline-functions.
> 
> v2: disable inlining for the failed cases.  Add two more failed cases
> not listed in BZ.  Tested on P8LE, P8BE and P9LE.
> 
> 
> gcc/testsuite/ChangeLog:
> 
>   2019-10-30  Xiong Hu Luo  
> 
>   PR92090
>   * g++.dg/tree-ssa/pr61034.C: Add -fno-inline-functions --param
>   max-inline-insns-single-O2=200.
>   * gcc.dg/atomic/c11-atomic-exec-5.c: Likewise.
>   * gcc.target/powerpc/pr72804.c: Likewie.
>   * gcc.target/powerpc/pr79439-1.c: Add -fno-inline-functions.
>   * gcc.target/powerpc/vsx-builtin-7.c: Likewise.
OK
jeff



Re: [D] Remove unchecked to_constant in VECTOR_TYPE handling

2019-11-04 Thread Jeff Law
On 11/4/19 7:40 AM, Richard Sandiford wrote:
> The SVE port now tries to register variable-length VECTOR_TYPEs
> at start-up, so it's no longer possible to use the asserting
> to_constant on the number of vector elements.  This patch punts
> on variable element counts instead, just like we do for other
> things that the frontend doesn't recognise.
> 
> The brace indentation matches the surrounding style.
> 
> Tested on aarch64-linux-gnu.  OK to install?
> 
> Richard
> 
> 
> 2019-11-04  Richard Sandiford  
> 
> gcc/d/
>   * d-builtins.cc (build_frontend_type): Cope with variable
>   TYPE_VECTOR_SUBPARTS.
OK
jeff



Re: [RFH][libgcc] fp-bit bit ordering (PR 78804)

2019-11-04 Thread Jeff Law
On 11/3/19 5:12 AM, Oleg Endo wrote:
> On Fri, 2019-10-11 at 23:27 +0900, Oleg Endo wrote:
>> On Thu, 2019-10-03 at 19:34 -0600, Jeff Law wrote:
>>>
>>> So probably the most interesting target for this test is v850-elf
>>> as
>>> it's got a reasonably well functioning simulator, hard and soft FP
>>> targets, little endian, and I'm familiar with its current set of
>>> failures.
>>>
>>> I can confirm that your patch makes no difference in the test
>>> results
>>> (which includes execution results).
>>>
>>> In fact, there haven't been any problems on any target in my tester
>>> that
>>> I can tie back to this change.
>>>
>>> At this point I'd say let's go for it.
>>>
>>
>> Thanks, Jeff.  I'll commit it to trunk if there are no further
>> objections some time next week.
>>
> 
> I've just committed it as r277752.
> 
> Personally I'd like to install it on GCC 8 and 9 branches as well.
> Any thoughts on that?
Obviously we tend to be a bit more conservative the older the branch.
But this is also a correctness issue.

I'd suggest a week or two on the trunk, then go ahead with backporting.

jeff



Re: [Patch][AMD GCN][OpenMP] Add gcc/config/gcn/t-omp-device for OpenMP declare variant kind/arch/isa

2019-11-04 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 04:31:10PM +0100, Tobias Burnus wrote:
> This adds gcc/config/gcn/t-omp-device to augment the nvptx, hsa and x86 host
> implementation.
> 
> This compiler has -march= carrizo/fiji/gfx900/gfx906 (for Corrizo, Fiji and
> Vega GPUs; arch names aligned with LLVM and passed on to the linker).
> 
> This patch uses kind = "gnu" and those -march names for arch *and* isa, but

You mean "gpu" ;)

> can argue about this choice.

My preference would be that arch on amdgcn is something like amdgcn or gcn.
I hope the general distinction between arch and isa will be something that
will be discussed next Tuesday on the language committee, so hopefully we'll
know more afterwards and can tweak afterwards depending on the outcome.

Ok with that change.

Jakub



Re: [SLP] SLP vectorization: vectorize vector constructors

2019-11-04 Thread Joel Hutton
First copy bounced from mailing list.
 > > On 30/10/2019 13:49, Richard Biener wrote:
 > >  >>
 > >  >>  * expr.c (store_constructor): Add case for constructor of
 > > vectors.
 > >  > Why do you need this?  The vectorizer already creates such 
CTORs.  Any
 > >  > testcase that you can show?
 > >  >
 > >  > typedef long v2di __attribute__((vector_size(16)));
 > >  > v2di v;
 > >  > void
 > >  > foo()
 > >  > {
 > >  >    v = (v2di){v[1], v[0]};
 > >  > }
 > > There is a special case for single element vectors, where the vector
 > > mode and
 > > the element mode are the same.
 > > I've changed this slightly, as your solution of using VECTOR_MODE_P 
didn't
 > > work for my case where:
 > >    emode = E_DImode
 > >    eltmode = E_DImode
 > > On aarch64. As E_DImode is not a vector mode.
 > > Based on some checks I found in verify_gimple, I set the type of the
 > > replacement
 > > constructor to the same as the original constructor, as verify_gimple
 > > seems to
 > > expect flattened types. i.e. a vector of vectors of ints should have
 > > type vector
 > > of ints.
 >
 > Huh.  On aarch64 for the above testcase I see mode V2DImode and
 > emode = eltmode = DImode.  That's just a regular CTOR with
 > non-vector elements so not sure why you need any adjustment at all
 > here?
 >
 > It looks like your vectorization of the CTOR introduces a
 > V2DImode CTOR of vector(1) long elements which incidentially
 > have DImode.  That's because somehow V2DI vectorization isn't
 > profitable but V1DI one is.  In the end it's a noop transform
 > but the testcase shows that for V2DI vectorization we fail
 > to cost the CTOR in the scalar cost computation (you have to
 > include the pattern root there I guess).
 >
Yes, sorry, I was unclear about this, it's the new constructor that the 
change is needed for.

 > Technically we feed valid GIMPLE to the expander so we indeed
 > shouldn't ICE.  So I'd like to have the earlier keying on
 > vec_vec_init_p match the later assert we run into, thus
 > sth like
 >
 > Index: gcc/expr.c
 > ===
 > --- gcc/expr.c  (revision 277765)
 > +++ gcc/expr.c  (working copy)
 > @@ -6809,6 +6809,7 @@ store_constructor (tree exp, rtx target,
 > && n_elts.is_constant (_n_elts))
 >   {
 > machine_mode emode = eltmode;
 > +   bool vector_typed_elts_p = false;
 >
 > if (CONSTRUCTOR_NELTS (exp)
 > && (TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (exp,
 > 0)->value))
 > @@ -6819,13 +6820,14 @@ store_constructor (tree exp, rtx target,
 >   * TYPE_VECTOR_SUBPARTS (etype),
 >   n_elts));
 > emode = TYPE_MODE (etype);
 > +   vector_typed_elts_p = true;
 >   }
 > icode = convert_optab_handler (vec_init_optab, mode, emode);
 > if (icode != CODE_FOR_nothing)
 >   {
 > unsigned int n = const_n_elts;
 >
 > -   if (emode != eltmode)
 > +   if (vector_typed_elts_p)
 >   {
 > n = CONSTRUCTOR_NELTS (exp);
 > vec_vec_init_p = true;
 >
 >
I've used this, thank you.

 > >  > @@ -4061,15 +4201,42 @@ vect_schedule_slp (vec_info *vinfo)
 > >  >    if (is_a  (vinfo))
 > >  >
 > >  > the ChangeLog doesn't mention this.  I guess this isn't necessary
 > >  > unless you fail vectorizing late which you shouldn't.
 > >  >
 > > It's necessary to avoid:
 > >
 > >  removing stmts twice for constructors of the form 
{_1,_1,_1,_1}
 > >  removing stmts that define ssa names used elsewhere (which
 > > previously wasn't a consideration because the scalar_stmts were stores
 > > to memory, not assignments to ssa names)
 >
 > OK, but the code you are patching is just supposed to remove stores
 > from the final scalar stmt seeds - it doesn't expect any loads there
 > which is probably what happens.  I'd instead add a
 >
 >    /* Do not CSE the stmts feeding a CTOR, they might have uses
 >   outside of the vectorized stmts.  */
 >    if (SLP_INSTANCE_ROOT_STMT (instance))
 >  continue;
 >
 > before the loop over SLP_TREE_SCALAR_STMTS.
Done.

 > + if (!subparts.is_constant () || !(subparts.to_constant ()
 > +   == CONSTRUCTOR_NELTS (rhs)))
 > +   continue;
 >
 > can be better written as if (maybe_ne (subparts, CONSTRUCTOR_NELTS 
(rhs)))
Done.

 > +/* Vectorize the instance root.  Return success or failure. */
 > +
 > +void
 >
 > since it's now void the comment has to be adjusted.
Done.

 > +  vect_schedule_slp_instance (node,
 >   instance, bst_map);
 >
 > this now fits in one line.
Done.

 > OK with those changes.
Tested and bootstrapped on aarch64.
OK?

2019-11-04  Joel Hutton  

  * expr.c (store_constructor): Modify to handle single element

[Patch][AMD GCN][OpenMP] Add gcc/config/gcn/t-omp-device for OpenMP declare variant kind/arch/isa

2019-11-04 Thread Tobias Burnus
This adds gcc/config/gcn/t-omp-device to augment the nvptx, hsa and x86 
host implementation.


This compiler has -march= carrizo/fiji/gfx900/gfx906 (for Corrizo, Fiji 
and Vega GPUs; arch names aligned with LLVM and passed on to the linker).


This patch uses kind = "gnu" and those -march names for arch *and* isa, 
but can argue about this choice.


(I have build the GCN compiler, but didn't do any additional tests – 
also because GCN as offloading compiler is not yet supported on the trunk.)


OK for the trunk?

Tobias

2019-11-04  Tobias Burnus  

	gcc/
	* config/gcn/gcn.c (gcn_omp_device_kind_arch_isa): New function.
	(TARGET_OMP_DEVICE_KIND_ARCH_ISA): Redefine to
	gcn_omp_device_kind_arch_isa.
	* config/gcn/t-omp-device: New file.
	* configure.ac: Support gcn for omp_device_property.
	* configure: Regenerate.



diff --git a/gcc/config/gcn/gcn.c b/gcc/config/gcn/gcn.c
index b5f09da173c..21219877431 100644
--- a/gcc/config/gcn/gcn.c
+++ b/gcc/config/gcn/gcn.c
@@ -2516,6 +2516,35 @@ gcn_gimplify_va_arg_expr (tree valist, tree type,
   return t;
 }
 
+/* Return 1 if TRAIT NAME is present in the OpenMP context's
+   device trait set, return 0 if not present in any OpenMP context in the
+   whole translation unit, or -1 if not present in the current OpenMP context
+   but might be present in another OpenMP context in the same TU.  */
+
+int
+gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
+			  const char *name)
+{
+  switch (trait)
+{
+case omp_device_kind:
+  return strcmp (name, "gpu") == 0;
+case omp_device_arch:
+case omp_device_isa:
+  if (strcmp (name, "carrizo") == 0)
+	return gcn_arch == PROCESSOR_CARRIZO;
+  if (strcmp (name, "fiji") == 0)
+	return gcn_arch == PROCESSOR_FIJI;
+  if (strcmp (name, "gfx900") == 0)
+	return gcn_arch == PROCESSOR_VEGA;
+  if (strcmp (name, "gfx906") == 0)
+	return gcn_arch == PROCESSOR_VEGA;
+  return 0;
+default:
+  gcc_unreachable ();
+}
+}
+
 /* Calculate stack offsets needed to create prologues and epilogues.  */
 
 static struct machine_function *
@@ -6030,6 +6059,8 @@ print_operand (FILE *file, rtx x, int code)
 #define TARGET_FUNCTION_VALUE_REGNO_P gcn_function_value_regno_p
 #undef  TARGET_GIMPLIFY_VA_ARG_EXPR
 #define TARGET_GIMPLIFY_VA_ARG_EXPR gcn_gimplify_va_arg_expr
+#undef TARGET_OMP_DEVICE_KIND_ARCH_ISA
+#define TARGET_OMP_DEVICE_KIND_ARCH_ISA gcn_omp_device_kind_arch_isa
 #undef  TARGET_GOACC_ADJUST_PROPAGATION_RECORD
 #define TARGET_GOACC_ADJUST_PROPAGATION_RECORD \
   gcn_goacc_adjust_propagation_record
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
new file mode 100644
index 000..c79dc4b83dd
--- /dev/null
+++ b/gcc/config/gcn/t-omp-device
@@ -0,0 +1,4 @@
+omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.c
+	echo kind: gpu > $@
+	echo arch: carrizo fiji gfx900 gfx906 >> $@
+	echo isa: carrizo fiji gfx900 gfx906 >> $@
diff --git a/gcc/configure b/gcc/configure
index 6808c23d26b..a2df82e021f 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -7896,6 +7896,10 @@ for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
 	omp_device_property=omp-device-properties-i386
 	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
 	;;
+  gcn*-*)
+	omp_device_property=omp-device-properties-gcn
+	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
+	;;
   nvptx*-*)
 	omp_device_property=omp-device-properties-nvptx
 	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"
@@ -18933,7 +18937,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 18936 "configure"
+#line 18940 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
@@ -19039,7 +19043,7 @@ else
   lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
   lt_status=$lt_dlunknown
   cat > conftest.$ac_ext <<_LT_EOF
-#line 19042 "configure"
+#line 19046 "configure"
 #include "confdefs.h"
 
 #if HAVE_DLFCN_H
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 1a0d68208e4..5f32fd4d5e4 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -1037,6 +1037,10 @@ for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
 	omp_device_property=omp-device-properties-i386
 	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/i386/t-omp-device"
 	;;
+  gcn*-*)
+	omp_device_property=omp-device-properties-gcn
+	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/gcn/t-omp-device"
+	;;
   nvptx*-*)
 	omp_device_property=omp-device-properties-nvptx
 	omp_device_property_tmake_file="${omp_device_property_tmake_file} \$(srcdir)/config/nvptx/t-omp-device"


[3/4] Don't vectorise single-iteration epilogues

2019-11-04 Thread Richard Sandiford
With a later patch I saw a case in which we peeled a single iteration
for gaps but didn't need to peel further iterations to make up a full
vector.  We then tried to vectorise the single-iteration epilogue.


2019-11-04  Richard Sandiford  

gcc/
* tree-vect-loop.c (vect_analyze_loop): Only try to vectorize
the epilogue if there are peeled iterations for it to handle.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-04 15:18:26.684592505 +
+++ gcc/tree-vect-loop.c2019-11-04 15:18:36.608524542 +
@@ -2462,6 +2462,7 @@ vect_analyze_loop (class loop *loop, loo
  vect_epilogues = (!loop->simdlen
&& loop->inner == NULL
&& PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)
+   && LOOP_VINFO_PEELING_FOR_NITER (first_loop_vinfo)
/* For now only allow one epilogue loop.  */
&& first_loop_vinfo->epilogue_vinfos.is_empty ());
 


[4/4] Use scan-tree-dump instead of scan-tree-dump-times for some vect tests

2019-11-04 Thread Richard Sandiford
With later patches, we're able to vectorise the epilogues of these tests
on AArch64 and so get two instances of "vectorizing stmts using SLP".
Although it would be possible with a bit of effort to predict when
this happens, it doesn't seem important whether we get 1 vs. 2
occurrences.  All that matters is zero vs. nonzero.


2019-11-04  Richard Sandiford  

gcc/testsuite/
* gcc.dg/vect/slp-9.c: Use scan-tree-dump rather than
scan-tree-dump-times.
* gcc.dg/vect/slp-widen-mult-s16.c: Likewise.
* gcc.dg/vect/slp-widen-mult-u8.c: Likewise.

Index: gcc/testsuite/gcc.dg/vect/slp-9.c
===
--- gcc/testsuite/gcc.dg/vect/slp-9.c   2019-03-08 18:15:02.276871200 +
+++ gcc/testsuite/gcc.dg/vect/slp-9.c   2019-11-04 15:18:14.656674872 +
@@ -44,5 +44,5 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target 
vect_widen_mult_hi_to_si } } }*/
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target vect_widen_mult_hi_to_si } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target 
vect_widen_mult_hi_to_si } } } */
 
Index: gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c
===
--- gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c  2019-03-08 
18:15:02.304871094 +
+++ gcc/testsuite/gcc.dg/vect/slp-widen-mult-s16.c  2019-11-04 
15:18:14.656674872 +
@@ -38,5 +38,5 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_widen_mult_hi_to_si || vect_unpack } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { 
vect_widen_mult_hi_to_si || vect_unpack } } } } */
 
Index: gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c
===
--- gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c   2019-03-08 
18:15:02.292871138 +
+++ gcc/testsuite/gcc.dg/vect/slp-widen-mult-u8.c   2019-11-04 
15:18:14.668674793 +
@@ -38,5 +38,5 @@ int main (void)
 }
 
 /* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target { 
vect_widen_mult_qi_to_hi || vect_unpack } } } } */
-/* { dg-final { scan-tree-dump-times "vectorizing stmts using SLP" 1 "vect" { 
target { vect_widen_mult_hi_to_si || vect_unpack } } } } */
+/* { dg-final { scan-tree-dump "vectorizing stmts using SLP" "vect" { target { 
vect_widen_mult_hi_to_si || vect_unpack } } } } */
 


[2/4] Check the VF is small enough for an epilogue loop

2019-11-04 Thread Richard Sandiford
The number of iterations of an epilogue loop is always smaller than the
VF of the main loop.  vect_analyze_loop_costing was taking this into
account when deciding whether the loop is cheap enough to vectorise,
but that has no effect with the unlimited cost model.  We need to use
a separate check for correctness as well.

This can happen if the sizes returned by autovectorize_vector_sizes
happen to be out of order, e.g. because the target prefers smaller
vectors.  It can also happen with later patches if two vectorisation
attempts happen to end up with the same VF.


2019-11-04  Richard Sandiford  

gcc/
* tree-vect-loop.c (vect_analyze_loop_2): When vectorizing an
epilogue loop, make sure that the VF is small enough or that
the epilogue loop can be fully-masked.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-11-04 15:17:35.924940111 +
+++ gcc/tree-vect-loop.c2019-11-04 15:17:50.736838681 +
@@ -2142,6 +2142,16 @@ vect_analyze_loop_2 (loop_vec_info loop_
   " support peeling for gaps.\n");
 }
 
+  /* If we're vectorizing an epilogue loop, we either need a fully-masked
+ loop or a loop that has a lower VF than the main loop.  */
+  if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)
+  && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+  && maybe_ge (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
+  LOOP_VINFO_VECT_FACTOR (orig_loop_vinfo)))
+return opt_result::failure_at (vect_location,
+  "Vectorization factor too high for"
+  " epilogue loop.\n");
+
   /* Check the costings of the loop make vectorizing worthwhile.  */
   res = vect_analyze_loop_costing (loop_vinfo);
   if (res < 0)


[1/4] Restructure vect_analyze_loop

2019-11-04 Thread Richard Sandiford
Once vect_analyze_loop has found a valid loop_vec_info X, we carry
on searching for alternatives if (1) X doesn't satisfy simdlen or
(2) we want to vectorise the epilogue of X.  I have a patch that
optionally adds a third reason: we want to see if there are cheaper
alternatives to X.

This patch restructures vect_analyze_loop so that it's easier
to add more reasons for continuing.  There's supposed to be no
behavioural change.

If we wanted to, we could allow vectorisation of epilogues once
loop->simdlen has been reached by changing "loop->simdlen" to
"simdlen" in the new vect_epilogues condition.  That should be
a separate change though.

This may conflict with Andre's fix for libgomp; I'll adjust if
that goes in first.


2019-11-04  Richard Sandiford  

gcc/
* tree-vect-loop.c (vect_analyze_loop): Break out of the main
loop when we've finished, rather than returning directly from
the loop.  Use a local variable to track whether we're still
searching for the preferred simdlen.  Make vect_epilogues
record whether the next iteration should try to treat the
loop as an epilogue.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c2019-10-31 17:15:24.838521761 +
+++ gcc/tree-vect-loop.c2019-11-04 15:17:35.924940111 +
@@ -2383,16 +2383,13 @@ vect_analyze_loop (class loop *loop, loo
   poly_uint64 lowest_th = 0;
   unsigned vectorized_loops = 0;
 
-  /* Only vectorize epilogues if PARAM_VECT_EPILOGUES_NOMASK is enabled, this
- is not a simd loop and it is the most inner loop.  */
-  bool vect_epilogues
-= !loop->simdlen && loop->inner == NULL
-  && PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK);
+  bool vect_epilogues = false;
+  opt_result res = opt_result::success ();
+  unsigned HOST_WIDE_INT simdlen = loop->simdlen;
   while (1)
 {
   /* Check the CFG characteristics of the loop (nesting, entry/exit).  */
-  opt_loop_vec_info loop_vinfo
-   = vect_analyze_loop_form (loop, shared);
+  opt_loop_vec_info loop_vinfo = vect_analyze_loop_form (loop, shared);
   if (!loop_vinfo)
{
  if (dump_enabled_p ())
@@ -2407,67 +2404,70 @@ vect_analyze_loop (class loop *loop, loo
 
   if (orig_loop_vinfo)
LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
-  else if (vect_epilogues && first_loop_vinfo)
+  else if (vect_epilogues)
LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
 
-  opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, _stmts);
+  res = vect_analyze_loop_2 (loop_vinfo, fatal, _stmts);
   if (next_size == 0)
autodetected_vector_size = loop_vinfo->vector_size;
 
+  loop->aux = NULL;
   if (res)
{
  LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
  vectorized_loops++;
 
- if ((loop->simdlen
-  && maybe_ne (LOOP_VINFO_VECT_FACTOR (loop_vinfo),
-   (unsigned HOST_WIDE_INT) loop->simdlen))
- || vect_epilogues)
+ /* Once we hit the desired simdlen for the first time,
+discard any previous attempts.  */
+ if (simdlen
+ && known_eq (LOOP_VINFO_VECT_FACTOR (loop_vinfo), simdlen))
{
- if (first_loop_vinfo == NULL)
-   {
- first_loop_vinfo = loop_vinfo;
- lowest_th
-   = LOOP_VINFO_VERSIONING_THRESHOLD (first_loop_vinfo);
- loop->aux = NULL;
-   }
- else
-   {
- /* Keep track of vector sizes that we know we can vectorize
-the epilogue with.  Only vectorize first epilogue.  */
- if (vect_epilogues
- && first_loop_vinfo->epilogue_vinfos.is_empty ())
-   {
- loop->aux = NULL;
- first_loop_vinfo->epilogue_vinfos.reserve (1);
- first_loop_vinfo->epilogue_vinfos.quick_push (loop_vinfo);
- LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = first_loop_vinfo;
- poly_uint64 th
-   = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
- gcc_assert (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
- || maybe_ne (lowest_th, 0U));
- /* Keep track of the known smallest versioning
-threshold.  */
- if (ordered_p (lowest_th, th))
-   lowest_th = ordered_min (lowest_th, th);
-   }
- else
-   delete loop_vinfo;
-   }
+ delete first_loop_vinfo;
+ first_loop_vinfo = opt_loop_vec_info::success (NULL);
+ LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = NULL;
+ simdlen = 0;
}
- else
+
+ if 

Re: [PATCH] Fix hash_operand for fields of a CONSTRUCTOR.

2019-11-04 Thread Jeff Law
On 11/4/19 6:36 AM, Richard Biener wrote:
> On Mon, Nov 4, 2019 at 2:35 PM Richard Biener
>  wrote:
>>
>> On Mon, Nov 4, 2019 at 10:09 AM Martin Liška  wrote:
>>>
>>> On 11/1/19 10:51 PM, Jeff Law wrote:
 On 10/31/19 10:01 AM, Martin Liška wrote:
> Hi.
>
> operand_equal_p can properly handle situation where we have a CONSTRUCTOR
> where indices are NULL:
>
>if (!operand_equal_p (c0->value, c1->value, flags)
>/* In GIMPLE the indexes can be either NULL or matching i.
>   Double check this so we won't get false
>   positives for GENERIC.  */
>|| (c0->index
>&& (TREE_CODE (c0->index) != INTEGER_CST
>|| compare_tree_int (c0->index, i)))
>|| (c1->index
>&& (TREE_CODE (c1->index) != INTEGER_CST
>|| compare_tree_int (c1->index, i
>  return false;
>
> but the corresponding hash function always hashes field (which
> can be NULL_TREE or equal to ctor index).
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?
> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> 2019-10-31  Martin Liska  
>
>  PR ipa/92304
>  * fold-const.c (operand_compare::hash_operand): Fix field
>  hashing of CONSTRUCTOR.
 OK.  One question though, do these routines need to handle
 CONSTRUCTOR_NO_CLEARING?
>>>
>>> Good point, but I bet it's just a flag used in GENERIC, right?
>>
>> Yes.  It matters for gimplification only.  I don't think we can
>> optimistically make use of it in operand_equal_p.
> 
> OTOH for GENERIC and sth like ICF the flags have to match.
Precisely my concern.  I'm not immediately aware of any case where it
matters, but it'd be nice to future proof this if we can.

jeff



Re: [PATCH] Fix hash_operand for fields of a CONSTRUCTOR.

2019-11-04 Thread Jeff Law
On 11/4/19 6:35 AM, Richard Biener wrote:
> On Mon, Nov 4, 2019 at 10:09 AM Martin Liška  wrote:
>>
>> On 11/1/19 10:51 PM, Jeff Law wrote:
>>> On 10/31/19 10:01 AM, Martin Liška wrote:
 Hi.

 operand_equal_p can properly handle situation where we have a CONSTRUCTOR
 where indices are NULL:

if (!operand_equal_p (c0->value, c1->value, flags)
/* In GIMPLE the indexes can be either NULL or matching i.
   Double check this so we won't get false
   positives for GENERIC.  */
|| (c0->index
&& (TREE_CODE (c0->index) != INTEGER_CST
|| compare_tree_int (c0->index, i)))
|| (c1->index
&& (TREE_CODE (c1->index) != INTEGER_CST
|| compare_tree_int (c1->index, i
  return false;

 but the corresponding hash function always hashes field (which
 can be NULL_TREE or equal to ctor index).

 Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

 Ready to be installed?
 Thanks,
 Martin

 gcc/ChangeLog:

 2019-10-31  Martin Liska  

  PR ipa/92304
  * fold-const.c (operand_compare::hash_operand): Fix field
  hashing of CONSTRUCTOR.
>>> OK.  One question though, do these routines need to handle
>>> CONSTRUCTOR_NO_CLEARING?
>>
>> Good point, but I bet it's just a flag used in GENERIC, right?
> 
> Yes.  It matters for gimplification only.  I don't think we can
> optimistically make use of it in operand_equal_p.
I'm not thinking about optimistically using it, quite the opposite :-)

My worry is that we could potentially consider two CONSTRUCTOR nodes
equal in the hashing and equality functions when they really aren't
equal due to the semantics around CONSTRUCTOR_NO_CLEARING.

jeff



[0/4] Vector epilogues vs. mixed vector sizes

2019-11-04 Thread Richard Sandiford
This patch bridges the gap between the recent epilogue vectorisation
patches and https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01822.html .
I don't have any evidence that the series is independently useful,
but it shouldn't make things worse either.

Tested individually on aarch64-linux-gnu and as a series on
x86_64-linux-gnu.  OK to install?

Richard


Re: [PATCH] Support multiple registers for the frame pointer

2019-11-04 Thread Vladimir Makarov



On 2019-11-02 1:28 p.m., Kwok Cheung Yeung wrote:
The AMD GCN architecture uses 64-bit pointers, but the scalar 
registers are 32-bit wide, so pointers must reside in a pair of 
registers.


The two hard registers holding the frame pointer are currently fixed, 
but if they are changed to unfixed (so that the FP can be eliminated), 
GCC would sometimes allocate the second register to a pseudo while the 
frame pointer was in use, clobbering the value of the FP and crashing 
the program.


GCC currently does not handle multi-register hard frame pointers 
properly - no_unit_alloc_regs, regs_ever_live, eliminable_regset and 
ira_no_alloc_regs (which gets copied to lra_no_alloc_regs) are only 
set for HARD_FRAME_POINTER_REGNUM and not for any subsequent registers 
that may be used, which means that the register allocators consider 
HARD_FRAME_POINTER_REGNUM+1 free. This patch determines the number of 
registers needed to store the frame pointer using hard_regno_nregs, 
and sets the required variables for HARD_FRAME_POINTER_REGNUM and 
however many adjacent registers are needed (which on most 
architectures should be zero).


Bootstrapped on x86_64 and tested with no regressions, which is not 
surprising as nothing different happens when the FP fits into a single 
register. I believe this is true for the 64-bit variants of the more 
popular architectures as well (ARM, RS6000, MIPS, Sparc). Are there 
any other architectures similar to GCN (i.e. 64-bit pointers with 
32-bit GPRs)?


I have not included any specific testcases for this issue as it can 
affect pretty much everything not using -fomit-frame-pointer on AMD GCN.


Okay for trunk?



Yes.  You can commit the patch to the trunk.

Thank you.



Re: [Patch][Fortran] PR 92208 don't use function-result dummy variable as actual argument

2019-11-04 Thread Thomas König
Hi Tobias,

I think this is also OK for gcc 9. Backporting regression fixes should always 
be all right under normal circumstances.

Regards

Thomas

Re: [Patch][Fortran] PR 92208 don't use function-result dummy variable as actual argument

2019-11-04 Thread Tobias Burnus

On 10/30/19 8:06 PM, Thomas Koenig wrote:

OK for the trunk and GCC 9?


As far as I can see, this looks good.

So, OK for trunk. If it later turns out that there are problems
caused by this, I suspect we will hear about them soon enough :-)


What shall we do about GCC 9? https://gcc.gnu.org/PR92208 is marked as 
9/10 Regression.


We can either say in the PR: Won't fix (for GCC 9) – or we can backport 
the fix to GCC 9.


Suggestions?

Tobias

PS: GCC 9 patch attached (= trunk patch, only line number changes which 
patch handled); builds and regtests on the gcc-9-branch with 
x86_64-gnu-linux.


2019-11-04  Tobias Burnus  

	gcc/fortran/
	Backported from mainline
	2019-10-30  Tobias Burnus  

	PR fortran/92208
	* trans-array.c (gfc_conv_array_parameter): Only copy
	string-length backend_decl if expression is not a function.

	gcc/testsuite/
	Backported from mainline
	2019-10-30  Tobias Burnus  

	PR fortran/92208
	* gfortran.dg/pr92208.f90: New.
 
Index: gcc/fortran/trans-array.c
===
--- gcc/fortran/trans-array.c	(revision 277781)
+++ gcc/fortran/trans-array.c	(working copy)
@@ -8027,7 +8027,7 @@
 	  /* The components shall be deallocated before their containing entity.  */
 	  gfc_prepend_expr_to_block (>post, tmp);
 	}
-  if (expr->ts.type == BT_CHARACTER)
+  if (expr->ts.type == BT_CHARACTER && expr->expr_type != EXPR_FUNCTION)
 	se->string_length = expr->ts.u.cl->backend_decl;
   if (size)
 	array_parameter_size (se->expr, expr, size);
Index: gcc/testsuite/gfortran.dg/pr92208.f90
===
--- gcc/testsuite/gfortran.dg/pr92208.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr92208.f90	(working copy)
@@ -0,0 +1,39 @@
+! { dg-do run }
+!
+! PR fortran/92208
+!
+! Contributed by Nils Reiche
+!
+program stringtest
+  implicit none
+  integer, parameter :: noVars = 2
+
+!  print*, "varNames: ", createVarnames("var",noVars)
+  call function1(noVars,createVarnames("var",noVars),"path")
+
+contains
+
+function createVarnames(string,noVars) result(stringArray)
+  implicit none
+  character(len=*),intent(in)  :: string
+  integer, intent(in)  :: noVars
+  character(len=len_trim(string)+6), dimension(noVars) :: stringArray
+  integer :: i
+  do i=1,noVars
+write(stringArray(i),'(a,i0)') string, i
+  enddo
+end function createVarnames
+
+subroutine function1(noVars,varNames,path)
+  implicit none
+  integer, intent(in)  :: noVars
+  character(len=*), intent(in)  :: path
+  character(len=*), dimension(noVars) :: varNames
+
+  if (path /= 'path') stop 1
+  if (any(varNames /= ['var1', 'var2'])) stop 2
+  !print*, "function1-path: ", trim(path)
+  !print*, "function1-varNames: ", varNames
+end subroutine function1
+
+end program stringtest


Re: [PATCH] Add if-chain to switch conversion pass.

2019-11-04 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 03:23:20PM +0100, Martin Liška wrote:
> The patch adds a new pass that identifies a series of if-elseif
> statements and transform then into a GIMPLE switch (if possible).
> The pass runs right after tree-ssa pass and I decided to implement
> matching of various forms that are introduced by folder (fold_range_test):

Not a review, just a few questions:

1) what does it do if __builtin_expect* has been used, does it preserve
   the probabilities and if in the end decides to expand as ifs, are those
   probabilities retained through it?
2) for the reassoc-*.c testcases, do you get identical or better code
   with the patch?
3) shouldn't it be gimple-if-to-switch.c instead?
4) what code size effect does the patch have say on cc1plus (if you don't
   count the code changes of the patch itself, i.e. revert the patch in the
   stage3 and rebuild just the stage3)?

> +struct case_range
> +{
> +  /* Default constructor.  */
> +  case_range ():
> +m_min (NULL_TREE), m_max (NULL_TREE)

I admit I'm never sure about coding conventions for C++,
but shouldn't there be a space before :, or even better :
be on the next line before m_min ?

Jakub



[D] Remove unchecked to_constant in VECTOR_TYPE handling

2019-11-04 Thread Richard Sandiford
The SVE port now tries to register variable-length VECTOR_TYPEs
at start-up, so it's no longer possible to use the asserting
to_constant on the number of vector elements.  This patch punts
on variable element counts instead, just like we do for other
things that the frontend doesn't recognise.

The brace indentation matches the surrounding style.

Tested on aarch64-linux-gnu.  OK to install?

Richard


2019-11-04  Richard Sandiford  

gcc/d/
* d-builtins.cc (build_frontend_type): Cope with variable
TYPE_VECTOR_SUBPARTS.

Index: gcc/d/d-builtins.cc
===
--- gcc/d/d-builtins.cc 2019-08-21 14:58:05.567060154 +0100
+++ gcc/d/d-builtins.cc 2019-11-04 14:38:57.680814567 +
@@ -197,20 +197,23 @@ build_frontend_type (tree type)
   break;
 
 case VECTOR_TYPE:
+{
+  unsigned HOST_WIDE_INT nunits;
+  if (!TYPE_VECTOR_SUBPARTS (type).is_constant ())
+   break;
+
   dtype = build_frontend_type (TREE_TYPE (type));
-  if (dtype)
-   {
- poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type);
- dtype = dtype->sarrayOf (nunits.to_constant ())->addMod (mod);
+  if (!dtype)
+   break;
 
- if (dtype->nextOf ()->isTypeBasic () == NULL)
-   break;
+  dtype = dtype->sarrayOf (nunits)->addMod (mod);
+  if (dtype->nextOf ()->isTypeBasic () == NULL)
+   break;
 
- dtype = (TypeVector::create (Loc (), dtype))->addMod (mod);
- builtin_converted_decls.safe_push (builtin_data (dtype, type));
- return dtype;
-   }
-  break;
+  dtype = (TypeVector::create (Loc (), dtype))->addMod (mod);
+  builtin_converted_decls.safe_push (builtin_data (dtype, type));
+  return dtype;
+}
 
 case RECORD_TYPE:
 {


Re: [PATCH v2] PR85678: Change default to -fno-common

2019-11-04 Thread Wilco Dijkstra
Hi Richard,

>> > Please don't add -fcommon in lto.exp.
>>
>> So what is the best way to add an extra option to lto.exp?
>> Note dg-lto-options completely overrides the options from lto.exp, so I can't
>> use that except in tests which already use it.
>
> On what testcases do you need it at all?

These need it in order to run over the original set of LTO options. A 
possibility
would be to select one of the set of options and just run that using 
dg-lto-options
(assuming it's safe to use -flto-partition and/or -flinker-plugin on all 
targets).

PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_0.C line 3)
PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_0.C line 3)
PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_1.c line 1)
PASS->FAIL: g++.dg/lto/odr-6 2 (test for LTO warnings, odr-6_1.c line 1)
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 -flto 
-flto-partition=1to1 -fno-use-linker-plugin 
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 -flto 
-flto-partition=none -fuse-linker-plugin
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O0 -flto 
-fuse-linker-plugin -fno-fat-lto-objects 
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 -flto 
-flto-partition=1to1 -fno-use-linker-plugin 
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 -flto 
-flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects 
PASS->FAIL: g++.dg/lto/odr-6 cp_lto_odr-6_0.o-cp_lto_odr-6_1.o link, -O2 -flto 
-fuse-linker-plugin


PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
-flto -flto-partition=1to1 -fno-use-linker-plugin 
PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
-flto -flto-partition=none -fuse-linker-plugin
PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O0 
-flto -fuse-linker-plugin -fno-fat-lto-objects 
PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
-flto -flto-partition=1to1 -fno-use-linker-plugin 
PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
-flto -flto-partition=none -fuse-linker-plugin -fno-fat-lto-objects 
PASS->FAIL: gcc.dg/lto/pr88077 c_lto_pr88077_0.o-c_lto_pr88077_1.o link, -O2 
-flto -fuse-linker-plugin


Wilco


Re: Optimize handling of inline summaries

2019-11-04 Thread Martin Liška

On 11/4/19 3:12 PM, Jan Hubicka wrote:

Martin, do you know why this flag was introduced?


Hi.

The flag is used in IPA CP:

call_summary 

class edge_clone_summary
{
...
  cgraph_edge *prev_clone;
  cgraph_edge *next_clone;
}

Apparently, IPA CP links all clones in one linked link. If you disable it:
diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 8a5f8d362f6..270e2e047a1 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -3461,7 +3461,7 @@ public:
   edge_clone_summary_t (symbol_table *symtab):
 call_summary  (symtab)
 {
-  m_initialize_when_cloning = true;
+//  m_initialize_when_cloning = true;
 }
 
   virtual void duplicate (cgraph_edge *src_edge, cgraph_edge *dst_edge,


You'll see following fallout:
FAIL: gcc.dg/ipa/ipcp-2.c scan-ipa-dump-times cp "Creating a specialized node of 
foo" 1
FAIL: gcc.dg/ipa/ipcp-2.c scan-ipa-dump-times cp "replacing param .. p with const 
0" 3
FAIL: gcc.dg/ipa/ipcp-2.c scan-ipa-dump cp "replacing param .0 s with const 4"
FAIL: gcc.dg/ipa/ipcp-agg-5.c scan-tree-dump-not optimized "->c;"
FAIL: gcc.dg/ipa/ipa-5.c scan-ipa-dump-times cp "Creating a specialized node" 3
FAIL: gcc.dg/ipa/ipcp-agg-6.c scan-ipa-dump-times cp "Creating a specialized node of 
foo/[0-9]*\\." 2
FAIL: gcc.dg/ipa/ipcp-agg-6.c scan-ipa-dump-times cp "Aggregate replacements:" 
10
FAIL: gcc.dg/ipa/ipcp-agg-6.c scan-tree-dump-not optimized "->c;"
FAIL: gcc.dg/ipa/ipcp-cstagg-2.c scan-ipa-dump cp "Discovered an indirect call to a 
known target"

But yes, by default we should not enable the m_initialize_when_cloning flag.

Martin






[PATCH] Fix PR92345

2019-11-04 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-11-04  Richard Biener  

PR tree-optimization/92345
* tree-vect-loop.c (vect_is_simple_reduction): Return whether
we produced a reduction chain.
(vect_analyze_scalar_cycles_1): Do not add reduction chains to
LOOP_VINFO_REDUCTIONS.

* gcc.dg/torture/pr92345.c: New testcase.

Index: gcc/tree-vect-loop.c
===
--- gcc/tree-vect-loop.c(revision 21)
+++ gcc/tree-vect-loop.c(working copy)
@@ -155,7 +155,7 @@ along with GCC; see the file COPYING3.
 
 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
 static stmt_vec_info vect_is_simple_reduction (loop_vec_info, stmt_vec_info,
-  bool *);
+  bool *, bool *);
 
 /* Subroutine of vect_determine_vf_for_stmt that handles only one
statement.  VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
@@ -489,7 +489,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
   tree init, step;
   auto_vec worklist;
   gphi_iterator gsi;
-  bool double_reduc;
+  bool double_reduc, reduc_chain;
 
   DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles");
 
@@ -561,7 +561,8 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
  && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
 
   stmt_vec_info reduc_stmt_info
-   = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, _reduc);
+   = vect_is_simple_reduction (loop_vinfo, stmt_vinfo, _reduc,
+   _chain);
   if (reduc_stmt_info)
 {
  STMT_VINFO_REDUC_DEF (stmt_vinfo) = reduc_stmt_info;
@@ -596,7 +597,7 @@ vect_analyze_scalar_cycles_1 (loop_vec_i
   /* Store the reduction cycles for possible vectorization in
  loop-aware SLP if it was not detected as reduction
 chain.  */
- if (! REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
+ if (! reduc_chain)
LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
  (reduc_stmt_info);
 }
@@ -2854,7 +2866,7 @@ check_reduction_path (dump_user_location
 
 static stmt_vec_info
 vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
- bool *double_reduc)
+ bool *double_reduc, bool *reduc_chain_p)
 {
   gphi *phi = as_a  (phi_info->stmt);
   gimple *phi_use_stmt = NULL;
@@ -2862,6 +2874,7 @@ vect_is_simple_reduction (loop_vec_info
   use_operand_p use_p;
 
   *double_reduc = false;
+  *reduc_chain_p = false;
   STMT_VINFO_REDUC_TYPE (phi_info) = TREE_CODE_REDUCTION;
 
   tree phi_name = PHI_RESULT (phi);
@@ -3036,6 +3049,7 @@ vect_is_simple_reduction (loop_vec_info
  LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (reduc_chain[0]);
  REDUC_GROUP_SIZE (reduc_chain[0]) = reduc_chain.length ();
 
+ *reduc_chain_p = true;
  if (dump_enabled_p ())
dump_printf_loc (MSG_NOTE, vect_location,
"reduction: detected reduction chain\n");
Index: gcc/testsuite/gcc.dg/torture/pr92345.c
===
--- gcc/testsuite/gcc.dg/torture/pr92345.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr92345.c  (working copy)
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-ftree-vectorize" } */
+
+long int x1;
+int fr;
+
+int
+us (int sk, int jx)
+{
+  while (sk < 1)
+{
+  jx *= 2;
+  fr += x1 + 1;
+  ++sk;
+}
+
+  return jx;
+}


Re: [Patch,Fortran] PR92284 – gfc_desc_to_cfi_desc fixes

2019-11-04 Thread Tobias Burnus
I have now committed the patch to the GCC 9 branch – and added the test 
case for PR92277 (the ICE was a GCC 10 regression, only). – Rev. 277781.


Thanks for the review.

Tobias

PS: The release-manage item came up because the GCC home page linked to 
the wrong GCC 9 status report (pre-9.2 release instead of post-9.2 
release; both: August 2019). [Now fixed.]


On 10/31/19 10:46 AM, Paul Richard Thomas wrote:

Hi Tobias,

OK for trunk and for 9-branch. As with the patch for PR92277, you will
have to get release manager approval for 9.2.

Thanks for working on this.

Cheers

Paul

On Wed, 30 Oct 2019 at 23:29, Tobias Burnus  wrote:

Playing with the PR92284 test case revealed two issues related to
gfc_desc_to_cfi_desc:

* Access of uninitialized memory – copying the array bounds (in
libgfortran) does not make sense for unallocted allocatables and
nullified pointers. Hence, check for ".data == NULL".

* There is a memory leak. I misunderstood the dump when fixing PR91863
(rev.277502).
https://gcc.gnu.org/ml/gcc-patches/2019-10/msg01651.html

Regarding the latter: If one passed gfc_desc_to_cfi_desc a pointer var,
pointing to NULL, as CFI (Bind(C) array descriptor) argument,
libgfortran allocates the memory for the descriptor – which at some
point has to be freed.

Contrary to the original version, one can free that memory
unconditionally. (Not only because "free" handles NULL pointers but –
unless "malloc" failed – we know that ptr has been malloced.) I also
tried to make the comments a bit clearer.

Build and regtested.
OK for trunk and GCC 9 (the latter is also affected)?

Tobias

PR: Related pending patch:
https://gcc.gnu.org/ml/gcc-patches/2019-10/msg02148.html
Also missing: At the end of a bind(C) procedure written in Fortran,
allocatable/pointers array arguments need get updated: the "data" and
the bounds part of the array descriptor might have changed while running
the procedure body. Cf. this PR and PR 92189



commit be9a9d263b97083e1d7476af04f82f918e0a3b06
Author: burnus 
Date:   Mon Nov 4 14:14:43 2019 +

Backport Fortran BIND(C) fixes

gcc/fortran/
Backport from mainline
2019-10-31  Tobias Burnus  

PR fortran/92284.
* trans-expr.c (gfc_conv_gfc_desc_to_cfi_desc): Free CFI descriptor
at the end; partial revised revert of Rev. 277502.


gcc/testsuite/
Backport from mainline
2019-10-31  Jakub Jelinek  

PR fortran/92284
* gfortran.dg/bind_c_array_params_3_aux.c: Include
../../../libgfortran/ISO_Fortran_binding.h rather than
ISO_Fortran_binding.h.

2019-10-31  Tobias Burnus  

PR fortran/92284
* gfortran.dg/bind-c-intent-out.f90: Update expected dump;
extend comment.
* gfortran.dg/bind_c_array_params_3.f90: New.
* gfortran.dg/bind_c_array_params_3_aux.c: New.

2019-10-31  Tobias Burnus  

PR fortran/92277
* fortran.dg/pr92277.f90: New.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gcc-9-branch@277781 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 759390f5832..c4c16d9a462 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,12 @@
+2019-11-04  Tobias Burnus  
+
+	Backport from mainline
+	2019-10-31  Tobias Burnus  
+
+	PR fortran/92284.
+	* trans-expr.c (gfc_conv_gfc_desc_to_cfi_desc): Free CFI descriptor
+	at the end; partial revised revert of Rev. 277502.
+
 2019-10-28  Paul Thomas  
 
 	Backport from trunk
diff --git a/gcc/fortran/trans-expr.c b/gcc/fortran/trans-expr.c
index 71f29883173..245e656a1f8 100644
--- a/gcc/fortran/trans-expr.c
+++ b/gcc/fortran/trans-expr.c
@@ -5090,13 +5090,13 @@ gfc_conv_gfc_desc_to_cfi_desc (gfc_se *parmse, gfc_expr *e, gfc_symbol *fsym)
   /* Now pass the gfc_descriptor by reference.  */
   parmse->expr = gfc_build_addr_expr (NULL_TREE, parmse->expr);
 
-  /* Variables to point to the gfc and CFI descriptors.  */
+  /* Variables to point to the gfc and CFI descriptors; cfi = NULL implies
+ that the CFI descriptor is allocated by the gfor_fndecl_gfc_to_cfi call.  */
   gfc_desc_ptr = parmse->expr;
   cfi_desc_ptr = gfc_create_var (pvoid_type_node, "cfi");
-  gfc_add_modify (>pre, cfi_desc_ptr,
-		  build_int_cst (pvoid_type_node, 0));
+  gfc_add_modify (>pre, cfi_desc_ptr, null_pointer_node);
 
-  /* Allocate the CFI descriptor and fill the fields.  */
+  /* Allocate the CFI descriptor itself and fill the fields.  */
   tmp = gfc_build_addr_expr (NULL_TREE, cfi_desc_ptr);
   tmp = build_call_expr_loc (input_location,
 			 gfor_fndecl_gfc_to_cfi, 2, tmp, gfc_desc_ptr);
@@ -5111,6 +5111,10 @@ gfc_conv_gfc_desc_to_cfi_desc (gfc_se *parmse, gfc_expr *e, gfc_symbol *fsym)
   /* The CFI descriptor is passed to the bind_C procedure.  */
   parmse->expr = 

Optimize handling of inline summaries

2019-11-04 Thread Jan Hubicka
Hi,
this patch turns edge growth cache into fast summary and fixes
some fallout.  I have noticed that we still use get_create on many
places to access inline edge summary and turning them into gets
uncovered furhter problems.

I have added sanity check to estimate_calls_size_and_time that
inlined edges have no summaries which turned out to fail becuase
symbols-summaries for some reason have flag m_initialize_when_cloning
which is set to true for call summaries (and is non-existent for
function summaries).  When this flag is set at the time of
edge duplication we create empty summary for source edge (if
non-existent) and then copy it via hook to new edge.

Martin, do you know why this flag was introduced?

I have disabled it and noticed that cgraph_node::create_version_clone
calls node duplication hooks for no good reason: newly produced function
will get body and will eventually be registered as new function to
callgraph. Additionally inliner sometimes attempt to update overall
summary of already inlined functions and also functions with -O0.
Both is deemed to fail since we do not have any analysis data on them.

lto-bootstrapped/regtested x86_64-linux, comitted.

* cgraphclones.c (cgraph_node::create_version_clone): Do not
duplicate summaries.
* ipa-fnsummary.c (ipa_fn_summary_alloc): Allocate size summary
first.
(ipa_fn_summary_t::duplicate): Use get instead of get_create to
access call summaries.
(dump_ipa_call_summary): Be ready for missing edge summaries.
(analyze_function_body): Use get instead of get_create to access
edge summary.
(estimate_calls_size_and_time): Do not access summaries of
inlined edges; sanity check they are missing.
(ipa_call_context::estimate_size_and_time): Use get instead
of get_create to access node summary.
(inline_update_callee_summaries): Do not update depth of
inlined edge.
(ipa_merge_fn_summary_after_inlining): Remove inline edge from
growth caches.
(ipa_merge_fn_summary_after_inlining): Use get instead
of get_create.
* ipa-fnsummary.h (ipa_remove_from_growth_caches): Declare.
* ipa-inline-analyssi.c (edge_growth_cache): Turn to
fast summary.
(initialize_growth_caches): Update.
(do_estimate_edge_time): Remove redundant copy of context.
(ipa_remove_from_growth_caches): New function.
* ipa-inline.c (flatten_function): Update overall summary
only when optimizing.
(inline_to_all_callers): Update overall summary of function
inlined to.
* ipa-inline.h (edge_growth_cache): Turn to fast summary.
* symbol-summary.h (call_summary_base): Set m_initialize_when_cloning
to false.
Index: cgraphclones.c
===
--- cgraphclones.c  (revision 277758)
+++ cgraphclones.c  (working copy)
@@ -892,8 +892,6 @@ cgraph_node::create_version_clone (tree
e->redirect_callee (new_version);
  }
 
-   symtab->call_cgraph_duplication_hooks (this, new_version);
-
dump_callgraph_transformation (this, new_version, suffix);
 
return new_version;
Index: ipa-fnsummary.c
===
--- ipa-fnsummary.c (revision 277759)
+++ ipa-fnsummary.c (working copy)
@@ -553,9 +553,9 @@ static void
 ipa_fn_summary_alloc (void)
 {
   gcc_checking_assert (!ipa_fn_summaries);
-  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
   ipa_size_summaries = new fast_function_summary 
 (symtab);
+  ipa_fn_summaries = ipa_fn_summary_t::create_ggc (symtab);
   ipa_call_summaries = new ipa_call_summary_t (symtab);
 }
 
@@ -688,7 +688,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
   for (edge = dst->callees; edge; edge = next)
{
  predicate new_predicate;
- class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+ class ipa_call_summary *es = ipa_call_summaries->get (edge);
  next = edge->next_callee;
 
  if (!edge->inline_failed)
@@ -707,7 +707,7 @@ ipa_fn_summary_t::duplicate (cgraph_node
   for (edge = dst->indirect_calls; edge; edge = next)
{
  predicate new_predicate;
- class ipa_call_summary *es = ipa_call_summaries->get_create (edge);
+ class ipa_call_summary *es = ipa_call_summaries->get (edge);
  next = edge->next_callee;
 
  gcc_checking_assert (edge->inline_failed);
@@ -787,12 +787,15 @@ dump_ipa_call_summary (FILE *f, int inde
   int i;
 
   fprintf (f,
-  "%*s%s/%i %s\n%*s  loop depth:%2i freq:%4.2f size:%2i time: %2i",
+  "%*s%s/%i %s\n%*s  freq:%4.2f",
   indent, "", callee->name (), callee->order,
   !edge->inline_failed
   ? "inlined" : cgraph_inline_failed_string (edge-> 

Re: [PATCH] ggc-common.c bootstrap breakage

2019-11-04 Thread Richard Biener
On Mon, Nov 4, 2019 at 2:55 PM David Edelsohn  wrote:
>
> The change to add malloc.h to ggc-common.c broke bootstrap.  system.h
> must be included before system header files.  The following patch
> fixes this, committed as obvious with concurrence by Richi.

Broke x86.

I'm testing the attached.

Richard.

> Thanks, David
>
> * ggc-common.c: Include system.h before malloc.h.
>
> Index: ggc-common.c
> ===
> --- ggc-common.c(revision 27)
> +++ ggc-common.c(working copy)
> @@ -21,10 +21,10 @@
> any particular GC implementation.  */
>
>  #include "config.h"
> +#include "system.h"
>  #ifdef HAVE_MALLINFO
>  #include 
>  #endif
> -#include "system.h"
>  #include "coretypes.h"
>  #include "timevar.h"
>  #include "diagnostic-core.h"


p
Description: Binary data


[PATCH] ggc-common.c bootstrap breakage

2019-11-04 Thread David Edelsohn
The change to add malloc.h to ggc-common.c broke bootstrap.  system.h
must be included before system header files.  The following patch
fixes this, committed as obvious with concurrence by Richi.

Thanks, David

* ggc-common.c: Include system.h before malloc.h.

Index: ggc-common.c
===
--- ggc-common.c(revision 27)
+++ ggc-common.c(working copy)
@@ -21,10 +21,10 @@
any particular GC implementation.  */

 #include "config.h"
+#include "system.h"
 #ifdef HAVE_MALLINFO
 #include 
 #endif
-#include "system.h"
 #include "coretypes.h"
 #include "timevar.h"
 #include "diagnostic-core.h"


Re: [PATCH v2] PR85678: Change default to -fno-common

2019-11-04 Thread Richard Biener
On Wed, Oct 30, 2019 at 3:33 PM Wilco Dijkstra  wrote:
>
> Hi Richard,
>
> > Please don't add -fcommon in lto.exp.
>
> So what is the best way to add an extra option to lto.exp?
> Note dg-lto-options completely overrides the options from lto.exp, so I can't
> use that except in tests which already use it.

On what testcases do you need it at all?

Richard.

> Cheers,
> Wilco


Re: [FYI] pass --enable-obsolete to build configure

2019-11-04 Thread Alexandre Oliva
On Nov  3, 2019, Andreas Schwab  wrote:

> Don't add unreleated changes.

Oh, thanks, I'm glad I failed to omit the regenerated file from the
email, I might have otherwise missed the unexpected changes made by
Trisquel's autoconf.

I've installed a pristine autoconf-2.69 from ftp.gnu.org, and used it to
rebuild the file, now there aren't any unrelated changes.

Here's the patch I'm checking in momentarily.  No changes but the
improved oneline description and the rebuilt gcc/configure.


pass --enable-obsolete down to gcc/configure for auto-build.h

Configuring GCC for obsolete targets works as long as build = host.
When it isn't, --enable-obsolete is not passed down to the additional
build configure started by gcc/configure, used to generate
auto-build.h.  The build configure fails and we end up without a
auto-build.h, but the host configure proceeds, so we only get a fatal
failure much later, when make realizes auto-build.h is not there and
there's no rule to create it.

This patch gets the host configure to fail when the build configure
does, leaving the temporary build configure dir behind for
investigation.  It also arranges for --eanble-obsolete to be passed
down to the build configure.

Alas, the latter triggered a warning in the build configure because
--enable-obsolete is not a recognized configure option.  That's not
reported in the host configure because of the
--disable-option-checking passed by the top-level configure, so I
arranged for that to be passed down to the build configure as well.

Finally, since my initial suspicion when investigating this failure
was that auto-build.h had been removed after configuration and there
was no rule to rebuild it, I'm adding rules to gcc/Makefile to get it
created or updated as needed.  Since it is configure that creates it,
as run by e.g. config.status --recheck, and config.status is created
after auto-build.h, I've made config.status depend on auto-build.h,
and added a dummy rule to create auto-build.h.  This would normally
not be enough to create a header when needed, but since Makefile
depends on config.status, and make first updates Makefile, it ends up
working, as long as nothing else that Makefile depends on requires
auto-build.h but not config.status.  The config.status dependency and
the auto-build.h rule are only enabled in the cases in which
auto-build.h is actually used, namely when build != host.


for  gcc/ChangeLog

* configure.ac: Pass --enable-obsolete=* and
--enable-option-checking=* down to build configure, and fail
if it fails.  AC_SUBST HAVE_AUTO_BUILD.
* configure: Rebuild.
* Makefile.in [HAVE_AUTO_BUILD] (auto-build.h): New rule.
[HAVE_AUTO_BUILD] (config.status): Depend on auto-build.h.
---
 gcc/Makefile.in  |8 
 gcc/configure|   13 ++---
 gcc/configure.ac |8 +++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 035b58f..95f054c 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1899,6 +1899,14 @@ cstamp-h: config.in config.status
CONFIG_FILES= \
LANGUAGES="$(CONFIG_LANGUAGES)" $(SHELL) config.status
 
+# On configurations that require auto-build.h, it is created while
+# running configure, so make config.status depend on it, so that
+# config.status --recheck runs and updates or creates it.
+@HAVE_AUTO_BUILD@auto-build.h: $(srcdir)/configure $(srcdir)/config.gcc
+@HAVE_AUTO_BUILD@  @if test -f $@; then echo rerunning config.status to 
update $@; \
+@HAVE_AUTO_BUILD@  else echo rerunning config.status to update $@; fi
+@HAVE_AUTO_BUILD@config.status: auto-build.h
+
 # Really, really stupid make features, such as SUN's KEEP_STATE, may force
 # a target to build even if it is up-to-date.  So we must verify that
 # config.status does not exist before failing.
diff --git a/gcc/configure b/gcc/configure
index 9fe0429..6808c23 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -774,6 +774,7 @@ LIBINTL_DEP
 LIBINTL
 USE_NLS
 get_gcc_base_ver
+HAVE_AUTO_BUILD
 extra_opt_files
 extra_modes_file
 NATIVE_SYSTEM_HEADER_DIR
@@ -12208,6 +12209,7 @@ done
 if test x$host = x$build
 then
build_auto=auto-host.h
+   HAVE_AUTO_BUILD='# '
 else
# We create a subdir, then run autoconf in the subdir.
# To prevent recursion we set host and build for the new
@@ -12230,7 +12232,10 @@ else
GMPINC="" CPPFLAGS="${CPPFLAGS} -DGENERATOR_FILE" \
${realsrcdir}/configure \
--enable-languages=${enable_languages-all} \
-   --target=$target_alias --host=$build_alias --build=$build_alias
+   ${enable_obsolete+--enable-obsolete="$enable_obsolete"} \
+   
${enable_option_checking+--enable-option-checking="$enable_option_checking"} \
+   --target=$target_alias --host=$build_alias \
+   --build=$build_alias || exit # retaining $tempdir
 
# We just finished tests for the build machine, so 

Re: [PATCH] Fix hash_operand for fields of a CONSTRUCTOR.

2019-11-04 Thread Richard Biener
On Mon, Nov 4, 2019 at 2:35 PM Richard Biener
 wrote:
>
> On Mon, Nov 4, 2019 at 10:09 AM Martin Liška  wrote:
> >
> > On 11/1/19 10:51 PM, Jeff Law wrote:
> > > On 10/31/19 10:01 AM, Martin Liška wrote:
> > >> Hi.
> > >>
> > >> operand_equal_p can properly handle situation where we have a CONSTRUCTOR
> > >> where indices are NULL:
> > >>
> > >>if (!operand_equal_p (c0->value, c1->value, flags)
> > >>/* In GIMPLE the indexes can be either NULL or matching i.
> > >>   Double check this so we won't get false
> > >>   positives for GENERIC.  */
> > >>|| (c0->index
> > >>&& (TREE_CODE (c0->index) != INTEGER_CST
> > >>|| compare_tree_int (c0->index, i)))
> > >>|| (c1->index
> > >>&& (TREE_CODE (c1->index) != INTEGER_CST
> > >>|| compare_tree_int (c1->index, i
> > >>  return false;
> > >>
> > >> but the corresponding hash function always hashes field (which
> > >> can be NULL_TREE or equal to ctor index).
> > >>
> > >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> > >>
> > >> Ready to be installed?
> > >> Thanks,
> > >> Martin
> > >>
> > >> gcc/ChangeLog:
> > >>
> > >> 2019-10-31  Martin Liska  
> > >>
> > >>  PR ipa/92304
> > >>  * fold-const.c (operand_compare::hash_operand): Fix field
> > >>  hashing of CONSTRUCTOR.
> > > OK.  One question though, do these routines need to handle
> > > CONSTRUCTOR_NO_CLEARING?
> >
> > Good point, but I bet it's just a flag used in GENERIC, right?
>
> Yes.  It matters for gimplification only.  I don't think we can
> optimistically make use of it in operand_equal_p.

OTOH for GENERIC and sth like ICF the flags have to match.

Richard.

> Richard.
>
> > Martin
> >
> > >
> > > jeff
> > >
> >


Re: [PATCH] Fix hash_operand for fields of a CONSTRUCTOR.

2019-11-04 Thread Richard Biener
On Mon, Nov 4, 2019 at 10:09 AM Martin Liška  wrote:
>
> On 11/1/19 10:51 PM, Jeff Law wrote:
> > On 10/31/19 10:01 AM, Martin Liška wrote:
> >> Hi.
> >>
> >> operand_equal_p can properly handle situation where we have a CONSTRUCTOR
> >> where indices are NULL:
> >>
> >>if (!operand_equal_p (c0->value, c1->value, flags)
> >>/* In GIMPLE the indexes can be either NULL or matching i.
> >>   Double check this so we won't get false
> >>   positives for GENERIC.  */
> >>|| (c0->index
> >>&& (TREE_CODE (c0->index) != INTEGER_CST
> >>|| compare_tree_int (c0->index, i)))
> >>|| (c1->index
> >>&& (TREE_CODE (c1->index) != INTEGER_CST
> >>|| compare_tree_int (c1->index, i
> >>  return false;
> >>
> >> but the corresponding hash function always hashes field (which
> >> can be NULL_TREE or equal to ctor index).
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Ready to be installed?
> >> Thanks,
> >> Martin
> >>
> >> gcc/ChangeLog:
> >>
> >> 2019-10-31  Martin Liska  
> >>
> >>  PR ipa/92304
> >>  * fold-const.c (operand_compare::hash_operand): Fix field
> >>  hashing of CONSTRUCTOR.
> > OK.  One question though, do these routines need to handle
> > CONSTRUCTOR_NO_CLEARING?
>
> Good point, but I bet it's just a flag used in GENERIC, right?

Yes.  It matters for gimplification only.  I don't think we can
optimistically make use of it in operand_equal_p.

Richard.

> Martin
>
> >
> > jeff
> >
>


Re: PR92163

2019-11-04 Thread Christophe Lyon
On Mon, 28 Oct 2019 at 16:03, Prathamesh Kulkarni
 wrote:
>
> On Mon, 28 Oct 2019 at 07:18, Richard Biener  
> wrote:
> >
> > On Fri, Oct 25, 2019 at 9:58 PM Prathamesh Kulkarni
> >  wrote:
> > >
> > > On Fri, 25 Oct 2019 at 13:19, Richard Biener  
> > > wrote:
> > > >
> > > > On Wed, Oct 23, 2019 at 11:45 PM Prathamesh Kulkarni
> > > >  wrote:
> > > > >
> > > > > Hi,
> > > > > The attached patch tries to fix PR92163 by calling
> > > > > gimple_purge_dead_eh_edges from ifcvt_local_dce if we need eh cleanup.
> > > > > Does it look OK ?
> > > >
> > > > Hmm.  I think it shows an issue with the return value of 
> > > > remove_stmt_form_eh_lp
> > > > which is true if the LP index is -1 (externally throwing).  We don't
> > > > need to purge
> > > > any edges in that case.  That is, if-conversion should never need to
> > > > do EH purging
> > > > since that would be wrong-code.
> > > >
> > > > As of the segfault can you please instead either pass down 
> > > > need_eh_cleanup
> > > > as function parameter (and NULL from ifcvt) or use the return value in 
> > > > DSE
> > > > to set the bit in the caller.
> > > Hi Richard,
> > > Thanks for the suggestions, does the attached patch look OK ?
> > > Bootstrap+test in progress on x86_64-unknown-linux-gnu.
> >
> > OK.
> Thanks, committed to trunk in r277525 after bootstrap+test on
> x86_64-unknown-linux-gnu.
>

Hi Prathamesh,

There's a problem with the new test you added: if uses -fopenacc which
is not supported by arm-eabi or aarch64-elf targets for instance.
You probably want to move the test to gcc.dg/goacc or add
dg-require-effective-target fopenacc.

Thanks,

Christophe

> Thanks,
> Prathamesh
> >
> > Richard.
> >
> > > Thanks,
> > > Prathamesh
> > > >
> > > > Thanks,
> > > > Richard.
> > > >
> > > > > Thanks,
> > > > > Prathamesh


Re: [patch] testsuite/libgomp.fortran/pr66199-2.f90 – remove dg-options

2019-11-04 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 02:02:38PM +0100, Tobias Burnus wrote:
> On 11/4/19 1:29 PM, Jakub Jelinek wrote:
> > Note, pr66199-1.f90 has the same issue. And I'd probably go for the
> > removal of dg-do run instead.
> 
> Well, there are even more (ignoring those which have -std=legacy or
> -fno-inline or … in dg-options):
> libgomp.fortran/examples-4/declare_target-5.f90,
> libgomp.fortran/taskloop[234].f90
> 
> libgomp.fortran/examples-4/declare_target-5.f90:! { dg-do run { target 
> vect_simd_clones } }

That vect_simd_clones can't be dropped of course.

> I assume, you prefer no "dg-do run" for those as well:
> How about the attached patch?

But your patch doesn't do that, so ok for trunk.

> 2019-11-04  Tobias Burnus  
> 
>   * testsuite/libgomp.fortran/pr66199-1.f90: Remove
>   'dg-do run' (implies torture test) as 'dg-options "O2"' is used.
>   * testsuite/libgomp.fortran/pr66199-2.f90: Ditto.
>   * testsuite/libgomp.fortran/taskloop2.f90: Ditto.
>   * testsuite/libgomp.fortran/taskloop3.f90: Ditto.
>   * testsuite/libgomp.fortran/taskloop4.f90: Ditto.

Jakub



Re: [patch] testsuite/libgomp.fortran/pr66199-2.f90 – remove dg-options

2019-11-04 Thread Tobias Burnus

On 11/4/19 1:29 PM, Jakub Jelinek wrote:
Note, pr66199-1.f90 has the same issue. And I'd probably go for the 
removal of dg-do run instead.


Well, there are even more (ignoring those which have -std=legacy or 
-fno-inline or … in dg-options): 
libgomp.fortran/examples-4/declare_target-5.f90, 
libgomp.fortran/taskloop[234].f90


libgomp.fortran/examples-4/declare_target-5.f90:! { dg-do run { target 
vect_simd_clones } }

I assume, you prefer no "dg-do run" for those as well:
How about the attached patch?

Tobias

2019-11-04  Tobias Burnus  

	* testsuite/libgomp.fortran/pr66199-1.f90: Remove
	'dg-do run' (implies torture test) as 'dg-options "O2"' is used.
	* testsuite/libgomp.fortran/pr66199-2.f90: Ditto.
	* testsuite/libgomp.fortran/taskloop2.f90: Ditto.
	* testsuite/libgomp.fortran/taskloop3.f90: Ditto.
	* testsuite/libgomp.fortran/taskloop4.f90: Ditto.

diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-1.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-1.f90
index 38cac5e4573..6c9e566cf86 100644
--- a/libgomp/testsuite/libgomp.fortran/pr66199-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-1.f90
@@ -1,5 +1,4 @@
 ! PR middle-end/66199
-! { dg-do run }
 ! { dg-options "-O2" }
 
   integer :: u(1024), v(1024), w(1024), a, b, c, d, e, a1, b1, a2, b2, d1, d2
diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-2.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
index 0cc0fa5097f..fef15b2bb57 100644
--- a/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
@@ -1,5 +1,4 @@
 ! PR middle-end/66199
-! { dg-do run }
 ! { dg-options "-O2" }
 
   integer :: u(1024), v(1024), w(1024), a, b, c, d, e, a1, b1, a2, b2, d1, d2
diff --git a/libgomp/testsuite/libgomp.fortran/taskloop2.f90 b/libgomp/testsuite/libgomp.fortran/taskloop2.f90
index d5646544d50..a20242f9774 100644
--- a/libgomp/testsuite/libgomp.fortran/taskloop2.f90
+++ b/libgomp/testsuite/libgomp.fortran/taskloop2.f90
@@ -1,4 +1,3 @@
-! { dg-do run }
 ! { dg-options "-O2" }
 ! { dg-additional-options "-msse2" { target sse2_runtime } }
 ! { dg-additional-options "-mavx" { target avx_runtime } }
diff --git a/libgomp/testsuite/libgomp.fortran/taskloop3.f90 b/libgomp/testsuite/libgomp.fortran/taskloop3.f90
index 158966e39dd..c4e57ddd451 100644
--- a/libgomp/testsuite/libgomp.fortran/taskloop3.f90
+++ b/libgomp/testsuite/libgomp.fortran/taskloop3.f90
@@ -1,4 +1,3 @@
-! { dg-do run }
 ! { dg-options "-O2" }
 
   integer, save :: g
diff --git a/libgomp/testsuite/libgomp.fortran/taskloop4.f90 b/libgomp/testsuite/libgomp.fortran/taskloop4.f90
index 88062805f67..4f7a25b88ca 100644
--- a/libgomp/testsuite/libgomp.fortran/taskloop4.f90
+++ b/libgomp/testsuite/libgomp.fortran/taskloop4.f90
@@ -1,4 +1,3 @@
-! { dg-do run }
 ! { dg-options "-O2" }
 
   integer, save :: u(64), v


[COMMITTED] Fix incorrect use of USE_TM_CLONE_REGISTRY in crtstuff.c

2019-11-04 Thread Jozef Lawrynowicz
r272769 added the --disable-tm-clone-registry configure option. This expects
defining USE_TM_CLONE_REGISTRY to 0 to remove blocks of code from
libgcc/crtstuff.c.

However, some #if blocks in crtstuff.c only check whether USE_TM_CLONE_REGISTRY
is defined, so when it is defined to 0, these will still be true.

The attached patch changes the remaining instances of "#if
defined(USE_TM_CLONE_REGISTRY)" to merely check "#if USE_TM_CLONE_REGISTRY".

A new clause at the top of the file ensures the macro is always defined to a
value.

Successfully bootstrapped and regtested for x86-64-pc-linux-gnu.
Successfully regtested for msp430-elf.

Committed as obvious.
>From 47a6db26ddbedccf6a9270718421e54a681868ee Mon Sep 17 00:00:00 2001
From: jozefl 
Date: Mon, 4 Nov 2019 12:41:56 +
Subject: [PATCH] libgcc: Fix incorrect use of USE_TM_CLONE_REGISTRY

2019-11-04  Jozef Lawrynowicz  

	* crtstuff.c: Define USE_TM_CLONE_REGISTRY to 0 if it's undefined and
	the target output object format is not ELF.
	s/defined(USE_TM_CLONE_REGISTRY)/USE_TM_CLONE_REGISTRY.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@25 138bc75d-0d04-0410-961f-82ee72b054a4
---
 libgcc/ChangeLog  |  6 ++
 libgcc/crtstuff.c | 11 +--
 2 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/libgcc/ChangeLog b/libgcc/ChangeLog
index 806d048f5d1..c528cec2db3 100644
--- a/libgcc/ChangeLog
+++ b/libgcc/ChangeLog
@@ -1,3 +1,9 @@
+2019-11-04  Jozef Lawrynowicz  
+
+	* crtstuff.c: Define USE_TM_CLONE_REGISTRY to 0 if it's undefined and
+	the target output object format is not ELF.
+	s/defined(USE_TM_CLONE_REGISTRY)/USE_TM_CLONE_REGISTRY.
+
 2019-11-03  Oleg Endo  
 
 	PR libgcc/78804
diff --git a/libgcc/crtstuff.c b/libgcc/crtstuff.c
index c93e1cbcaca..ae6328d317d 100644
--- a/libgcc/crtstuff.c
+++ b/libgcc/crtstuff.c
@@ -153,6 +153,8 @@ call_ ## FUNC (void)	\
 
 #if !defined(USE_TM_CLONE_REGISTRY) && defined(OBJECT_FORMAT_ELF)
 # define USE_TM_CLONE_REGISTRY 1
+#elif !defined(USE_TM_CLONE_REGISTRY)
+# define USE_TM_CLONE_REGISTRY 0
 #endif
 
 /* We do not want to add the weak attribute to the declarations of these
@@ -450,8 +452,7 @@ CRT_CALL_STATIC_FUNCTION (__LIBGCC_INIT_SECTION_ASM_OP__,
 			  __do_global_dtors_aux_1)
 #endif
 
-#if defined(USE_EH_FRAME_REGISTRY) \
-|| defined(USE_TM_CLONE_REGISTRY)
+#if defined(USE_EH_FRAME_REGISTRY) || USE_TM_CLONE_REGISTRY
 /* Stick a call to __register_frame_info into the .init section.  For some
reason calls with no arguments work more reliably in .init, so stick the
call in another function.  */
@@ -560,8 +561,7 @@ __do_global_dtors (void)
 #endif
 }
 
-#if defined(USE_EH_FRAME_REGISTRY) \
-|| defined(USE_TM_CLONE_REGISTRY)
+#if defined(USE_EH_FRAME_REGISTRY) || USE_TM_CLONE_REGISTRY
 /* A helper function for __do_global_ctors, which is in crtend.o.  Here
in crtbegin.o, we can reference a couple of symbols not visible there.
Plus, since we're before libgcc.a, we have no problems referencing
@@ -733,8 +733,7 @@ void
 __do_global_ctors (void)
 {
   func_ptr *p;
-#if defined(USE_EH_FRAME_REGISTRY) \
-|| defined(USE_TM_CLONE_REGISTRY)
+#if defined(USE_EH_FRAME_REGISTRY) || USE_TM_CLONE_REGISTRY
   __do_global_ctors_1();
 #endif
   for (p = __CTOR_END__ - 1; *p != (func_ptr) -1; p--)
-- 
2.17.1



Re: [PR 70929] Remove gimple_call_types_likely_match_p... almost

2019-11-04 Thread Richard Biener
On Fri, 1 Nov 2019, Martin Jambor wrote:

> Hi,
> 
> I have spent some more time looking into PR 70929, how
> gimple_check_call_matching_types behaves when LTO-building Firefox to
> see what could replace it or if we perhaps could remove it.
> 
> TL;DR:
> I believe it can and should be removed, possibly except the use in
> value-prof.c where I replaced it with a clearly imprecise predicate to
> catch cases where the profile info is corrupted and since I had it, I
> also ended up using it for speculative devirtualization, in case it got
> its guess somehow wrong (but I have not seen either of the two cases
> happening).  See the patch below.
> 
> 
> More details:
> With LTO the predicate can always be fooled and so we cannot rely on it
> as means to prevent ICEs, if the user calls incompatible functions, the
> compiler should try to fix it with folding, VCEing or just using zero
> constructors in the most bogus of cases.
> 
> And trying to make the predicate more clever can be difficult.  When
> LTO-building Firefox (without PGO), gimple_check_call_matching_types
> returns false 8133 times and those cases can be divided into:
> 
>   - 2507x the callee was __builtin_constant_p
>   -   17x the callee was __builtin_signbit
>   - 1388x the callee was __builtin_unreachable
> 
>   - 4215x would pass the suggested test in comment 5 of the bug.  I
> examined quite a few and all were exactly the problem discussed in
> this PR - they were all deemed incompatible because one parameter
> was a reference to a TREE_ADDRESSABLE class.
> 
>   - 6x both predicates returned false for a target found by speculative
> devirtualization.  I tend to think they were both wrong because...
> 
> ...the predicate from comment #5 of the bug is not a good substitute
> because it returns false for perfectly fine virtual calls when the type
> of the call is a method belonging to an ancestor of the class to which
> the actual callee belongs.  Thousands of calls to AddRef did not pass
> the test.
> 
> Without finding any real case for having the predicate, I decided to
> remove its use from everywhere except for check_ic_target because its
> comment says:
> 
> /* Perform sanity check on the indirect call target. Due to race conditions,
>false function target may be attributed to an indirect call site. If the
>call expression type mismatches with the target function's type, 
> expand_call
>may ICE. Here we only do very minimal sanity check just to make compiler 
> happy.
>Returns true if TARGET is considered ok for call CALL_STMT.  */
> 
> and if some such race conditions really happen and can be detected if
> e.g. the number of parameters is clearly off, the compiler should
> probably discard the target.  But I did not want to keep the original
> clumsy predicate and therefore decided to extract the non-problematic
> bits of useless_type_conversion_p into:
> 
> /* Check the type of FNDECL and return true if it is likely compatible with 
> the
>callee type in CALL.  The check is imprecise, the intended use of this
>function is that when optimizations like value profiling and speculative
>devirtualization somehow guess a clearly wrong target of an indirect call,
>they can discard it.  */
> 
> bool
> gimple_call_types_likely_match_p (gcall *call, tree fndecl)
> {
>   tree call_type = gimple_call_fntype (call);
>   tree decl_type = TREE_TYPE (fndecl);
> 
>   /* If one is a function and the other a method, that's a mismatch.  */
>   if (TREE_CODE (call_type) != TREE_CODE (decl_type))
> return false;
>   /* If the return types are not compatible, bail out.  */
>   if (!useless_type_conversion_p (TREE_TYPE (call_type),
> TREE_TYPE (decl_type)))
> return false;
>   /* If the call was to an unprototyped function, all bets are off.  */
>   if (!prototype_p (call_type))
> return true;
> 
>   /* If the unqualified argument types are compatible, the types match.  */
>   if (TYPE_ARG_TYPES (call_type) == TYPE_ARG_TYPES (decl_type))
> return true;
> 
>   tree call_parm, decl_parm;
>   for (call_parm = TYPE_ARG_TYPES (call_type),
>decl_parm = TYPE_ARG_TYPES (decl_type);
>call_parm && decl_parm;
>call_parm = TREE_CHAIN (call_parm),
>decl_parm = TREE_CHAIN (decl_parm))
> if (!useless_type_conversion_p
>   (TYPE_MAIN_VARIANT (TREE_VALUE (call_parm)),
>TYPE_MAIN_VARIANT (TREE_VALUE (decl_parm
>   return false;
> 
>   /* If there is a mismatch in the number of arguments the functions
>  are not compatible.  */
>   if (call_parm || decl_parm)
> return false;
> 
>   return true;
> }
> 
> Crucially, the function is missing the part that does:
> 
>   /* Method types should belong to a compatible base class.  */
>   if (TREE_CODE (inner_type) == METHOD_TYPE
> && !useless_type_conversion_p (TYPE_METHOD_BASETYPE (outer_type),
>TYPE_METHOD_BASETYPE 

Re: [patch] testsuite/libgomp.fortran/pr66199-2.f90 – remove dg-options

2019-11-04 Thread Jakub Jelinek
On Mon, Nov 04, 2019 at 01:18:58PM +0100, Tobias Burnus wrote:
> This test case had from the beginning both
>  ! { dg-do run }
>  ! { dg-options "-O2" }
> 
> However, 'dg-do run' implies torture, i.e. multiple optimization options,
> while 'dg-options "-O2"' implies -O2, only.
> 
> This means that the test case is compiled 4 times with
> "-O0", "-O1", "-O3 -f…" and "-Os" – followed by "-O2".
> Hence, it is run 4 times with nearly identical options
> (With "-O3" [followed by "-O2"], the "-fomit-frame-pointer -funroll-loops 
> -fpeel-loops
> -ftracer -finline-functions" options do get added.)
> 
> Hence, we can either remove "dg-do run" or "dg-options". I suggest to remove
> the latter.
> 
> OK?

Note, pr66199-1.f90 has the same issue.  And I'd probably go for
the removal of dg-do run instead.

Jakub



[patch] testsuite/libgomp.fortran/pr66199-2.f90 – remove dg-options

2019-11-04 Thread Tobias Burnus

This test case had from the beginning both
 ! { dg-do run }
 ! { dg-options "-O2" }

However, 'dg-do run' implies torture, i.e. multiple optimization options,
while 'dg-options "-O2"' implies -O2, only.

This means that the test case is compiled 4 times with
"-O0", "-O1", "-O3 -f…" and "-Os" – followed by "-O2".
Hence, it is run 4 times with nearly identical options
(With "-O3" [followed by "-O2"], the "-fomit-frame-pointer -funroll-loops 
-fpeel-loops
-ftracer -finline-functions" options do get added.)

Hence, we can either remove "dg-do run" or "dg-options". I suggest to remove
the latter.

OK?

Tobias

2019-11-04  Tobias Burnus  

	* testsuite/libgomp.fortran/pr66199-2.f90: Remove
	dg-options from torture test.

diff --git a/libgomp/testsuite/libgomp.fortran/pr66199-2.f90 b/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
index 0cc0fa5097f..5ee74149089 100644
--- a/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
+++ b/libgomp/testsuite/libgomp.fortran/pr66199-2.f90
@@ -1,6 +1,5 @@
 ! PR middle-end/66199
 ! { dg-do run }
-! { dg-options "-O2" }
 
   integer :: u(1024), v(1024), w(1024), a, b, c, d, e, a1, b1, a2, b2, d1, d2
   a = 1


[COMMITTED] Regenerate gcc/configure

2019-11-04 Thread Jozef Lawrynowicz
r277753 regenerated gcc/configure but the version of autoconf used for the
regeneration added lines to support "runstatedir".

The attached patch is the result of regenerating configure with the correct
version/configuration of autoconf.
>From 04f59c00115d9e2517c15755f9058832703cb494 Mon Sep 17 00:00:00 2001
From: jozefl 
Date: Mon, 4 Nov 2019 11:18:24 +
Subject: [PATCH] Regenerate gcc/configure

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@23 138bc75d-0d04-0410-961f-82ee72b054a4
---
 gcc/ChangeLog |  4 
 gcc/configure | 28 
 2 files changed, 12 insertions(+), 20 deletions(-)

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index c1c6d335d04..6f0bd8381a0 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,7 @@
+2019-11-04  Jozef Lawrynowicz  
+
+	* configure: Regenerate.
+
 2019-11-04  Jozef Lawrynowicz  
 
 	* config/msp430/driver-msp430.c
diff --git a/gcc/configure b/gcc/configure
index 023d51d78fe..fd33bc661a8 100755
--- a/gcc/configure
+++ b/gcc/configure
@@ -903,7 +903,6 @@ infodir
 docdir
 oldincludedir
 includedir
-runstatedir
 localstatedir
 sharedstatedir
 sysconfdir
@@ -1068,7 +1067,6 @@ datadir='${datarootdir}'
 sysconfdir='${prefix}/etc'
 sharedstatedir='${prefix}/com'
 localstatedir='${prefix}/var'
-runstatedir='${localstatedir}/run'
 includedir='${prefix}/include'
 oldincludedir='/usr/include'
 docdir='${datarootdir}/doc/${PACKAGE}'
@@ -1321,15 +1319,6 @@ do
   | -silent | --silent | --silen | --sile | --sil)
 silent=yes ;;
 
-  -runstatedir | --runstatedir | --runstatedi | --runstated \
-  | --runstate | --runstat | --runsta | --runst | --runs \
-  | --run | --ru | --r)
-ac_prev=runstatedir ;;
-  -runstatedir=* | --runstatedir=* | --runstatedi=* | --runstated=* \
-  | --runstate=* | --runstat=* | --runsta=* | --runst=* | --runs=* \
-  | --run=* | --ru=* | --r=*)
-runstatedir=$ac_optarg ;;
-
   -sbindir | --sbindir | --sbindi | --sbind | --sbin | --sbi | --sb)
 ac_prev=sbindir ;;
   -sbindir=* | --sbindir=* | --sbindi=* | --sbind=* | --sbin=* \
@@ -1467,7 +1456,7 @@ fi
 for ac_var in	exec_prefix prefix bindir sbindir libexecdir datarootdir \
 		datadir sysconfdir sharedstatedir localstatedir includedir \
 		oldincludedir docdir infodir htmldir dvidir pdfdir psdir \
-		libdir localedir mandir runstatedir
+		libdir localedir mandir
 do
   eval ac_val=\$$ac_var
   # Remove trailing slashes.
@@ -1620,7 +1609,6 @@ Fine tuning of the installation directories:
   --sysconfdir=DIRread-only single-machine data [PREFIX/etc]
   --sharedstatedir=DIRmodifiable architecture-independent data [PREFIX/com]
   --localstatedir=DIR modifiable single-machine data [PREFIX/var]
-  --runstatedir=DIR   modifiable per-process data [LOCALSTATEDIR/run]
   --libdir=DIRobject code libraries [EPREFIX/lib]
   --includedir=DIRC header files [PREFIX/include]
   --oldincludedir=DIR C header files for non-gcc [/usr/include]
@@ -5905,7 +5893,7 @@ else
 We can't simply define LARGE_OFF_T to be 9223372036854775807,
 since some C++ compilers masquerading as C compilers
 incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		   && LARGE_OFF_T % 2147483647 == 1)
 		  ? 1 : -1];
@@ -5951,7 +5939,7 @@ else
 We can't simply define LARGE_OFF_T to be 9223372036854775807,
 since some C++ compilers masquerading as C compilers
 incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		   && LARGE_OFF_T % 2147483647 == 1)
 		  ? 1 : -1];
@@ -5975,7 +5963,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 We can't simply define LARGE_OFF_T to be 9223372036854775807,
 since some C++ compilers masquerading as C compilers
 incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		   && LARGE_OFF_T % 2147483647 == 1)
 		  ? 1 : -1];
@@ -6020,7 +6008,7 @@ else
 We can't simply define LARGE_OFF_T to be 9223372036854775807,
 since some C++ compilers masquerading as C compilers
 incorrectly reject 9223372036854775807.  */
-#define LARGE_OFF_T off_t) 1 << 31) << 31) - 1 + (((off_t) 1 << 31) << 31))
+#define LARGE_OFF_T (((off_t) 1 << 62) - 1 + ((off_t) 1 << 62))
   int off_t_is_large[(LARGE_OFF_T % 2147483629 == 721
 		   && LARGE_OFF_T % 2147483647 == 1)
 		  ? 1 : -1];
@@ -6044,7 +6032,7 @@ rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext
 We can't simply define 

[wwwdocs] Update to the new home of the GNU Coding Standards

2019-11-04 Thread Gerald Pfeifer
Committed.

Gerald

- Log -
commit cb3654b35594dc81dc78b01713142e4207d73191
Author: Gerald Pfeifer 
Date:   Sun Nov 3 20:42:22 2019 +

Update to the new home of the GNU Coding Standards

Format the surrounding HTML source a bit nicer.

diff --git a/htdocs/codingconventions.html b/htdocs/codingconventions.html
index 71a8772..f9dbc18 100644
--- a/htdocs/codingconventions.html
+++ b/htdocs/codingconventions.html
@@ -10,9 +10,8 @@
 GCC Coding Conventions
 
 There are some additional coding conventions for code in GCC,
-beyond those in the http://www.gnu.org/prep/standards_toc.html;>GNU Coding
-Standards.  Some existing code may not follow these conventions,
+beyond those in the https://www.gnu.org/prep/standards/;>GNU
+Coding Standards.  Some existing code may not follow these conventions,
 but they must be used for new code.  If changing existing
 code to follow these conventions, it is best to send changes to follow
 the conventions separately from any other changes to the code.
diff --git a/htdocs/contribute.html b/htdocs/contribute.html
index 499dfe8..80ebb26 100644
--- a/htdocs/contribute.html
+++ b/htdocs/contribute.html
@@ -62,11 +62,10 @@ copyright assignment on file.
 Coding Standards
 
 All contributions must conform to the http://www.gnu.org/prep/standards_toc.html;>GNU Coding
-Standards.  There are also some additional coding conventions for
-GCC; these include documentation and testsuite requirements as
-well as requirements on code formatting.
+href="https://www.gnu.org/prep/standards/;>GNU Coding Standards.
+There are also some additional coding
+conventions for GCC; these include documentation and testsuite
+requirements as well as requirements on code formatting.
 
 Submissions which do not conform to the standards will be returned
 with a request to address any such problems.  To help with the

---


[patch,committed] PR92305 libgomp/testsuite - use unique numbers with Fortran's 'stop'

2019-11-04 Thread Tobias Burnus

Committed as obvious (Rev. 277769).

Tobias

PS: Highest number is 242 – which still fits into 8 bit.

PPS: Instead of 'stop', one could also use 'error stop'. This prints 
'error stop' instead of 'stop' on stderr and might print a backtrace, 
depending on the target/libbacktrace availablility. Otherwise, it also 
passes the error code through, hence 256 = exit code 0 on Linux.


commit 9747aa88843d30bec8c11ce9b24735405251e949
Author: burnus 
Date:   Mon Nov 4 10:01:22 2019 +

libgomp/testsuite - use unique numbers with Fortran's 'stop'

PR fortran/92305
* testsuite/libgomp.fortran/allocatable2.f90: Use
unique numbers with 'stop'.
* testsuite/libgomp.fortran/use_device_addr-1.f90: Ditto.
* testsuite/libgomp.fortran/use_device_addr-2.f90: Ditto.
* testsuite/libgomp.fortran/use_device_ptr-1.f90: Ditto.
* testsuite/libgomp.oacc-fortran/lib-15.f90: Ditto.
* testsuite/libgomp.oacc-fortran/pset-1.f90: Ditto.



git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@277769 138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog
index 8cf7d958596..557a7512d6a 100644
--- a/libgomp/ChangeLog
+++ b/libgomp/ChangeLog
@@ -1,3 +1,14 @@
+2019-11-04  Tobias Burnus  
+
+	PR fortran/92305
+	* testsuite/libgomp.fortran/allocatable2.f90: Use
+	unique numbers with 'stop'.
+	* testsuite/libgomp.fortran/use_device_addr-1.f90: Ditto.
+	* testsuite/libgomp.fortran/use_device_addr-2.f90: Ditto.
+	* testsuite/libgomp.fortran/use_device_ptr-1.f90: Ditto.
+	* testsuite/libgomp.oacc-fortran/lib-15.f90: Ditto.
+	* testsuite/libgomp.oacc-fortran/pset-1.f90: Ditto.
+
 2019-11-01  Tobias Burnus  
 
 	* testsuite/libgomp.fortran/use_device_addr-1.f90 (test_nullptr_1,
diff --git a/libgomp/testsuite/libgomp.fortran/allocatable2.f90 b/libgomp/testsuite/libgomp.fortran/allocatable2.f90
index fa2e774f6ed..fbf81c214b8 100644
--- a/libgomp/testsuite/libgomp.fortran/allocatable2.f90
+++ b/libgomp/testsuite/libgomp.fortran/allocatable2.f90
@@ -16,7 +16,7 @@
   l = l.or..not.allocated (a)
   l = l.or.size(a).ne.12.or.size(a,1).ne.3.or.size(a,2).ne.4
 !$omp end parallel
-  if (l.or.any(a.ne.6)) stop 1
+  if (l.or.any(a.ne.6)) stop 2
 !$omp parallel num_threads (4) copyin (a) reduction(.or.:l) private (b)
   l = l.or.allocated (b)
   l = l.or..not.allocated (a)
@@ -37,11 +37,11 @@
   deallocate (b)
   l = l.or.allocated (b)
 !$omp end parallel
-  if (n.lt.0 .or. n.ge.4) stop 2
-  if (l.or.any(a.ne.(n + 36))) stop 3
+  if (n.lt.0 .or. n.ge.4) stop 3
+  if (l.or.any(a.ne.(n + 36))) stop 4
 !$omp parallel num_threads (4) reduction(.or.:l)
   deallocate (a)
   l = l.or.allocated (a)
 !$omp end parallel
-  if (l.or.allocated (a)) stop 4
+  if (l.or.allocated (a)) stop 5
 end
diff --git a/libgomp/testsuite/libgomp.fortran/use_device_addr-1.f90 b/libgomp/testsuite/libgomp.fortran/use_device_addr-1.f90
index 1183e49f2e4..94ac76f5700 100644
--- a/libgomp/testsuite/libgomp.fortran/use_device_addr-1.f90
+++ b/libgomp/testsuite/libgomp.fortran/use_device_addr-1.f90
@@ -135,26 +135,26 @@ contains
  call copy3_scalar(c_loc(aa), c_loc(bb))
  !$omp end target data
  if (abs(aa - 11.0_c_double) > 10.0_c_double * epsilon(aa)) stop 1
- if (abs(3.0_c_double * aa - bb) > 10.0_c_double * epsilon(aa)) stop 1
+ if (abs(3.0_c_double * aa - bb) > 10.0_c_double * epsilon(aa)) stop 2
 
  !$omp target data map(to:cc) map(from:dd) use_device_addr(cc,dd)
  call copy3_scalar(c_loc(cc), c_loc(dd))
  !$omp end target data
- if (abs(cc - 33.0_c_double) > 10.0_c_double * epsilon(cc)) stop 1
- if (abs(3.0_c_double * cc - dd) > 10.0_c_double * epsilon(cc)) stop 1
+ if (abs(cc - 33.0_c_double) > 10.0_c_double * epsilon(cc)) stop 3
+ if (abs(3.0_c_double * cc - dd) > 10.0_c_double * epsilon(cc)) stop 4
 
  !$omp target data map(to:ee) map(from:ff) use_device_addr(ee,ff)
  call copy3_scalar(c_loc(ee), c_loc(ff))
  !$omp end target data
- if (abs(ee - 55.0_c_double) > 10.0_c_double * epsilon(ee)) stop 1
- if (abs(3.0_c_double * ee - ff) > 10.0_c_double * epsilon(ee)) stop 1
+ if (abs(ee - 55.0_c_double) > 10.0_c_double * epsilon(ee)) stop 5
+ if (abs(3.0_c_double * ee - ff) > 10.0_c_double * epsilon(ee)) stop 6
 
 
  !$omp target data map(to:gg) map(from:hh) use_device_addr(gg,hh)
  call copy3_array(c_loc(gg), c_loc(hh), N)
  !$omp end target data
- if (any(abs(gg - 77.0_c_double) > 10.0_c_double * epsilon(gg))) stop 1
- if (any(abs(3.0_c_double * gg - hh) > 10.0_c_double * epsilon(gg))) stop 1
+ if (any(abs(gg - 77.0_c_double) > 10.0_c_double * epsilon(gg))) stop 7
+ if (any(abs(3.0_c_double * gg - hh) > 10.0_c_double * epsilon(gg))) stop 8
   end subroutine test_dummy_callee_1
 
   ! Save device ptr - and recall pointer
@@ -221,28 +221,28 @@ contains
  ! check c_loc ptr once
  call copy3_scalar(c_aptr, c_bptr)
  !$omp target 

  1   2   >