Re: [PATCH] Check calls before loop unrolling

2020-08-31 Thread Jiufu Guo via Gcc-patches
guojiufu  writes:

Hi,

In this patch, the default value of
param=max-unrolled-average-calls-x1 is '0', which means to unroll
a loop, there should be no call inside the body.  Do I need to set the
default value to a bigger value (16?) for later tune?  Biger value will
keep the behavior unchanged.

And is this patch ok for trunk?  Thanks a lot for you comments!

BR.
Jiufu.


> Hi,
>
> When unroll loops, if there are calls inside the loop, those calls
> may raise negative impacts for unrolling.  This patch adds a param
> param_max_unrolled_calls, and checks if the number of calls inside
> the loop bigger than this param, loop is prevent from unrolling.
>
> This patch is checking the _average_ number of calls which is the
> summary of call numbers multiply the possibility of the call maybe
> executed.  The _average_ number could be a fraction, to keep the
> precision, the param is the threshold number multiply 1.
>
> Bootstrap and regtest pass on powerpc64le.  Is this ok for trunk?
>
> gcc/ChangeLog
> 2020-08-19  Jiufu Guo   
>
>   * params.opt (param_max_unrolled_average_calls_x1): New param.
>   * cfgloop.h (average_num_loop_calls): New declare.
>   * cfgloopanal.c (average_num_loop_calls): New function.
>   * loop-unroll.c (decide_unroll_constant_iteration,
>   decide_unroll_runtime_iterations,
>   decide_unroll_stupid): Check average_num_loop_calls and
>   param_max_unrolled_average_calls_x1.
> ---
>  gcc/cfgloop.h |  2 ++
>  gcc/cfgloopanal.c | 25 +
>  gcc/loop-unroll.c | 10 ++
>  gcc/params.opt|  4 
>  4 files changed, 41 insertions(+)
>
> diff --git a/gcc/cfgloop.h b/gcc/cfgloop.h
> index 18b404e292f..dab933da150 100644
> --- a/gcc/cfgloop.h
> +++ b/gcc/cfgloop.h
> @@ -21,6 +21,7 @@ along with GCC; see the file COPYING3.  If not see
>  #define GCC_CFGLOOP_H
>  
>  #include "cfgloopmanip.h"
> +#include "sreal.h"
>  
>  /* Structure to hold decision about unrolling/peeling.  */
>  enum lpt_dec
> @@ -387,6 +388,7 @@ extern vec get_loop_exit_edges (const class loop *, 
> basic_block * = NULL);
>  extern edge single_exit (const class loop *);
>  extern edge single_likely_exit (class loop *loop, vec);
>  extern unsigned num_loop_branches (const class loop *);
> +extern sreal average_num_loop_calls (const class loop *);
>  
>  extern edge loop_preheader_edge (const class loop *);
>  extern edge loop_latch_edge (const class loop *);
> diff --git a/gcc/cfgloopanal.c b/gcc/cfgloopanal.c
> index 0b33e8272a7..a314db4e0c0 100644
> --- a/gcc/cfgloopanal.c
> +++ b/gcc/cfgloopanal.c
> @@ -233,6 +233,31 @@ average_num_loop_insns (const class loop *loop)
>return ret;
>  }
>  
> +/* Count the number of call insns in LOOP.  */
> +sreal
> +average_num_loop_calls (const class loop *loop)
> +{
> +  basic_block *bbs;
> +  rtx_insn *insn;
> +  unsigned int i, bncalls;
> +  sreal ncalls = 0;
> +
> +  bbs = get_loop_body (loop);
> +  for (i = 0; i < loop->num_nodes; i++)
> +{
> +  bncalls = 0;
> +  FOR_BB_INSNS (bbs[i], insn)
> + if (CALL_P (insn))
> +   bncalls++;
> +
> +  ncalls += (sreal) bncalls
> + * bbs[i]->count.to_sreal_scale (loop->header->count);
> +}
> +  free (bbs);
> +
> +  return ncalls;
> +}
> +
>  /* Returns expected number of iterations of LOOP, according to
> measured or guessed profile.
>  
> diff --git a/gcc/loop-unroll.c b/gcc/loop-unroll.c
> index 693c7768868..56b8fb37d2a 100644
> --- a/gcc/loop-unroll.c
> +++ b/gcc/loop-unroll.c
> @@ -370,6 +370,10 @@ decide_unroll_constant_iterations (class loop *loop, int 
> flags)
>  nunroll = nunroll_by_av;
>if (nunroll > (unsigned) param_max_unroll_times)
>  nunroll = param_max_unroll_times;
> +  if (!loop->unroll
> +  && (average_num_loop_calls (loop) * (sreal) 1).to_int ()
> +> (unsigned) param_max_unrolled_average_calls_x1)
> +nunroll = 0;
>  
>if (targetm.loop_unroll_adjust)
>  nunroll = targetm.loop_unroll_adjust (nunroll, loop);
> @@ -689,6 +693,9 @@ decide_unroll_runtime_iterations (class loop *loop, int 
> flags)
>  nunroll = nunroll_by_av;
>if (nunroll > (unsigned) param_max_unroll_times)
>  nunroll = param_max_unroll_times;
> +  if ((average_num_loop_calls (loop) * (sreal) 1).to_int ()
> +  > (unsigned) param_max_unrolled_average_calls_x1)
> +nunroll = 0;
>  
>if (targetm.loop_unroll_adjust)
>  nunroll = targetm.loop_unroll_adjust (nunroll, loop);
> @@ -1173,6 +1180,9 @@ decide_unroll_stupid (class loop *loop, int flags)
>  nunroll = nunroll_by_av;
>if (nunroll > (unsigned) param_max_unroll_times)
>  nunroll = param_max_unroll_times;
> +  if ((average_num_loop_calls (loop) * (sreal) 1).to_int ()
> +  > (unsigned) param_max_unrolled_average_calls_x1)
> +nunroll = 0;
>  
>if (targetm.loop_unroll_adjust)
>  nunroll = targetm.loop_unroll_adjust (nunroll, loop);
> diff --git a/gcc/params.opt b/gcc/params.opt
> index 

[PATCH] test/rs6000: Replace test target p8 and p9+

2020-08-31 Thread Kewen.Lin via Gcc-patches
Hi,

This is a trivial patch to clean existing rs6000 test targets
p8 and p9+ with existing has_arch_pwr8 and has_arch_pwr9
target combination or only one of them.  Not sure if it's a
good idea to tidy this, but send out for comments.

Bootstrapped/regtested on powerpc64le-linux-gnu P9.

Any comments are highly appreciated.

BR,
Kewen
-

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr92398.p9+.c: Replace p9+ with has_arch_pwr9.
* gcc.target/powerpc/pr92398.p9-.c: Replace p9+ with has_arch_pwr9,
and replace p8 with has_arch_pwr8 && !has_arch_pwr9.
* lib/target-supports.exp (check_effective_target_p8): Remove.
(check_effective_target_p9+): Remove.
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
index a819c3f16af..72dd1d9a274 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9+.c
@@ -1,4 +1,4 @@
-/* { dg-do compile { target { lp64 && p9+ } } } */
+/* { dg-do compile { target { lp64 && has_arch_pwr9 } } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-O2 -mvsx" } */
 
diff --git a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c 
b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
index 065ae73f267..bd7fa98af51 100644
--- a/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
+++ b/gcc/testsuite/gcc.target/powerpc/pr92398.p9-.c
@@ -1,9 +1,9 @@
-/* { dg-do compile { target { lp64 && {! p9+} } } } */
+/* { dg-do compile { target { lp64 && {! has_arch_pwr9} } } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
 /* { dg-options "-O2 -mvsx" } */
 
 /* { dg-final { scan-assembler-times {\mnot\M} 2 { xfail be } } } */
-/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { p8 && be } } } } */
+/* { dg-final { scan-assembler-times {\mstd\M} 2 { xfail { { {! has_arch_pwr9} 
&& has_arch_pwr8 } && be } } } } */
 
 /* Source code for the test in pr92398.h */
 #include "pr92398.h"
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index de2196d71f7..220e858f63b 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2687,26 +2687,6 @@ proc check_effective_target_le { } {
 }]
 }
 
-# Return 1 if we're generating code for only power8 platforms.
-
-proc check_effective_target_p8 {  } {
-  return [check_no_compiler_messages_nocache p8 assembly {
-   #if !(!defined(_ARCH_PWR9) && defined(_ARCH_PWR8))
-   #error NO
-   #endif
-  } ""]
-}
-
-# Return 1 if we're generating code for power9 or later platforms.
-
-proc check_effective_target_p9+ {  } {
-  return [check_no_compiler_messages_nocache p9+ assembly {
-   #if !(defined(_ARCH_PWR9))
-   #error NO
-   #endif
-  } ""]
-}
-
 # Return 1 if we're generating 32-bit code using default options, 0
 # otherwise.
 


Re: [PATCH] test/rs6000: Add Power9 and up as vect_len target

2020-08-31 Thread Kewen.Lin via Gcc-patches
Hi Segher,

>>  proc check_effective_target_vect_len_load_store { } {
>> -return 0
>> +return [expr { [check_effective_target_has_arch_pwr9] }]
>>  }
> 
> Why not just
> 
>   return check_effective_target_has_arch_pwr9;
> 
> ?  (Or lose at least two pairs of brackets if not all three :-) )
> 

Thanks for the suggestion to make it concise!

I tried
  
return check_effective_target_has_arch_pwr9

it failed due to that:
   
ERROR:... error executing dg-final: expected boolean value but got 
"check_effective_target_has_arch_pwr9"

I re-tried

return [check_effective_target_has_arch_pwr9]

it worked, nice!

Committed in r11-2960 with this concise writing then.

BR,
Kewen


PING: [PATCH V2] Add pattern for pointer-diff on addresses with same base/offset (PR 94234)

2020-08-31 Thread Feng Xue OS via Gcc-patches
Thanks,
Feng

From: Feng Xue OS 
Sent: Wednesday, August 19, 2020 5:17 PM
To: Richard Biener
Cc: gcc-patches@gcc.gnu.org; Marc Glisse
Subject: [PATCH V2] Add pattern for pointer-diff on addresses with same 
base/offset (PR 94234)

As Richard's comment, this patch is composed to simplify generalized
binary-with-convert pattern like ((T) X) OP ((T) Y). Instead of creating
almost duplicated rules into match.pd, we try to transform it to (T) (X OP Y),
and apply simplification on (X OP Y) in forwprop pass.

Regards,
Feng
---
2020-08-19  Feng Xue  

gcc/
PR tree-optimization/94234
* tree-ssa-forwprop.c (simplify_binary_with_convert): New function.
* (fwprop_ssa_val): Move it before its new caller.

gcc/testsuite/
PR tree-optimization/94234
* gcc.dg/ifcvt-3.c: Modified to suppress forward propagation.
* gcc.dg/tree-ssa/20030807-10.c: Likewise.
* gcc.dg/pr94234-2.c: New test.

> 
> From: Richard Biener 
> Sent: Monday, June 15, 2020 3:41 PM
> To: Feng Xue OS
> Cc: gcc-patches@gcc.gnu.org; Marc Glisse
> Subject: Re: [PATCH] Add pattern for pointer-diff on addresses with same 
> base/offset (PR 94234)
>
> On Fri, Jun 5, 2020 at 11:20 AM Feng Xue OS  
> wrote:
>>
>>  As Marc suggested, removed the new pointer_diff rule, and add another rule 
>> to fold
>>  convert-add expression. This new rule is:
>>
>>(T)(A * C) +- (T)(B * C) -> (T) ((A +- B) * C)
>>
>>  Regards,
>>  Feng
>>
>>  ---
>> 2020-06-01  Feng Xue  
>>
>>  gcc/
>>  PR tree-optimization/94234
>>  * match.pd ((T)(A * C) +- (T)(B * C)) -> (T)((A +- B) * C): New
>>  simplification.
>>  * ((PTR_A + OFF) - (PTR_B + OFF)) -> (PTR_A - PTR_B): New
>>  simplification.
>>
>>  gcc/testsuite/
>>  PR tree-optimization/94234
>>  * gcc.dg/pr94234.c: New test.
>>  ---
>>   gcc/match.pd   | 28 
>>   gcc/testsuite/gcc.dg/pr94234.c | 24 
>>   2 files changed, 52 insertions(+)
>>   create mode 100644 gcc/testsuite/gcc.dg/pr94234.c
>>
>>  diff --git a/gcc/match.pd b/gcc/match.pd
>>  index 33ee1a920bf..4f340bfe40a 100644
>>  --- a/gcc/match.pd
>>  +++ b/gcc/match.pd
>>  @@ -2515,6 +2515,9 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>   && TREE_CODE (@2) == INTEGER_CST
>>   && tree_int_cst_sign_bit (@2) == 0))
>>(minus (convert @1) (convert @2)
>>  +   (simplify
>>  +(pointer_diff (pointer_plus @0 @2) (pointer_plus @1 @2))
>>  + (pointer_diff @0 @1))
>
> This new pattern is OK.  Please commit it separately.
>
>>  (simplify
>>   (pointer_diff (pointer_plus @@0 @1) (pointer_plus @0 @2))
>>   /* The second argument of pointer_plus must be interpreted as signed, 
>> and
>>  @@ -2526,6 +2529,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
>>(minus (convert (view_convert:stype @1))
>>  (convert (view_convert:stype @2)))
>>
>>  +/* (T)(A * C) +- (T)(B * C) -> (T)((A +- B) * C) and
>>  +   (T)(A * C) +- (T)(A) -> (T)(A * (C +- 1)). */
>>  +(if (INTEGRAL_TYPE_P (type))
>>  + (for plusminus (plus minus)
>>  +  (simplify
>>  +   (plusminus (convert:s (mult:cs @0 @1)) (convert:s (mult:cs @0 @2)))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +   && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
>>  +(convert (mult (plusminus @1 @2) @0
>>  +  (simplify
>>  +   (plusminus (convert @0) (convert@2 (mult:c@3 @0 @1)))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +   && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
>>  +   && single_use (@2) && single_use (@3))
>>  +(convert (mult (plusminus { build_one_cst (TREE_TYPE (@1)); } @1) 
>> @0
>>  +  (simplify
>>  +   (plusminus (convert@2 (mult:c@3 @0 @1)) (convert @0))
>>  +   (if (element_precision (type) <= element_precision (TREE_TYPE (@0))
>>  +   && (TYPE_OVERFLOW_UNDEFINED (type) || TYPE_OVERFLOW_WRAPS (type))
>>  +   && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))
>>  +   && single_use (@2) && single_use (@3))
>>  +(convert (mult (plusminus @1 { build_one_cst (TREE_TYPE (@1)); }) 
>> @0))
>>  +
>
> This shows the limit of pattern matching IMHO.  I'm also not convinced
> it gets the
> overflow cases correct (but I didn't spend too much time here).  Note we have
> similar functionality implemented in fold_plusminus_mult_expr.  IMHO instead
> of doing the above moving fold_plusminus_mult_expr to GIMPLE by executing
> it from inside the forwprop pass would make more sense.  Or finally biting the
> bullet and try to teach reassociation about how to handle signed arithmetic
> with non-wrapping overflow behavior.
>
> Richard.



Re: [PATCH] test/rs6000: Add Power9 and up as vect_len target

2020-08-31 Thread Kewen.Lin via Gcc-patches
Hi Will,

Thanks for the review!

on 2020/9/1 上午1:13, will schmidt wrote:
> On Mon, 2020-08-31 at 14:43 +0800, Kewen.Lin via Gcc-patches wrote:
>> Hi,
>>
>> Power9 supports vector with length in bytes load/store, this patch
>> is to teach check_effective_target_vect_len_load_store to take it
>> and its laters as effective vector with length targets.
>>
>> Also supplement the documents for has_arch_pwr*.
>>
>> Bootstrapped/regtested on powerpc64le-linux-gnu P8.
> 
> Presumably also tested on P9 to see the
> check_effective_target_vect_len_store  check pass?  :-)
> 

Good point!  I should have also mentioned that with explicit
parameter vect-partial-vector-usage setting (to value 1) on P9,
it's also bootstrapped/regtested.  And yes, it passed, the
below check depends on this change:

   is-effective-target: vect_partial_vectors_usage_1 1

BR,
Kewen


Re: [PATCH] Enable GCC support for AMX

2020-08-31 Thread Hongyu Wang via Gcc-patches
PING^3

Hongyu Wang  于2020年8月4日周二 下午11:40写道:
>
> Kirill Yukhin  于2020年8月4日周二 下午10:47写道:
> >
> > Hello,
> >
> > On 06 июл 09:58, Hongyu Wang via Gcc-patches wrote:
> > > Hi:
> > >
> > > This patch is about to support Intel Advanced Matrix Extensions (AMX)
> > > which will be enabled in GLC.
> > >
> > > AMX is a new 64-bit programming paradigm consisting of two
> > > compo nents: a set of 2-dimensional registers (tiles) representing
> > > sub-arrays from a larger 2-dimensional memory image,
> > > and an accelerator able to operate on tiles
> > >
> > > Supported instructions are
> > >
> > > AMX-TILE:ldtilecfg/sttilecfg/tileloadd/tileloaddt1/tilezero/tilerelease
> > > AMX-INT8:tdpbssd/tdpbsud/tdpbusd/tdpbuud
> > > AMX-BF16:tdpbf16ps
> > >
> > > The intrinsics adopts constant tile register number as its input 
> > > parameters.
> >
> > I didn't go into the patch deeply, but why did you use inline asm for 
> > intrinsics
> > definition? Are you going to introduce register classes for thouse new tmm
> > registers and new instruction definitions for new insns in machine 
> > description?
>
> In this version of patch, we just align our implementation to what
> have been submitted
> to llvm community. Since AMX allows variant register size in runtime
> configuration,
> the implementation of register allocation is still under discussion.
> We will introduce
> new register class and new insns in the future patch.
>
> >
> > --
> > K


[committed] analyzer: handle __builtin___memset_chk [PR96798]

2020-08-31 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-2957-gbc62bfb0f43eeada02cb924e3cb5457a399b01c0.

gcc/analyzer/ChangeLog:
PR analyzer/96798
* region-model.cc (region_model::on_call_pre): Handle
BUILT_IN_MEMSET_CHK.

gcc/testsuite/ChangeLog:
PR analyzer/96798
* gcc.dg/analyzer/memset-1.c (test_5a): New.
---
 gcc/analyzer/region-model.cc |  1 +
 gcc/testsuite/gcc.dg/analyzer/memset-1.c | 14 ++
 2 files changed, 15 insertions(+)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index ec5094cac28..d47e8960296 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -673,6 +673,7 @@ region_model::on_call_pre (const gcall *call, 
region_model_context *ctxt)
  case BUILT_IN_MALLOC:
return impl_call_malloc (cd);
  case BUILT_IN_MEMSET:
+ case BUILT_IN_MEMSET_CHK:
impl_call_memset (cd);
return false;
break;
diff --git a/gcc/testsuite/gcc.dg/analyzer/memset-1.c 
b/gcc/testsuite/gcc.dg/analyzer/memset-1.c
index 830c1105f46..5748aa1af84 100644
--- a/gcc/testsuite/gcc.dg/analyzer/memset-1.c
+++ b/gcc/testsuite/gcc.dg/analyzer/memset-1.c
@@ -68,6 +68,20 @@ void test_5 (int n)
   __analyzer_eval (buf[42] == '\0'); /* { dg-warning "UNKNOWN" } */
 }
 
+/* As test_5, but with "__builtin___memset_chk".  */
+
+void test_5a (int n)
+{
+  char buf[256];
+  buf[42] = 'A';
+  __analyzer_eval (buf[42] == 'A'); /* { dg-warning "TRUE" } */
+  __builtin___memset_chk (buf, 0, n, __builtin_object_size (buf, 0));
+
+  /* We can't know if buf[42] was written to or not.  */
+  __analyzer_eval (buf[42] == 'A'); /* { dg-warning "UNKNOWN" } */
+  __analyzer_eval (buf[42] == '\0'); /* { dg-warning "UNKNOWN" } */
+}
+
 /* A "memset" with unknown value, but with zero size.  */
 
 static size_t __attribute__((noinline))
-- 
2.26.2



[committed] analyzer: gather builtin/internal fn handling into switch statements

2020-08-31 Thread David Malcolm via Gcc-patches
Clean up this code in preparation for fixing PR analyzer/96798.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-2956-gee7bfbe5eb70a23bbf3a2cedfdcbd2ea1a20c3f2.

gcc/analyzer/ChangeLog:
* region-model.cc (region_model::on_call_pre): Gather handling of
builtins and of internal fns into switch statements.  Handle
"alloca" and BUILT_IN_ALLOCA_WITH_ALIGN.
---
 gcc/analyzer/region-model.cc | 48 ++--
 1 file changed, 40 insertions(+), 8 deletions(-)

diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 1f794dacc13..ec5094cac28 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -652,18 +652,50 @@ region_model::on_call_pre (const gcall *call, 
region_model_context *ctxt)
 in region-model-impl-calls.cc.
 Having them split out into separate functions makes it easier
 to put breakpoints on the handling of specific functions.  */
-  if (is_named_call_p (callee_fndecl, "malloc", call, 1))
+
+  if (fndecl_built_in_p (callee_fndecl)
+ && gimple_builtin_call_types_compatible_p (call, callee_fndecl))
+   switch (DECL_UNCHECKED_FUNCTION_CODE (callee_fndecl))
+ {
+ default:
+   break;
+ case BUILT_IN_ALLOCA:
+ case BUILT_IN_ALLOCA_WITH_ALIGN:
+   return impl_call_alloca (cd);
+ case BUILT_IN_CALLOC:
+   return impl_call_calloc (cd);
+ case BUILT_IN_EXPECT:
+ case BUILT_IN_EXPECT_WITH_PROBABILITY:
+   return impl_call_builtin_expect (cd);
+ case BUILT_IN_FREE:
+   /* Handle in "on_call_post".  */
+   break;
+ case BUILT_IN_MALLOC:
+   return impl_call_malloc (cd);
+ case BUILT_IN_MEMSET:
+   impl_call_memset (cd);
+   return false;
+   break;
+ case BUILT_IN_STRLEN:
+   if (impl_call_strlen (cd))
+ return false;
+   break;
+ }
+  else if (gimple_call_internal_p (call))
+   switch (gimple_call_internal_fn (call))
+ {
+ default:
+   break;
+ case IFN_BUILTIN_EXPECT:
+   return impl_call_builtin_expect (cd);
+ }
+  else if (is_named_call_p (callee_fndecl, "malloc", call, 1))
return impl_call_malloc (cd);
   else if (is_named_call_p (callee_fndecl, "calloc", call, 2))
return impl_call_calloc (cd);
-  else if (is_named_call_p (callee_fndecl, "__builtin_alloca", call, 1))
+  else if (is_named_call_p (callee_fndecl, "alloca", call, 1))
return impl_call_alloca (cd);
-  else if (gimple_call_builtin_p (call, BUILT_IN_EXPECT)
-  || gimple_call_builtin_p (call, BUILT_IN_EXPECT_WITH_PROBABILITY)
-  || gimple_call_internal_p (call, IFN_BUILTIN_EXPECT))
-   return impl_call_builtin_expect (cd);
-  else if (is_named_call_p (callee_fndecl, "memset", call, 3)
-  || gimple_call_builtin_p (call, BUILT_IN_MEMSET))
+  else if (is_named_call_p (callee_fndecl, "memset", call, 3))
{
  impl_call_memset (cd);
  return false;
-- 
2.26.2



[committed] analyzer: fix ICE on unknown index in CONSTRUCTOR [PR96860]

2020-08-31 Thread David Malcolm via Gcc-patches
PR analyzer/96860 reports an ICE inside CONSTRUCTOR-handling with
--param analyzer-max-svalue-depth=0 when attempting to build a
binding_map for the CONSTRUCTOR's values.

The issue is that when handling (index, value) pairs for initializing
an array, the index values for the elements exceeds the svalue
complexity limit, and the index is thus treated as unknown, leading to
a symbolic rather than concrete offset for each array element.

This patch updates the CONSTRUCTOR-handling code so that it can
fail, returning an unknown value for the overall value of the
constructor for this case, fixing the ICE.

Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-2955-g18056e45db1c75aa209fa9a756395ddceb867a88.

gcc/analyzer/ChangeLog:
PR analyzer/96860
* region.cc (decl_region::get_svalue_for_constructor): Support
apply_ctor_to_region failing.
* store.cc (binding_map::apply_ctor_to_region): Add failure
handling.
(binding_map::apply_ctor_val_to_range): Likewise.
(binding_map::apply_ctor_pair_to_child_region): Likewise.  Replace
assertion that child_base_offset is not symbolic with error
handling.
* store.h (binding_map::apply_ctor_to_region): Convert return type
from void to bool.
(binding_map::apply_ctor_val_to_range): Likewise.
(binding_map::apply_ctor_pair_to_child_region): Likewise.

gcc/testsuite/ChangeLog:
PR analyzer/96860
* gcc.dg/analyzer/pr96860-1.c: New test.
* gcc.dg/analyzer/pr96860-2.c: New test.
---
 gcc/analyzer/region.cc|  3 ++-
 gcc/analyzer/store.cc | 33 ---
 gcc/analyzer/store.h  |  6 ++---
 gcc/testsuite/gcc.dg/analyzer/pr96860-1.c |  9 +++
 gcc/testsuite/gcc.dg/analyzer/pr96860-2.c |  8 ++
 5 files changed, 45 insertions(+), 14 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96860-1.c
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96860-2.c

diff --git a/gcc/analyzer/region.cc b/gcc/analyzer/region.cc
index 1823901a3ee..53f32dc912c 100644
--- a/gcc/analyzer/region.cc
+++ b/gcc/analyzer/region.cc
@@ -904,7 +904,8 @@ decl_region::get_svalue_for_constructor (tree ctor,
  decl_region as the base region when building child regions
  for offset calculations.  */
   binding_map map;
-  map.apply_ctor_to_region (this, ctor, mgr);
+  if (!map.apply_ctor_to_region (this, ctor, mgr))
+return mgr->get_or_create_unknown_svalue (get_type ());
 
   /* Return a compound svalue for the map we built.  */
   return mgr->get_or_create_compound_svalue (get_type (), map);
diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 8890a69a6f8..7f15aa92492 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -402,9 +402,11 @@ get_svalue_for_ctor_val (tree val, region_model_manager 
*mgr)
 }
 
 /* Bind values from CONSTRUCTOR to this map, relative to
-   PARENT_REG's relationship to its base region.  */
+   PARENT_REG's relationship to its base region.
+   Return true if successful, false if there was a problem (e.g. due
+   to hitting a complexity limit).  */
 
-void
+bool
 binding_map::apply_ctor_to_region (const region *parent_reg, tree ctor,
   region_model_manager *mgr)
 {
@@ -423,18 +425,24 @@ binding_map::apply_ctor_to_region (const region 
*parent_reg, tree ctor,
{
  tree min_index = TREE_OPERAND (index, 0);
  tree max_index = TREE_OPERAND (index, 1);
- apply_ctor_val_to_range (parent_reg, mgr, min_index, max_index, val);
+ if (!apply_ctor_val_to_range (parent_reg, mgr,
+   min_index, max_index, val))
+   return false;
  continue;
}
-  apply_ctor_pair_to_child_region (parent_reg, mgr, index, val);
+  if (!apply_ctor_pair_to_child_region (parent_reg, mgr, index, val))
+   return false;
 }
+  return true;
 }
 
 /* Bind the value VAL into the range of elements within PARENT_REF
from MIN_INDEX to MAX_INDEX (including endpoints).
-   For use in handling RANGE_EXPR within a CONSTRUCTOR.  */
+   For use in handling RANGE_EXPR within a CONSTRUCTOR.
+   Return true if successful, false if there was a problem (e.g. due
+   to hitting a complexity limit).  */
 
-void
+bool
 binding_map::apply_ctor_val_to_range (const region *parent_reg,
  region_model_manager *mgr,
  tree min_index, tree max_index,
@@ -469,12 +477,15 @@ binding_map::apply_ctor_val_to_range (const region 
*parent_reg,
 
   /* Bind the value to the range.  */
   put (range_key, sval);
+  return true;
 }
 
 /* Bind the value VAL into INDEX within PARENT_REF.
-   For use in handling a pair of entries within a CONSTRUCTOR.  */
+   For use in handling a pair of entries within a CONSTRUCTOR.
+   Return true if successful, false if there was a 

Re: [PATCH] separate reading past the end from -Wstringop-overflow

2020-08-31 Thread Martin Sebor via Gcc-patches

On 8/31/20 3:50 PM, Martin Sebor wrote:

On 8/31/20 4:51 AM, Christophe Lyon wrote:

Hi,


...


I pushed a small aarch64 patch as obvious:
 2020-08-31  Christophe Lyon  

 gcc/testsuite/
 * gcc.target/aarch64/strcmpopt_6.c: Suppress 
-Wstringop-overread.

(same as you added for i386)


Thank you!



On arm, there is a regression:
FAIL: c-c++-common/Warray-bounds-6.c  -Wc++-compat  (test for excess 
errors)

Excess errors:
/gcc/testsuite/c-c++-common/Warray-bounds-6.c:16:3: warning: 'strncpy'
writing 1 or more bytes into a region of size 0 overflows the
destination [-Wstringop-overflow=]

and the new test has several failures:
 gcc.dg/Wstringop-overread.c  (test for warnings, line 100)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 110)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 167)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 177)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 279)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 289)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 338)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 372)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 374)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 532)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 566)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 568)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 74)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 84)
FAIL: gcc.dg/Wstringop-overread.c (test for excess errors)
Excess errors:
/gcc/testsuite/gcc.dg/Wstringop-overread.c:302:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:306:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:312:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:323:3: warning: 'strnlen'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:351:3: warning: 'strnlen'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:498:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:502:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:508:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:518:3: warning: 'strndup'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:545:3: warning: 'strndup'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]

Can you check these?


They should be fixed as of yesterday:
   https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552976.html


Actually, Wstringop-overread.c still fails a bunch of cases in LP32.
Let me see what's going on now.

Martin



Can you please retest with an updated build?

Martin




Re: [PATCH] separate reading past the end from -Wstringop-overflow

2020-08-31 Thread Martin Sebor via Gcc-patches

On 8/31/20 4:51 AM, Christophe Lyon wrote:

Hi,


...


I pushed a small aarch64 patch as obvious:
 2020-08-31  Christophe Lyon  

 gcc/testsuite/
 * gcc.target/aarch64/strcmpopt_6.c: Suppress -Wstringop-overread.
(same as you added for i386)


Thank you!



On arm, there is a regression:
FAIL: c-c++-common/Warray-bounds-6.c  -Wc++-compat  (test for excess errors)
Excess errors:
/gcc/testsuite/c-c++-common/Warray-bounds-6.c:16:3: warning: 'strncpy'
writing 1 or more bytes into a region of size 0 overflows the
destination [-Wstringop-overflow=]

and the new test has several failures:
 gcc.dg/Wstringop-overread.c  (test for warnings, line 100)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 110)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 167)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 177)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 279)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 289)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 338)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 372)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 374)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 532)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 566)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 568)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 74)
 gcc.dg/Wstringop-overread.c  (test for warnings, line 84)
FAIL: gcc.dg/Wstringop-overread.c (test for excess errors)
Excess errors:
/gcc/testsuite/gcc.dg/Wstringop-overread.c:302:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:306:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:312:3: warning: 'strnlen'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:323:3: warning: 'strnlen'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:351:3: warning: 'strnlen'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:498:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:502:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:508:3: warning: 'strndup'
specified bound [1, 4294967295] exceeds source size 0
[-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:518:3: warning: 'strndup'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]
/gcc/testsuite/gcc.dg/Wstringop-overread.c:545:3: warning: 'strndup'
reading 1 or more bytes from a region of size 0 [-Wstringop-overread]

Can you check these?


They should be fixed as of yesterday:
  https://gcc.gnu.org/pipermail/gcc-patches/2020-August/552976.html

Can you please retest with an updated build?

Martin


Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Jason Merrill via Gcc-patches

On 8/31/20 5:10 AM, Aldy Hernandez wrote:

On 8/31/20 10:33 AM, Richard Biener wrote:

On Mon, Aug 31, 2020 at 10:20 AM Aldy Hernandez  wrote:


As discussed in the PR, the type of a switch operand may be different
than the type of the individual cases.  This is causing us to attempt to
intersect incompatible ranges.

This inconsistent IL seems wrong to me, but it also seems pervasive
throughout GCC.  Jason, as one of the original gimple designers, do you
remember if this was an oversight, or was there a reason for this?

Richard, you mention in the PR that normalizing the IL would cause
propagation of widening cast sources into switches harder.  Couldn't we
just cast all labels to the switch operand type upon creation?  What
would be the problem with that?  Would we generate sub-optimal code?

In either case, I'd hate to leave trunk in a broken case while this is
being discussed.  The attached patch fixes the PRs.

OK?


    widest_irange label_range (CASE_LOW (min_label), case_high);
+  if (!types_compatible_p (label_range.type (), range_of_op->type 
()))

+   range_cast (label_range, range_of_op->type ());
    label_range.intersect (range_of_op);

so label_range is of 'widest_int' type but then instead of casting
range_of_op to widest_irange you cast that widest_irange to the
type of the range op?  Why not build label_range in the type
of range_op immediately?


The "widest" in widest_irange does not denote the type of the range, but 
the number of sub-ranges that can maximally fit.  It has nothing to do 
with the underlying type of its elements.  Instead, it is supposed to 
indicate that label_range can fit an infinite amount of sub-ranges.  In 
practice this is 255 sub-ranges, but we can adjust that if needed.


So, in the constructor:

widest_irange label_range (CASE_LOW (min_label), case_high);

...the type for the sub-ranges in label_range is the type of 
CASE_LOW(min_label) and case_high.  They must match IIRC.


I am working on an irange best practices document that should go into 
detail on all this, and will post it later this week.




Using widest_int for both would have been obvious to me, you're
indicating that switch (type op) { case type2 lab: } is supposed to
match as (type)lab == op rather than (type2)op == lab.  I think
the IL will be consistent in a way that sign-extending both
op and lab to a common larger integer type is correct
(thus using widest_int), but no truncation should happen here
(such trucation should be applied by the frontend).


I suppose we could convert both label_range and range_of_op to a wider 
type and do the intersection on these results.  Is there a tree type 
that indicates a widest possible int?



In any case type mismatches here are of course unfortunate
and both more verification and documentation would be
nice.  verify_gimple_switch only verifies all case labels
have the same type, the type of the switch argument is
not verified in any way against that type.


Is the verification good enough with what Jakub posted?  I can adjust 
the documentation, but first I'd like a nod from Jason that this was 
indeed expected behavior.


I don't remember any such decision.  In C++, the case values are 
converted to the type of the switch argument (in case_conversion).


Jason



[wwwdocs] cxx-status: Use C++20 instead of C++2a.

2020-08-31 Thread Marek Polacek via Gcc-patches
It's time to use 20 instead of 2a.  Pushed.

commit 2330b0f89b6ac47cbb2253562a350eb1d121dd69
Author: Marek Polacek 
Date:   Mon Aug 31 16:30:51 2020 -0400

cxx-status: Use C++20 instead of C++2a.

diff --git a/htdocs/projects/cxx-status.html b/htdocs/projects/cxx-status.html
index d942e7d3..e69decdb 100644
--- a/htdocs/projects/cxx-status.html
+++ b/htdocs/projects/cxx-status.html
@@ -18,7 +18,7 @@
 C++11
 C++14
 C++17
-C++2a
+C++20
 Technical Specifications
   
   
@@ -31,27 +31,27 @@
Implementation Status section of the Libstdc++ manual.
   
 
-  C++2a Support in GCC
+  C++20 Support in GCC
 
   GCC has experimental support for the next revision of the C++
   standard, which is expected to be published in 2020.
 
-  C++2a features are available since GCC 8. To enable C++2a
+  C++20 features are available since GCC 8. To enable C++20
   support, add the command-line parameter -std=c++20
   (use -std=c++2a in GCC 9 and earlier)
   to your g++ command line. Or, to enable GNU
-  extensions in addition to C++2a features,
-add -std=gnu++2a.
+  extensions in addition to C++20 features,
+add -std=gnu++20.
 
-  Important: Because the ISO C++2a standard is
+  Important: Because the ISO C++20 standard is
   still evolving, GCC's support is experimental. No attempt
   will be made to maintain backward compatibility with implementations of
-  C++2a features that do not reflect the final standard.
+  C++20 features that do not reflect the final standard.
 
-  C++2a Language Features
+  C++20 Language Features
 
   The following table lists new language features that have been
-  accepted into the C++2a working draft. The "Proposal" column
+  accepted into the C++20 working draft. The "Proposal" column
   provides a link to the ISO C++ committee proposal that describes the
   feature, while the "Available in GCC?" column indicates the first
   version of GCC that contains an implementation of this feature (if

Marek



[committed] analyzer: fix ICE on RANGE_EXPR in CONSTRUCTORs [PR96763]

2020-08-31 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-2953-g0d1b4edc5fff834e8f924b20dd021ded7a21d2d2.

gcc/analyzer/ChangeLog:
PR analyzer/96763
* store.cc (binding_map::apply_ctor_to_region): Handle RANGE_EXPR
by calling a new binding_map::apply_ctor_val_to_range subroutine.
Split out the existing non-CONSTRUCTOR-handling code to a new
apply_ctor_pair_to_child_region subroutine.
(binding_map::apply_ctor_val_to_range): New.
(binding_map::apply_ctor_pair_to_child_region): New, split out
from binding_map::apply_ctor_to_region as noted above.
* store.h (binding_map::apply_ctor_val_to_range): New decl.
(binding_map::apply_ctor_pair_to_child_region): New decl.

gcc/testsuite/ChangeLog:
PR analyzer/96763
* g++.dg/analyzer/pr96763.C: New test.
---
 gcc/analyzer/store.cc   | 129 +---
 gcc/analyzer/store.h|   8 ++
 gcc/testsuite/g++.dg/analyzer/pr96763.C |  13 +++
 3 files changed, 115 insertions(+), 35 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/analyzer/pr96763.C

diff --git a/gcc/analyzer/store.cc b/gcc/analyzer/store.cc
index 14f7c00bde6..8890a69a6f8 100644
--- a/gcc/analyzer/store.cc
+++ b/gcc/analyzer/store.cc
@@ -419,43 +419,102 @@ binding_map::apply_ctor_to_region (const region 
*parent_reg, tree ctor,
 {
   if (!index)
index = build_int_cst (integer_type_node, ix);
-  const region *child_reg
-   = get_subregion_within_ctor (parent_reg, index, mgr);
-  if (TREE_CODE (val) == CONSTRUCTOR)
-   apply_ctor_to_region (child_reg, val, mgr);
-  else
+  else if (TREE_CODE (index) == RANGE_EXPR)
{
- const svalue *sval = get_svalue_for_ctor_val (val, mgr);
- const binding_key *k
-   = binding_key::make (mgr->get_store_manager (), child_reg,
-BK_direct);
- /* Handle the case where we have an unknown size for child_reg
-(e.g. due to it being a trailing field with incomplete array
-type.  */
- if (!k->concrete_p ())
-   {
- /* Assume that sval has a well-defined size for this case.  */
- tree sval_type = sval->get_type ();
- gcc_assert (sval_type);
- HOST_WIDE_INT sval_byte_size = int_size_in_bytes (sval_type);
- gcc_assert (sval_byte_size != -1);
- bit_size_t sval_bit_size = sval_byte_size * BITS_PER_UNIT;
- /* Get offset of child relative to base region.  */
- region_offset child_base_offset = child_reg->get_offset ();
- gcc_assert (!child_base_offset.symbolic_p ());
- /* Convert to an offset relative to the parent region.  */
- region_offset parent_base_offset = parent_reg->get_offset ();
- gcc_assert (!parent_base_offset.symbolic_p ());
- bit_offset_t child_parent_offset
-   = (child_base_offset.get_bit_offset ()
-  - parent_base_offset.get_bit_offset ());
- /* Create a concrete key for the child within the parent.  */
- k = mgr->get_store_manager ()->get_concrete_binding
-   (child_parent_offset, sval_bit_size, BK_direct);
-   }
- gcc_assert (k->concrete_p ());
- put (k, sval);
+ tree min_index = TREE_OPERAND (index, 0);
+ tree max_index = TREE_OPERAND (index, 1);
+ apply_ctor_val_to_range (parent_reg, mgr, min_index, max_index, val);
+ continue;
+   }
+  apply_ctor_pair_to_child_region (parent_reg, mgr, index, val);
+}
+}
+
+/* Bind the value VAL into the range of elements within PARENT_REF
+   from MIN_INDEX to MAX_INDEX (including endpoints).
+   For use in handling RANGE_EXPR within a CONSTRUCTOR.  */
+
+void
+binding_map::apply_ctor_val_to_range (const region *parent_reg,
+ region_model_manager *mgr,
+ tree min_index, tree max_index,
+ tree val)
+{
+  gcc_assert (TREE_CODE (min_index) == INTEGER_CST);
+  gcc_assert (TREE_CODE (max_index) == INTEGER_CST);
+
+  /* Generate a binding key for the range.  */
+  const region *min_element
+= get_subregion_within_ctor (parent_reg, min_index, mgr);
+  const region *max_element
+= get_subregion_within_ctor (parent_reg, max_index, mgr);
+  region_offset min_offset = min_element->get_offset ();
+  bit_offset_t start_bit_offset = min_offset.get_bit_offset ();
+  store_manager *smgr = mgr->get_store_manager ();
+  const binding_key *max_element_key
+= binding_key::make (smgr, max_element, BK_direct);
+  gcc_assert (max_element_key->concrete_p ());
+  const concrete_binding *max_element_ckey
+= max_element_key->dyn_cast_concrete_binding ();
+  bit_size_t range_size_in_bits
+= max_element_ckey->get_next_bit_offset () - 

[committed] analyzer: fix ICE on casting float to pointer [PR96764]

2020-08-31 Thread David Malcolm via Gcc-patches
Successfully bootstrapped & regrtested on x86_64-pc-linux-gnu.
Pushed to master as r11-2952-gecdb93224c56189a129e97c556fe6b78e1b15a63.

gcc/analyzer/ChangeLog:
PR analyzer/96764
* region-model-manager.cc
(region_model_manager::maybe_fold_unaryop): Handle VIEW_CONVERT_EXPR.
(region_model_manager::get_or_create_cast): Move logic for
real->integer casting to...
(get_code_for_cast): ...this new function, and add logic for
real->non-integer casts.
(region_model_manager::maybe_fold_sub_svalue): Handle
VIEW_CONVERT_EXPR.
* region-model.cc
(region_model::add_any_constraints_from_gassign): Likewise.
* svalue.cc (svalue::maybe_undo_cast): Likewise.
(unaryop_svalue::dump_to_pp): Likewise.

gcc/testsuite/ChangeLog:
PR analyzer/96764
* gcc.dg/analyzer/pr96764.c: New test.
---
 gcc/analyzer/region-model-manager.cc| 35 -
 gcc/analyzer/region-model.cc|  1 +
 gcc/analyzer/svalue.cc  | 13 +
 gcc/testsuite/gcc.dg/analyzer/pr96764.c |  6 +
 4 files changed, 44 insertions(+), 11 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/analyzer/pr96764.c

diff --git a/gcc/analyzer/region-model-manager.cc 
b/gcc/analyzer/region-model-manager.cc
index 9bfb0812998..da8fa01077b 100644
--- a/gcc/analyzer/region-model-manager.cc
+++ b/gcc/analyzer/region-model-manager.cc
@@ -324,6 +324,7 @@ region_model_manager::maybe_fold_unaryop (tree type, enum 
tree_code op,
   switch (op)
 {
 default: break;
+case VIEW_CONVERT_EXPR:
 case NOP_EXPR:
   {
/* Handle redundant casts.  */
@@ -390,6 +391,30 @@ region_model_manager::get_or_create_unaryop (tree type, 
enum tree_code op,
   return unaryop_sval;
 }
 
+/* Get a tree code for a cast to DST_TYPE from SRC_TYPE.
+   Use NOP_EXPR if possible (e.g. to help fold_unary convert casts
+   of 0 to (T*) to simple pointer constants), but use FIX_TRUNC_EXPR
+   and VIEW_CONVERT_EXPR for cases that fold_unary would otherwise crash
+   on.  */
+
+static enum tree_code
+get_code_for_cast (tree dst_type, tree src_type)
+{
+  gcc_assert (dst_type);
+  if (!src_type)
+return NOP_EXPR;
+
+  if (TREE_CODE (src_type) == REAL_TYPE)
+{
+  if (TREE_CODE (dst_type) == INTEGER_TYPE)
+   return FIX_TRUNC_EXPR;
+  else
+   return VIEW_CONVERT_EXPR;
+}
+
+  return NOP_EXPR;
+}
+
 /* Return the svalue * for a cast of ARG to type TYPE, creating it
if necessary.  */
 
@@ -397,11 +422,8 @@ const svalue *
 region_model_manager::get_or_create_cast (tree type, const svalue *arg)
 {
   gcc_assert (type);
-  if (arg->get_type ())
-if (TREE_CODE (type) == INTEGER_TYPE
-   && TREE_CODE (arg->get_type ()) == REAL_TYPE)
-  return get_or_create_unaryop (type, FIX_TRUNC_EXPR, arg);
-  return get_or_create_unaryop (type, NOP_EXPR, arg);
+  enum tree_code op = get_code_for_cast (type, arg->get_type ());
+  return get_or_create_unaryop (type, op, arg);
 }
 
 /* Subroutine of region_model_manager::get_or_create_binop.
@@ -561,7 +583,8 @@ region_model_manager::maybe_fold_sub_svalue (tree type,
   if (const unaryop_svalue *unary
   = parent_svalue->dyn_cast_unaryop_svalue ())
 {
-  if (unary->get_op () == NOP_EXPR)
+  if (unary->get_op () == NOP_EXPR
+ || unary->get_op () == VIEW_CONVERT_EXPR)
if (tree cst = unary->get_arg ()->maybe_get_constant ())
  if (zerop (cst))
{
diff --git a/gcc/analyzer/region-model.cc b/gcc/analyzer/region-model.cc
index 02bbfa54781..1f794dacc13 100644
--- a/gcc/analyzer/region-model.cc
+++ b/gcc/analyzer/region-model.cc
@@ -1764,6 +1764,7 @@ region_model::add_any_constraints_from_gassign (enum 
tree_code op,
   break;
 
 case NOP_EXPR:
+case VIEW_CONVERT_EXPR:
   {
add_constraint (gimple_assign_rhs1 (assign), op, rhs, ctxt);
   }
diff --git a/gcc/analyzer/svalue.cc b/gcc/analyzer/svalue.cc
index a1c6241a0ba..fcab578674e 100644
--- a/gcc/analyzer/svalue.cc
+++ b/gcc/analyzer/svalue.cc
@@ -128,16 +128,19 @@ svalue::maybe_get_constant () const
 return NULL_TREE;
 }
 
-/* If this svalue is a cast (i.e a unaryop NOP_EXPR), return the underlying
-   svalue.
+/* If this svalue is a cast (i.e a unaryop NOP_EXPR or VIEW_CONVERT_EXPR),
+   return the underlying svalue.
Otherwise return NULL.  */
 
 const svalue *
 svalue::maybe_undo_cast () const
 {
   if (const unaryop_svalue *unaryop_sval = dyn_cast_unaryop_svalue ())
-if (unaryop_sval->get_op () == NOP_EXPR)
-  return unaryop_sval->get_arg ();
+{
+  enum tree_code op = unaryop_sval->get_op ();
+  if (op == NOP_EXPR || op == VIEW_CONVERT_EXPR)
+   return unaryop_sval->get_arg ();
+}
   return NULL;
 }
 
@@ -566,7 +569,7 @@ unaryop_svalue::dump_to_pp (pretty_printer *pp, bool 
simple) const
 {
   if (simple)
 {
-  if (m_op == NOP_EXPR)
+  if (m_op == VIEW_CONVERT_EXPR || m_op == NOP_EXPR)
 

Re: [PATCH] c++: Fix resolving the address of overloaded pmf [PR96647]

2020-08-31 Thread Jason Merrill via Gcc-patches

On 8/28/20 12:45 PM, Patrick Palka wrote:

(Removing libstd...@gcc.gnu.org from CC list)

On Fri, 28 Aug 2020, Patrick Palka wrote:

In resolve_address_of_overloaded_function, currently only the second
pass over the overload set (which considers just the function templates
in the overload set) checks constraints and performs return type
deduction when necessary.  But as the testcases below show, we need to
do this when considering non-template functions during the first pass,
too.

Tested on x86_64-pc-linux-gnu, does this look OK for trunk?

gcc/cp/ChangeLog:

PR c++/96647
* class.c (resolve_address_of_overloaded_function): Also check
constraints and perform return type deduction when considering
non-template functions in the overload set.

gcc/testsuite/ChangeLog:

PR c++/96647
* g++.dg/cpp0x/auto-96647.C: New test.
* g++.dg/cpp2a/concepts-fn6.C: New test.
---
  gcc/cp/class.c| 16 
  gcc/testsuite/g++.dg/cpp0x/auto-96647.C   | 10 ++
  gcc/testsuite/g++.dg/cpp2a/concepts-fn6.C | 10 ++
  3 files changed, 36 insertions(+)
  create mode 100644 gcc/testsuite/g++.dg/cpp0x/auto-96647.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/concepts-fn6.C



+   if (undeduced_auto_decl (fn))
+ {
+   /* Force instantiation to do return type deduction.  */
+   ++function_depth;
+   instantiate_decl (fn, /*defer*/false, /*class*/false);
+   --function_depth;


How about maybe_instantiate_decl instead of this hunk?  This looks like 
it could call instantiate_decl for a non-template function, which is wrong.


Jason



Re: [PATCH v3] c++: Implement P1009: Array size deduction in new-expressions.

2020-08-31 Thread Jason Merrill via Gcc-patches

On 8/25/20 8:26 AM, Marek Polacek wrote:

On Mon, Aug 24, 2020 at 10:49:38PM -0400, Jason Merrill wrote:

On 8/24/20 5:44 PM, Marek Polacek wrote:

--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -2123,6 +2123,23 @@ build_constructor_from_list (tree type, tree vals)
 return build_constructor (type, v);
   }
+/* Return a new CONSTRUCTOR node whose type is TYPE and whose values
+   are in a vector pointed to by VALS.  Note that the TREE_PURPOSE
+   fields in the constructor remain null.  */
+
+tree
+build_constructor_from_vec (tree type, const vec *vals)
+{
+  tree ctor = build_constructor (type, NULL);
+
+  unsigned int ix;
+  tree t;
+  FOR_EACH_VEC_SAFE_ELT (vals, ix, t)
+CONSTRUCTOR_APPEND_ELT (CONSTRUCTOR_ELTS (ctor), NULL_TREE, t);


Either you need to build up the constructor_elt vec first and then pass it
to build_constructor, or call recompute_constructor_flags after you add all
the elements.


Ug.  I wanted to get rid of the vec temporary, moved
build_constructor, and forgot about recompute.  Fixed.


And perhaps

   for (tree t : *vals)


Nice!  So nice to get rid of the ix temp, too.

Bootstrapped/regtested on x86_64-pc-linux-gnu, ok for trunk?


OK.


-- >8 --
This patch implements C++20 P1009, allowing code like

   new double[]{1,2,3}; // array bound will be deduced

Since this proposal makes the initialization rules more consistent, it is
applied to all previous versions of C++ (thus, effectively, all the way back
to C++11).

My patch is based on Jason's patch that handled the basic case.  I've
extended it to work with ()-init and also the string literal case.
Further testing revealed that to handle stuff like

   new int[]{t...};

in a template, we have to consider such a NEW_EXPR type-dependent.
Obviously, we first have to expand the pack to be able to deduce the
number of elements in the array.

Curiously, while implementing this proposal, I noticed that we fail
to accept

   new char[4]{"abc"};

so I've assigned 77841 to self.  I think the fix will depend on the
build_new_1 hunk in this patch.

The new tree.c function build_constructor_from_vec helps us morph
a vector into a CONSTRUCTOR more efficiently.

gcc/cp/ChangeLog:

PR c++/93529
* call.c (build_new_method_call_1): Use build_constructor_from_vec
instead of build_tree_list_vec + build_constructor_from_list.
* init.c (build_new_1): Handle new char[]{"foo"}.  Use
build_constructor_from_vec instead of build_tree_list_vec +
build_constructor_from_list.
(build_new): Deduce the array size in new-expression if not
present.  Handle ()-init.  Handle initializing an array from
a string literal.
* parser.c (cp_parser_new_type_id): Leave [] alone.
(cp_parser_direct_new_declarator): Allow [].
* pt.c (type_dependent_expression_p): In a NEW_EXPR, consider
array types whose dimension has to be deduced type-dependent.

gcc/ChangeLog:

PR c++/93529
* tree.c (build_constructor_from_vec): New.
* tree.h (build_constructor_from_vec): Declare.

gcc/testsuite/ChangeLog:

PR c++/93529
* g++.dg/cpp0x/sfinae4.C: Adjust expected result after P1009.
* g++.dg/cpp2a/new-array1.C: New test.
* g++.dg/cpp2a/new-array2.C: New test.
* g++.dg/cpp2a/new-array3.C: New test.
* g++.dg/cpp2a/new-array4.C: New test.

Co-authored-by: Jason Merrill 
---
  gcc/cp/call.c   |  4 +-
  gcc/cp/init.c   | 55 +--
  gcc/cp/parser.c | 13 +++--
  gcc/cp/pt.c |  4 ++
  gcc/testsuite/g++.dg/cpp0x/sfinae4.C|  8 ++-
  gcc/testsuite/g++.dg/cpp2a/new-array1.C | 70 +
  gcc/testsuite/g++.dg/cpp2a/new-array2.C | 22 
  gcc/testsuite/g++.dg/cpp2a/new-array3.C | 17 ++
  gcc/testsuite/g++.dg/cpp2a/new-array4.C | 10 
  gcc/tree.c  | 15 ++
  gcc/tree.h  |  1 +
  11 files changed, 209 insertions(+), 10 deletions(-)
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/new-array1.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/new-array2.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/new-array3.C
  create mode 100644 gcc/testsuite/g++.dg/cpp2a/new-array4.C

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 4726e57a30d..61bbb38bd2b 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -10297,8 +10297,8 @@ build_new_method_call_1 (tree instance, tree fns, 
vec **args,
  && !vec_safe_is_empty (user_args))
{
  /* Create a CONSTRUCTOR from ARGS, e.g. {1, 2} from <1, 2>.  */
- tree list = build_tree_list_vec (user_args);
- tree ctor = build_constructor_from_list (init_list_type_node, list);
+ tree ctor = build_constructor_from_vec (init_list_type_node,
+ user_args);
  CONSTRUCTOR_IS_DIRECT_INIT (ctor) = true;
  

Re: [PATCH 3/4 v3] ivopts: Consider cost_step on different forms during unrolling

2020-08-31 Thread Segher Boessenkool
Hi!

Just a note:

On Tue, Aug 25, 2020 at 08:46:55PM +0800, Kewen.Lin wrote:
> 1) Currently address_cost hook on rs6000 always return zero, but at least
> from Power7, pre_inc/pre_dec kind instructions are cracked, it means we
> have to take the address update into account (scalar normal operation).

>From Power4 on already (not sure about Power6, but does anyone care?)


Segher


Re: [PATCH] test/rs6000: Add Power9 and up as vect_len target

2020-08-31 Thread Segher Boessenkool
Hi!

On Mon, Aug 31, 2020 at 02:43:50PM +0800, Kewen.Lin wrote:
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -7066,7 +7066,7 @@ proc check_effective_target_vect_fully_masked { } {
>  # @code{len_store} optabs.
>  
>  proc check_effective_target_vect_len_load_store { } {
> -return 0
> +return [expr { [check_effective_target_has_arch_pwr9] }]
>  }

Why not just

  return check_effective_target_has_arch_pwr9;

?  (Or lose at least two pairs of brackets if not all three :-) )

Okay for trunk, with that change if that works.  Thanks!


Segher


Re: [PATCH] test/rs6000: Add Power9 and up as vect_len target

2020-08-31 Thread will schmidt via Gcc-patches
On Mon, 2020-08-31 at 14:43 +0800, Kewen.Lin via Gcc-patches wrote:
> Hi,
> 
> Power9 supports vector with length in bytes load/store, this patch
> is to teach check_effective_target_vect_len_load_store to take it
> and its laters as effective vector with length targets.
> 
> Also supplement the documents for has_arch_pwr*.
> 
> Bootstrapped/regtested on powerpc64le-linux-gnu P8.

Presumably also tested on P9 to see the
check_effective_target_vect_len_store  check pass?  :-)

> 
> Is it ok for trunk?
> 
> BR,
> Kewen
> --
> 
> gcc/ChangeLog:
> 
>   * doc/sourcebuild.texi (has_arch_pwr5, has_arch_pwr6,
> has_arch_pwr7,
>   has_arch_pwr8, has_arch_pwr9): Document.

looks good.

> 
> gcc/testsuite/ChangeLog:
> 
>   * lib/target-supports.exp
>   (check_effective_target_vect_len_load_store): Call check
> function
>   check_effective_target_has_arch_pwr9.

Cosmetically matches other procs , looks good.

thanks
-Will



Re: [PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-08-31 Thread Segher Boessenkool
Hi!

On Mon, Aug 31, 2020 at 04:06:47AM -0500, Xiong Hu Luo wrote:
> vec_insert accepts 3 arguments, arg0 is input vector, arg1 is the value
> to be insert, arg2 is the place to insert arg1 to arg0.  This patch adds
> __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] for vec_insert to
> not expand too early in gimple stage if arg2 is variable, to avoid generate
> store hit load instructions.
> 
> For Power9 V4SI:
>   addi 9,1,-16
>   rldic 6,6,2,60
>   stxv 34,-16(1)
>   stwx 5,9,6
>   lxv 34,-16(1)
> =>
>   addis 9,2,.LC0@toc@ha
>   addi 9,9,.LC0@toc@l
>   mtvsrwz 33,5
>   lxv 32,0(9)
>   sradi 9,6,2
>   addze 9,9
>   sldi 9,9,2
>   subf 9,9,6
>   subfic 9,9,3
>   sldi 9,9,2
>   subfic 9,9,20
>   lvsl 13,0,9
>   xxperm 33,33,45
>   xxperm 32,32,45
>   xxsel 34,34,33,32

For v a V4SI, x a SI, j some int,  what do we generate for
  v[j&3] = x;
?
This should be exactly the same as we generate for
  vec_insert(x, v, j);
(the builtin does a modulo 4 automatically).


Segher


[PATCH] doc: Update documentation on MODE_PARTIAL_INT subregs

2020-08-31 Thread Jozef Lawrynowicz
In d8487c949ad5 (~GCC 4.9.0), MODE_PARTIAL_INT modes were changed from
having an unknown number of undefined bits, to having a known number of
undefined bits, however the documentation on using SUBREG expressions
with MODE_PARTIAL_INT modes was not updated to reflect this.

The attached patch updates that portion of the GCC internals
documentation.

Ok for trunk?
>From e195dce328b272cd413ca7c659b800170eb60f2c Mon Sep 17 00:00:00 2001
From: Jozef Lawrynowicz 
Date: Mon, 31 Aug 2020 17:26:31 +0100
Subject: [PATCH] doc: Update documentation on MODE_PARTIAL_INT subregs

In d8487c949ad5, MODE_PARTIAL_INT modes were changed from having an
unknown number of undefined bits, to having a known number of undefined
bits, however the documentation on using SUBREG expressions with
MODE_PARTIAL_INT modes was not updated to reflect this.

gcc/ChangeLog:

* doc/rtl.texi (subreg): Fix documentation to state there is a known
number of undefined bits in regs and subregs of MODE_PARTIAL_INT modes.

---
 gcc/doc/rtl.texi | 30 ++
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index f8e1f950823..4760bb97d52 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -2209,17 +2209,22 @@ whether the subreg is a lowpart of a block.
 @end table
 
 A @code{MODE_PARTIAL_INT} mode behaves as if it were as wide as the
-corresponding @code{MODE_INT} mode, except that it has an unknown
-number of undefined bits.  For example:
+corresponding @code{MODE_INT} mode, except that it has a number of
+undefined bits, which are determined by the precision of the
+mode.
+
+For example, on a little-endian target which defines @code{PSImode}
+to have a precision of 20 bits:
 
 @smallexample
 (subreg:PSI (reg:SI 0) 0)
 @end smallexample
 
+accesses the low 20 bits of @samp{(reg:SI 0)}.
+
 @findex REGMODE_NATURAL_SIZE
-accesses the whole of @samp{(reg:SI 0)}, but the exact relationship
-between the @code{PSImode} value and the @code{SImode} value is not
-defined.  If we assume @samp{REGMODE_NATURAL_SIZE (DImode) <= 4},
+Continuing with a @code{PSImode} precision of 20 bits, if we assume
+@samp{REGMODE_NATURAL_SIZE (DImode) <= 4},
 then the following two @code{subreg}s:
 
 @smallexample
@@ -2227,9 +2232,8 @@ then the following two @code{subreg}s:
 (subreg:PSI (reg:DI 0) 4)
 @end smallexample
 
-represent independent 4-byte accesses to the two halves of
-@samp{(reg:DI 0)}.  Both @code{subreg}s have an unknown number
-of undefined bits.
+represent accesses to the low 20 bits of the two halves of
+@samp{(reg:DI 0)}.
 
 If @samp{REGMODE_NATURAL_SIZE (PSImode) <= 2} then these two @code{subreg}s:
 
@@ -2240,15 +2244,17 @@ If @samp{REGMODE_NATURAL_SIZE (PSImode) <= 2} then 
these two @code{subreg}s:
 
 represent independent 2-byte accesses that together span the whole
 of @samp{(reg:PSI 0)}.  Storing to the first @code{subreg} does not
-affect the value of the second, and vice versa.  @samp{(reg:PSI 0)}
-has an unknown number of undefined bits, so the assignment:
+affect the value of the second, and vice versa, so the assignment:
 
 @smallexample
 (set (subreg:HI (reg:PSI 0) 0) (reg:HI 4))
 @end smallexample
 
-does not guarantee that @samp{(subreg:HI (reg:PSI 0) 0)} has the
-value @samp{(reg:HI 4)}.
+sets the low 16 bits of @samp{(reg:PSI 0)} to @samp{(reg:HI 4)}, and
+the high 4 defined bits of @samp{(reg:PSI 0)} retain their
+original value.  The behavior here is therefore the same as
+normal @code{subreg}s, when there are no
+@code{MODE_PARTIAL_INT} modes involved.
 
 @cindex @code{TARGET_CAN_CHANGE_MODE_CLASS} and subreg semantics
 The rules above apply to both pseudo @var{reg}s and hard @var{reg}s.
-- 
2.28.0


Re: [PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-08-31 Thread will schmidt via Gcc-patches
On Mon, 2020-08-31 at 04:06 -0500, Xiong Hu Luo via Gcc-patches wrote:
> vec_insert accepts 3 arguments, arg0 is input vector, arg1 is the value
> to be insert, arg2 is the place to insert arg1 to arg0.  This patch adds
> __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] for vec_insert to
> not expand too early in gimple stage if arg2 is variable, to avoid generate
> store hit load instructions.
> 
> For Power9 V4SI:
>   addi 9,1,-16
>   rldic 6,6,2,60
>   stxv 34,-16(1)
>   stwx 5,9,6
>   lxv 34,-16(1)
> =>
>   addis 9,2,.LC0@toc@ha
>   addi 9,9,.LC0@toc@l
>   mtvsrwz 33,5
>   lxv 32,0(9)
>   sradi 9,6,2
>   addze 9,9
>   sldi 9,9,2
>   subf 9,9,6
>   subfic 9,9,3
>   sldi 9,9,2
>   subfic 9,9,20
>   lvsl 13,0,9
>   xxperm 33,33,45
>   xxperm 32,32,45
>   xxsel 34,34,33,32
> 
> Though instructions increase from 5 to 15, the performance is improved
> 60% in typical cases.

Ok.  :-)


(bunch of nits below, no issues with the gist of the patch).


> gcc/ChangeLog:
> 
>   * config/rs6000/altivec.md (altivec_lvsl_reg_2): Extend to
>   SDI mode.

(altivec_lvsl_reg): Rename to (altivec_lvsl_reg_2) and extend to SDI mode.


>   * config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
>   macros for vec_insert built-in functions.

Should that list the VEC_INSERT_V16QI, VEC_INSERT_V8HI, ... values instead of 
the BU_VSX_X ?  (need second opinion.. )


>   * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
>   Generate built-in calls for vec_insert.

>   * config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
>   New function.

>   (altivec_expand_builtin): Add case entry for
>   VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
>   VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
>   VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.

plural entries :-) 


>   (altivec_init_builtins):

Add defines for __builtin_vec_insert_v16qi, __builtin_vec_insert_v8hi, ...


>   * config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
>   New declear.

declare

>   * config/rs6000/rs6000.c (rs6000_expand_vector_insert):
>   New function.



>   * config/rs6000/rs6000.md (FQHS): New mode iterator.
>   (FD): New mode iterator.
>   p8_mtvsrwz_v16qi2: New define_insn.
>   p8_mtvsrd_v16qi2: New define_insn.
>   * config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.
ok

> 
> gcc/testsuite/ChangeLog:
> 
>   * gcc.target/powerpc/pr79251.c: New test.
> ---
>  gcc/config/rs6000/altivec.md   |   4 +-
>  gcc/config/rs6000/rs6000-builtin.def   |   6 +
>  gcc/config/rs6000/rs6000-c.c   |  61 +
>  gcc/config/rs6000/rs6000-call.c|  74 +++
>  gcc/config/rs6000/rs6000-protos.h  |   1 +
>  gcc/config/rs6000/rs6000.c | 146 +
>  gcc/config/rs6000/rs6000.md|  19 +++
>  gcc/config/rs6000/vsx.md   |   2 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 
>  9 files changed, 333 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c
> 
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 0a2e634d6b0..66b636059a6 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2772,10 +2772,10 @@
>DONE;
>  })
> 
> -(define_insn "altivec_lvsl_reg"
> +(define_insn "altivec_lvsl_reg_2"
>[(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
>   (unspec:V16QI
> - [(match_operand:DI 1 "gpc_reg_operand" "b")]
> + [(match_operand:SDI 1 "gpc_reg_operand" "b")]
>   UNSPEC_LVSL_REG))]
>"TARGET_ALTIVEC"
>"lvsl %0,0,%1"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index f9f0fece549..d095b365c14 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,  "vec_init_v2di", CONST)
>  BU_VSX_X (VEC_SET_V1TI,"vec_set_v1ti",   CONST)
>  BU_VSX_X (VEC_SET_V2DF,"vec_set_v2df",   CONST)
>  BU_VSX_X (VEC_SET_V2DI,"vec_set_v2di",   CONST)
> +BU_VSX_X (VEC_INSERT_V16QI,"vec_insert_v16qi",   CONST)
> +BU_VSX_X (VEC_INSERT_V8HI, "vec_insert_v8hi",CONST)
> +BU_VSX_X (VEC_INSERT_V4SI, "vec_insert_v4si",CONST)
> +BU_VSX_X (VEC_INSERT_V4SF, "vec_insert_v4sf",CONST)
> +BU_VSX_X (VEC_INSERT_V2DI, "vec_insert_v2di",CONST)
> +BU_VSX_X (VEC_INSERT_V2DF, "vec_insert_v2df",CONST)
>  BU_VSX_X (VEC_EXT_V1TI,"vec_ext_v1ti",   CONST)
>  BU_VSX_X (VEC_EXT_V2DF,"vec_ext_v2df",   CONST)
>  BU_VSX_X (VEC_EXT_V2DI,"vec_ext_v2di",   CONST)
> diff --git a/gcc/config/rs6000/rs6000-c.c 

Re: [Patch] OpenMP/Fortran: Handle polymorphic scalars in data-sharing FIRSTPRIVATE (PR86470)

2020-08-31 Thread Jakub Jelinek via Gcc-patches
On Tue, Aug 25, 2020 at 12:50:46PM +0200, Tobias Burnus wrote:
> OK for mainline?

Generally, you know Fortran FE much more than I do, so just a few random
comments.

> --- a/gcc/fortran/trans-openmp.c
> +++ b/gcc/fortran/trans-openmp.c
> @@ -355,6 +355,51 @@ gfc_has_alloc_comps (tree type, tree decl)
>return false;
>  }
>  
> +/* Return true if TYPE is polymorphic but not with pointer attribute.  */
> +
> +static bool
> +gfc_is_polymorphic_nonptr (tree type)
> +{
> +  if (POINTER_TYPE_P (type))
> +type = TREE_TYPE (type);
> +  if (TREE_CODE (type) != RECORD_TYPE)
> +return false;
> +
> +  tree field = TYPE_FIELDS (type);
> +  if (!field || 0 != strcmp ("_data", IDENTIFIER_POINTER (DECL_NAME 
> (field
> +return false;
> +  field = DECL_CHAIN (field);
> +  if (!field || 0 != strcmp ("_vptr", IDENTIFIER_POINTER (DECL_NAME 
> (field

Is it safe to just look at the field names?  Shouldn't it at least also
test that the fields are DECL_ARTIFICIAL, or somehow else ensure that it
isn't a user derived type with _data and _vptr fields in it.

  type foo
integer :: _data
integer :: _vptr
integer :: _len
  end type
  type(foo) :: a
  a%_data = 1
  a%_vptr = 2
  a%_len = 3
end
compiles just fine with -fallow-leading-underscore ...

> @@ -740,6 +785,87 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree 
> src)
>gcc_assert (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_FIRSTPRIVATE
> || OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_LINEAR);
>  
> +  /* TODO: implement support for polymorphic arrays; reject for now.  */
> +  /* Void arrays appear as var.0 = var._data.data. A bit hackish to
> + distinguish from 'type(c_ptr) :: var(5)' by scanning for '.';
> + this assumes that ASM_FORMAT_PRIVATE_NAME uses a '.', which most
> + systems do. */
> +  if (TREE_CODE (type) == ARRAY_TYPE
> +  && TREE_TYPE (type) == pvoid_type_node
> +  && TREE_CODE (dest) == MEM_REF
> +  && strchr (IDENTIFIER_POINTER (DECL_NAME (TREE_OPERAND (dest, 0))), 
> '.'))

This seems very fragile, there are targets that use $ instead, other targets
use only underscores.
$ grep NO_DOT_IN_LABEL config/* config/*/* 2>/dev/null
config/vx-common.h:# define NO_DOT_IN_LABEL
config/mmix/mmix.h:#define NO_DOT_IN_LABEL
config/nvptx/nvptx.h:#define NO_DOT_IN_LABEL
config/xtensa/elf.h:#define NO_DOT_IN_LABEL
$ grep -w NO_DOLLAR_IN_LABEL config/* config/*/* 2>/dev/null
config/dragonfly.h:#undef NO_DOLLAR_IN_LABEL
config/elfos.h:#define NO_DOLLAR_IN_LABEL
config/freebsd.h:#undef NO_DOLLAR_IN_LABEL
config/vx-common.h:# undef NO_DOLLAR_IN_LABEL
config/alpha/alpha.h:#undef NO_DOLLAR_IN_LABEL
config/arm/aout.h:#ifndef NO_DOLLAR_IN_LABEL
config/arm/aout.h:#define NO_DOLLAR_IN_LABEL 1
config/mips/n32-elf.h:#define NO_DOLLAR_IN_LABEL
config/mmix/mmix.h:#define NO_DOLLAR_IN_LABEL
config/rs6000/rs6000.c:#ifdef NO_DOLLAR_IN_LABEL
config/rs6000/rs6000-protos.h:#ifdef NO_DOLLAR_IN_LABEL
config/rs6000/xcoff.h:#define NO_DOLLAR_IN_LABEL
config/tilegx/tilegx.h:#undef NO_DOLLAR_IN_LABEL
config/tilepro/tilepro.h:#undef NO_DOLLAR_IN_LABEL
config/xtensa/elf.h:#undef NO_DOLLAR_IN_LABEL

Couldn't it be recorded somewhere in DECL_LANG_SPECIFIC of the decl?

> +fatal_error (input_location,
> +  "Sorry, polymorphic arrays not yet supported for "
> +  "firstprivate");

Shouldn't this be sorry ("...") instead?

> +  /* var._data - _data is void* for scalars and descriptor for arrays.  
> */
> +  if (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == RECORD_TYPE)
> + fatal_error (input_location,
> +  "Sorry, polymorphic arrays not yet supported for "
> +  "firstprivate");

Likewise.

> +  /* Malloc memory + call class->_vpt->_copy.  */
> +  call = builtin_decl_explicit (BUILT_IN_MALLOC);

Is malloc what the FE uses elsewhere for it?  Will something free it
afterwards?

> +  if (!GFC_DECL_GET_SCALAR_ALLOCATABLE (TREE_OPERAND (dest_data, 1)))
> + {
> +   gfc_add_block_to_block (, _block);
> + }

Formatting, one stmt shouldn't be wrapped into {}s.

Jakub



Re: [PATCH] rs6000, vec_popcntd is improperly defined in altivec.h

2020-08-31 Thread Carl Love via Gcc-patches
Will:

> That looks OK within this context.
> 
> Are there any existing tests that use these named variations?  
> 
> Thanks,
> -Will

I was not able to find any test cases for these named builtins.  I
fixed the other issues you mentioned in the message and patch below.

   Carl Love

--


GCC maintainers:

The defines for vec_popcnt, vec_popcnth, vec_popcntw, vec_popcntd in
gcc/config/rs6000/altivec.h are not listed in the Power 64-Bi ELF V2
ABI specification revision 1.4, May 10, 2017.  They are not used by any
of the regression tests.  They also do not work as reported in PR
85830.

The following patch removes the unsupported defines for the builtin
functions.

The patch has been tested on 

  powerpc64le-unknown-linux-gnu (Power 9 LE

with no regression errors.

Please let me know if this patch is acceptable for mainline.

 Carl Love

-
rs6000, fix improperly defined in builtins.

gcc/ChangeLog

2020-08-31  Carl Love  

PR target/85830
* config/rs6000/altivec.h (vec_popcntb, vec_popcnth,
vec_popcntw,
vec_popcntd): Remove defines.
---
 gcc/config/rs6000/altivec.h | 4 
 1 file changed, 4 deletions(-)

diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index bf2240f16a2..8a2dcda0144 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -407,10 +407,6 @@
 #define vec_vpopcnth __builtin_vec_vpopcnth
 #define vec_vpopcntw __builtin_vec_vpopcntw
 #define vec_popcnt __builtin_vec_vpopcntu
-#define vec_popcntb __builtin_vec_vpopcntub
-#define vec_popcnth __builtin_vec_vpopcntuh
-#define vec_popcntw __builtin_vec_vpopcntuw
-#define vec_popcntd __builtin_vec_vpopcntud
 #define vec_vrld __builtin_vec_vrld
 #define vec_vsld __builtin_vec_vsld
 #define vec_vsrad __builtin_vec_vsrad
-- 
2.17.1




Re: [PATCH] rs6000, vec_popcntd is improperly defined in altivec.h

2020-08-31 Thread Segher Boessenkool
Hi!

On Fri, Aug 28, 2020 at 08:08:05AM -0700, Carl Love wrote:
> The defines for vec_popcnt, bvec_popcnth, vec_popcntw, vec_popcntd in
> gcc/config/rs6000/altivec.h are not listed in the Power 64-Bi ELF V2
> ABI specification revision 1.4, May 10, 2017.  They are not used by any
> of the regression tests.

(s/Bi/Bit/, in addition to what Will found)

There is vec_vpopcntb etc. (note the "v"), maybe that confused things?

> They also do not work as reported in:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85830
> 
> The following patch removes the unsupported defines for the builtin
> functions.

Okay for trunk, and okay for any backports you think is good.  Thanks!


Segher


[Patch, fortran] PR fortran/96870 - Class name on error message

2020-08-31 Thread José Rui Faustino de Sousa via Gcc-patches

Hi all!

Proposed patch to PR96870 - Class name on error message.

Patch tested only on x86_64-pc-linux-gnu.

Make the error message more intelligible for the average user.

Thank you very much.

Best regards,
José Rui


2020-8-21  José Rui Faustino de Sousa  

gcc/fortran/ChangeLog:

PR fortran/96870
* misc.c (gfc_typename): use class name instead of internal name
on error message.

gcc/testsuite/ChangeLog:

PR fortran/96870
* gfortran.dg/PR96870.f90: New test.


diff --git a/gcc/fortran/misc.c b/gcc/fortran/misc.c
index 65bcfa6..43edfd8 100644
--- a/gcc/fortran/misc.c
+++ b/gcc/fortran/misc.c
@@ -184,8 +184,11 @@ gfc_typename (gfc_typespec *ts, bool for_hash)
 	  break;
 	}
   ts1 = ts->u.derived->components ? >u.derived->components->ts : NULL;
-  if (ts1 && ts1->u.derived && ts1->u.derived->attr.unlimited_polymorphic)
-	sprintf (buffer, "CLASS(*)");
+  if (ts1 && ts1->u.derived)
+	if (ts1->u.derived->attr.unlimited_polymorphic)
+	  sprintf (buffer, "CLASS(*)");
+	else
+	  sprintf (buffer, "CLASS(%s)", ts1->u.derived->name);
   else
 	sprintf (buffer, "CLASS(%s)", ts->u.derived->name);
   break;
diff --git a/gcc/testsuite/gfortran.dg/PR96870.f90 b/gcc/testsuite/gfortran.dg/PR96870.f90
new file mode 100644
index 000..c1b321e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/PR96870.f90
@@ -0,0 +1,41 @@
+! { dg-do compile }
+!
+! Test fix for PR96870
+!
+
+Program main_p
+
+  implicit none
+  
+  Type :: t0
+  End Type t0
+  
+  Type, extends(t0) :: t1
+  End Type t1
+  
+  type(t0),   target :: x
+  class(t0), pointer :: p
+
+  p => x
+  Call sub_1(x) ! { dg-error "Type mismatch in argument .p. at .1.; passed TYPE\\(t0\\) to CLASS\\(t1\\)" }
+  Call sub_1(p) ! { dg-error "Type mismatch in argument .p. at .1.; passed CLASS\\(t0\\) to CLASS\\(t1\\)" }
+  Call sub_2(x) ! { dg-error "Type mismatch in argument .p. at .1.; passed TYPE\\(t0\\) to TYPE\\(t1\\)" }
+  Call sub_2(p) ! { dg-error "Type mismatch in argument .p. at .1.; passed CLASS\\(t0\\) to TYPE\\(t1\\)" }
+  stop
+  
+Contains
+  
+  Subroutine sub_1(p)
+class(t1), Intent(In) :: p
+
+return
+  End Subroutine sub_1
+  
+  Subroutine sub_2(p)
+type(t1), Intent(In) :: p
+
+return
+  End Subroutine sub_2
+  
+End Program main_p
+


Re: [Patch] OpenMP: Handle cpp_implicit_alias in declare-target discovery (PR96390)

2020-08-31 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 03, 2020 at 05:37:40PM +0200, Tobias Burnus wrote:
> It turned out that the omp_discover_declare_target_tgt_fn_r
> discovered all nodes – but as it tagged the C++ alias nodes
> and not the streamed-out nodes, no device function was created
> and one got link errors if offloading devices were configured.
> (Only with -O0 as otherwise inlining happened.)
> 
> (Testcase is based on a sollve_vv testcase which in turn was
> based on an LLVM bugreport.)

> OpenMP: Handle cpp_implicit_alias in declare-target discovery (PR96390)
> 
> gcc/ChangeLog:
> 
>   PR middle-end/96390
>   * omp-offload.c (omp_discover_declare_target_tgt_fn_r): Handle
>   cpp_implicit_alias nodes.
> 
> libgomp/ChangeLog:
> 
>   PR middle-end/96390
>   * testsuite/libgomp.c++/pr96390.C: New test.
> 
>  gcc/omp-offload.c   |  8 ++
>  libgomp/testsuite/libgomp.c++/pr96390.C | 49 
> +
>  2 files changed, 57 insertions(+)
> 
> diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
> index 32c2485abd4..4aef7dbea6c 100644
> --- a/gcc/omp-offload.c
> +++ b/gcc/omp-offload.c
> @@ -207,6 +207,14 @@ omp_discover_declare_target_tgt_fn_r (tree *tp, int 
> *walk_subtrees, void *data)
>symtab_node *node = symtab_node::get (*tp);
>if (node != NULL)
>   {
> +   if (node->cpp_implicit_alias)
> + {
> +   node = node->get_alias_target ();
> +   if (!omp_declare_target_fn_p (node->decl))
> + ((vec *) data)->safe_push (node->decl);
> +   DECL_ATTRIBUTES (node->decl)
> + = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES (node->decl));
> + }
> node->offloadable = 1;
> if (ENABLE_OFFLOADING)
>   g->have_offload = true;

Sorry for the review delay.

I don't see what is special on cpp_implicit_alias here, compared to any
other aliases.
So, I wonder if the code shouldn't do:
  tree decl = *tp;
  symtab_node *node = symtab_node::get (decl);
  if (node != NULL)
{
  symtab_node *anode = node->ultimate_alias_target ();
  if (anode && anode != node)
{
  decl = anode->decl;
  gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
  if (omp_declare_target_fn_p (*tp)
  || lookup_attribute ("omp declare target host",
   DECL_ATTRIBUTES (decl)))
return NULL_TREE;
  node = anode;
}
}
  tree id = get_identifier ("omp declare target");
  if (!DECL_EXTERNAL (decl) && DECL_SAVED_TREE (decl))
((vec *) data)->safe_push (decl);
  DECL_ATTRIBUTES (decl) = tree_cons (id, NULL_TREE, DECL_ATTRIBUTES 
(decl));
  if (node != NULL)
{
  node->offloadable = 1;
  if (ENABLE_OFFLOADING)
g->have_offload = true;
}

Otherwise, if we have say:
void foo () { }
void bar () __attribute__((alias ("foo")));
void baz () __attribute__((alias ("bar")));
int
main ()
{
  #pragma omp target
  baz ();
}
we won't mark foo as being declare target.
Though, maybe that is not enough and we need to mark all the aliases from
node to the ultimate alias that way (so perhaps instead iterate one
get_alias_target (if node->alias) by one and mark all the decls the way the
code marks right now (i.e. pushes those non-DECL_EXTERNAL with
DECL_SAVED_TREE for further processing and add attributes to all of them?

Jakub



Re: [PATCH] rs6000, vec_popcntd is improperly defined in altivec.h

2020-08-31 Thread will schmidt via Gcc-patches
On Fri, 2020-08-28 at 08:08 -0700, Carl Love wrote:
> GCC maintainers:
> 

Hi, 



> The defines for vec_popcnt, bvec_popcnth, vec_popcntw, vec_popcntd in

s/bvec/vec/

> gcc/config/rs6000/altivec.h are not listed in the Power 64-Bi ELF V2
> ABI specification revision 1.4, May 10, 2017.  They are not used by
> any
> of the regression tests.  They also do not work as reported in:
> 
> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=85830
> 


Can probably replace that URL with a reference to "PR 85830".

> The following patch removes the unsupported defines for the builtin
> functions.
> 
> The patch has been tested on 
> 
>   powerpc64le-unknown-linux-gnu (Power 9 LE
> 
> with no regression errors.
> 
> Please let me know if this patch is acceptable for mainline.
> 
>  Carl Love
> 
> --
>  vec_popcntd is improperly defined in altivec.h


^ stray line ? 

> 
> gcc/ChangeLog
> 
> 2020-08-27  Carl Love  
> 
>   PR target/85830
>   * config/rs6000/altivec.h (vec_popcntub, vec_popcntuh,
> vec_popcntuw,
>   vec_popcntud): Remove defines.
> ---
>  gcc/config/rs6000/altivec.h | 4 
>  1 file changed, 4 deletions(-)
> 
> diff --git a/gcc/config/rs6000/altivec.h
> b/gcc/config/rs6000/altivec.h
> index bf2240f16a2..8a2dcda0144 100644
> --- a/gcc/config/rs6000/altivec.h
> +++ b/gcc/config/rs6000/altivec.h
> @@ -407,10 +407,6 @@
>  #define vec_vpopcnth __builtin_vec_vpopcnth
>  #define vec_vpopcntw __builtin_vec_vpopcntw
>  #define vec_popcnt __builtin_vec_vpopcntu
> -#define vec_popcntb __builtin_vec_vpopcntub
> -#define vec_popcnth __builtin_vec_vpopcntuh
> -#define vec_popcntw __builtin_vec_vpopcntuw
> -#define vec_popcntd __builtin_vec_vpopcntud

That looks OK within this context.

Are there any existing tests that use these named variations?  

Thanks,
-Will

>  #define vec_vrld __builtin_vec_vrld
>  #define vec_vsld __builtin_vec_vsld
>  #define vec_vsrad __builtin_vec_vsrad



Re: [PATCH v3] libgcc: Use `-fasynchronous-unwind-tables' for LIB2_DIVMOD_FUNCS

2020-08-31 Thread Maciej W. Rozycki via Gcc-patches
On Fri, 28 Aug 2020, Jakub Jelinek wrote:

> >  As far as `-fexceptions' and `-fasynchronous-unwind-tables' are concerned 
> > it aligns with my understanding, i.e. in this specific scenario we need 
> > `-fasynchronous-unwind-tables' for libcalls (all of them) so that an 
> 
> -fasynchronous-unwind-tables is solely about whether one can unwind through
> the code, but not necessarily that EH will work.  If it is not turned on,
> non-call exceptions will certainly not work properly, because unwind info
> if present at all will only be correct on call insns and not guaranteed on
> anything else.

 Ack, this is what my understanding has been.

> -fexceptions enables EH handling in whatever function it is turned on,
> entries in .gcc_except_table as well as a personality routine will be added
> for it, calls from it which can throw (including special calls like throw)
> will be marked there and cause EH regions, similarly EH pads will be added
> for anything where an exception needs to be caught or where a cleanup needs
> to be run before continuing with the unwinding (e.g. destructors or cleanup
> attribute).  But still, it only expects that call can throw, not arbitrary
> instructions, so those won't be covered necessarily in EH regions, in the IL
> there won't be EH edges etc.
> 
> -fnon-call-exceptions is like -fexceptions, except in addition to calls
> (that may throw) it considers also possibly trapping instructions as
> something that can throw.  So way too many more EH edges, EH regions etc.

 OK, I believe I have realised what we do here.

 Obviously there may be multiple `try'/`catch' blocks in a program, and in 
a given function even.  Catching an exception essentially means matching 
the PC recorded for execution resumption (the return PC of a callee or a 
signal handler, which may come from various places according to the psABI 
in use, like the link register, a stack slot in the frame below, etc.) 
against the PC range(s) associated with a `try' block and passing control 
to the associated `catch' block.  For that obviously a set of exception 
data tables is required.

 Now, where `-fexceptions' only has been used we only record return PCs of 
call instructions in exception data tables, rather than recording the PC 
span of the whole `try' block, which in the course of optimisation might 
have been split into multiple disjoint ranges.  This is I gather to reduce 
the size of the tables and consequently the amount of processing required 
in EH, but the downside is an exception triggering at another place still 
within the `try' block will not be caught and will instead be passed up 
the frame chain.

 So where we want to also catch exceptions on instructions other than 
calls the `-fnon-call-exceptions' option can be used, in which case PC 
values associated with instructions from additional classes as per 
`may_trap_p_1' will be added to exception tables, making them larger.

 Still there is no way to catch exceptions on absolutely all instructions 
within a `try' block, as we have no option for that, IIUC, but the caller 
of the enclosing function can with the use of a `try' block around the 
call.

 I wasn't aware we have such optimisations/simplifications/restrictions in 
place and I honestly thought a `try' block was always completely covered.  
That is what I remember language semantics implied, but it has been long 
since I last looked at that, and no doubt standards have evolved and may 
have given leeway for compilers to only partially cover `try' blocks as 
they see fit for performance gain.  It is not clear from the description 
of `-fexceptions' or `-fnon-call-exceptions' options in our manual either.

 Perhaps it is general knowledge supposed to be held by software 
developers that I somehow missed previously, but it seems to me like it 
wouldn't hurt if it was actually mentioned in the description of said 
options, so if you agree with me, than I'll think and propose an update.

> Now, I'd say if for some trapping instruction in a function we want to throw
> an exception from a signal handler, surely the signal handler needs to be
> built with -fexceptions, but as long as the function that contains the
> trapping instruction doesn't really need to catch the exception (including
> running a cleanup in it and continuing with the exception), i.e. when the
> exception is thrown from the signal handler and caught in some caller of the
> function with the trapping instruction, I'd say all that needs to be done
> is the function with the trapping instruction be compiled with
> -fasynchronous-unwind-tables and the callers of it that are still not meant
> to catch it should be compiled with -funwind-tables (or
> -fasynchronous-unwind-tables) and only the function that should catch it or
> run cleanups should be compiled with -fexceptions (no need for
> -fnon-call-exceptions in that case, but of course not disallowed).

 Right, given what you have written and my observations above, and that 

[committed] d: Fix ICEs in the front-end when pointer size is 16-bit.

2020-08-31 Thread Iain Buclaw via Gcc-patches
Hi,

This patch fixes a couple of ICEs seen when compiling D source code for
a 16-bit target.  The one I used for testing was xstormy16-elf.

In the lowering of `bt*' intrinsics, some integer constants had
mismatched types, and bitsize was set to the wrong value.

In base_vtable_offset, the base offset value was calculated incorrectly.
The TypeInfo_Class object is comprised of 18 pointers and 1 uint field,
so now the internal classinfo type size is used instead.

Bootstrapped and regression tested on x86_64-linux-gnu/-m32/-mx32, and
committed to mainline.

Regards
Iain

---
gcc/d/ChangeLog:

* d-target.cc (Target::_init): Don't set classinfosize.
* d-tree.h (base_vtable_offset): Move under typeinfo.cc section.
* decl.cc (base_vtable_offset): Move to...
* typeinfo.cc (base_vtable_offset): ...here.  Get base offset from
internal TypeInfo_Class type.
* intrinsics.cc (expand_intrinsic_bt): Use pointer TYPE_SIZE for
setting bitsize value.  Build integer constants of correct type.
---
 gcc/d/d-target.cc   |  3 ---
 gcc/d/d-tree.h  |  2 +-
 gcc/d/decl.cc   | 36 
 gcc/d/intrinsics.cc |  7 ---
 gcc/d/typeinfo.cc   | 36 
 5 files changed, 41 insertions(+), 43 deletions(-)

diff --git a/gcc/d/d-target.cc b/gcc/d/d-target.cc
index 71156e34a9c..ea425ad897c 100644
--- a/gcc/d/d-target.cc
+++ b/gcc/d/d-target.cc
@@ -115,9 +115,6 @@ Target::_init (const Param &)
   (TYPE_PRECISION (long_double_type_node) / BITS_PER_UNIT));
   this->realalignsize = TYPE_ALIGN_UNIT (long_double_type_node);
 
-  /* Size of run-time TypeInfo object.  */
-  this->classinfosize = 19 * this->ptrsize;
-
   /* Much of the dmd front-end uses ints for sizes and offsets, and cannot
  handle any larger data type without some pervasive rework.  */
   this->maxStaticDataSize = tree_to_shwi (TYPE_MAX_VALUE (integer_type_node));
diff --git a/gcc/d/d-tree.h b/gcc/d/d-tree.h
index b01a133fe62..31fe5181912 100644
--- a/gcc/d/d-tree.h
+++ b/gcc/d/d-tree.h
@@ -622,7 +622,6 @@ extern tree make_thunk (FuncDeclaration *, int);
 extern tree start_function (FuncDeclaration *);
 extern void finish_function (tree);
 extern void mark_needed (tree);
-extern unsigned base_vtable_offset (ClassDeclaration *, BaseClass *);
 extern tree get_vtable_decl (ClassDeclaration *);
 extern tree build_new_class_expr (ClassReferenceExp *);
 extern tree aggregate_initializer_decl (AggregateDeclaration *);
@@ -660,6 +659,7 @@ extern tree build_libcall (libcall_fn, Type *, int ...);
 extern bool have_typeinfo_p (ClassDeclaration *);
 extern tree layout_typeinfo (TypeInfoDeclaration *);
 extern tree layout_classinfo (ClassDeclaration *);
+extern unsigned base_vtable_offset (ClassDeclaration *, BaseClass *);
 extern tree get_typeinfo_decl (TypeInfoDeclaration *);
 extern tree get_classinfo_decl (ClassDeclaration *);
 extern tree build_typeinfo (const Loc &, Type *);
diff --git a/gcc/d/decl.cc b/gcc/d/decl.cc
index 008a2bb16ab..8e4237dd056 100644
--- a/gcc/d/decl.cc
+++ b/gcc/d/decl.cc
@@ -1991,42 +1991,6 @@ mark_needed (tree decl)
 }
 }
 
-/* Get the offset to the BC's vtbl[] initializer from the start of CD.
-   Returns "~0u" if the base class is not found in any vtable interfaces.  */
-
-unsigned
-base_vtable_offset (ClassDeclaration *cd, BaseClass *bc)
-{
-  unsigned csymoffset = target.classinfosize;
-  unsigned interfacesize = int_size_in_bytes (vtbl_interface_type_node);
-  csymoffset += cd->vtblInterfaces->length * interfacesize;
-
-  for (size_t i = 0; i < cd->vtblInterfaces->length; i++)
-{
-  BaseClass *b = (*cd->vtblInterfaces)[i];
-  if (b == bc)
-   return csymoffset;
-  csymoffset += b->sym->vtbl.length * target.ptrsize;
-}
-
-  /* Check all overriding interface vtbl[]s.  */
-  for (ClassDeclaration *cd2 = cd->baseClass; cd2; cd2 = cd2->baseClass)
-{
-  for (size_t k = 0; k < cd2->vtblInterfaces->length; k++)
-   {
- BaseClass *bs = (*cd2->vtblInterfaces)[k];
- if (bs->fillVtbl (cd, NULL, 0))
-   {
- if (bc == bs)
-   return csymoffset;
- csymoffset += bs->sym->vtbl.length * target.ptrsize;
-   }
-   }
-}
-
-  return ~0u;
-}
-
 /* Get the VAR_DECL of the vtable symbol for DECL.  If this does not yet exist,
create it.  The vtable is accessible via ClassInfo, but since it is needed
frequently (like for rtti comparisons), make it directly accessible.  */
diff --git a/gcc/d/intrinsics.cc b/gcc/d/intrinsics.cc
index 486db127747..1b12a3ec6fb 100644
--- a/gcc/d/intrinsics.cc
+++ b/gcc/d/intrinsics.cc
@@ -303,7 +303,7 @@ expand_intrinsic_bt (intrinsic_code intrinsic, tree callexp)
   tree type = TREE_TYPE (TREE_TYPE (ptr));
 
   /* size_t bitsize = sizeof(*ptr) * BITS_PER_UNIT;  */
-  tree bitsize = fold_convert (type, TYPE_SIZE (type));
+  tree bitsize = fold_convert (type, TYPE_SIZE (TREE_TYPE 

[WIP][PATCH] RISC-V: Add `-mgprel' option for GP-relative addressing

2020-08-31 Thread Maciej W. Rozycki via Gcc-patches
Implement `-mgprel', forcing `-mexplicit-relocs' whenever the option is 
active due to the lack of GAS macro support for GP-relative addressing.

gcc/
* riscv/riscv-protos.h (riscv_symbol_type): Add SYMBOL_GPREL 
enumeration constant.
* config/riscv/riscv.c (riscv_classify_symbol)
(riscv_symbolic_constant_p, riscv_symbol_insns)
(riscv_split_symbol_type, riscv_split_symbol, riscv_output_move)
(riscv_print_operand_reloc, riscv_option_override): Handle 
GP-relative addressing.
* config/riscv/riscv.md (unspec): Add UNSPEC_GPREL enumeration 
constant.
* config/riscv/riscv.opt (mgprel): New option.
---
Hi,

 This is very early stage really, just implementing basic GP-relative
relocation support, in preparation for FDPIC support discussed here:



I planned adding support to GAS macros such as LA next so that we don't 
have to force explicit relocations with `-mgprel' for this option to work, 
but I won't be able to continue with this effort now as I am leaving 
Western Digital today, so I am posting this in case someone finds it 
useful or wishes to continue the effort.

 No regressions in `riscv64-linux-gnu' testing, RV32/ilp32d ABI with QEMU 
in the Linux user emulation mode across all GCC frontends and libraries, 
except for an odd ICE with one of the Fortran test cases and a couple 
timeouts with GNAT test cases, which I put all on the version difference 
between the test runs (10.0.1 20200426 vs 11.0.0 20200827).  

 Unfortunately I lost several hours, because 11.0.0 20200829 has regressed 
enough compared to 11.0.0 20200827 for testing not to progress well enough 
in 15 hours where it usually completes in ~10 hours.  So I had to restart 
with an older snapshot and wouldn't get reference results in time (I only 
had libgo results with 11.0.0 20200827).  I think my assumption as to the 
nature of the regressions is right though.

 A corresponding binutils change has also been posted.

  Maciej
---
 gcc/config/riscv/riscv-protos.h |1 +
 gcc/config/riscv/riscv.c|   32 +---
 gcc/config/riscv/riscv.md   |1 +
 gcc/config/riscv/riscv.opt  |4 
 4 files changed, 35 insertions(+), 3 deletions(-)

gcc-riscv-gprel.diff
Index: gcc/gcc/config/riscv/riscv-protos.h
===
--- gcc.orig/gcc/config/riscv/riscv-protos.h
+++ gcc/gcc/config/riscv/riscv-protos.h
@@ -28,6 +28,7 @@ enum riscv_symbol_type {
   SYMBOL_ABSOLUTE,
   SYMBOL_PCREL,
   SYMBOL_GOT_DISP,
+  SYMBOL_GPREL,
   SYMBOL_TLS,
   SYMBOL_TLS_LE,
   SYMBOL_TLS_IE,
Index: gcc/gcc/config/riscv/riscv.c
===
--- gcc.orig/gcc/config/riscv/riscv.c
+++ gcc/gcc/config/riscv/riscv.c
@@ -559,7 +559,13 @@ riscv_classify_symbol (const_rtx x)
   if (GET_CODE (x) == SYMBOL_REF && flag_pic && !riscv_symbol_binds_local_p 
(x))
 return SYMBOL_GOT_DISP;
 
-  return riscv_cmodel == CM_MEDLOW ? SYMBOL_ABSOLUTE : SYMBOL_PCREL;
+  if (riscv_cmodel == CM_MEDLOW)
+return SYMBOL_ABSOLUTE;
+
+  if (LABEL_REF_P (x) || (SYMBOL_REF_P (x) && SYMBOL_REF_FUNCTION_P (x)))
+return SYMBOL_PCREL;
+
+  return TARGET_GPREL ? SYMBOL_GPREL : SYMBOL_PCREL;
 }
 
 /* Classify the base of symbolic expression X.  */
@@ -604,6 +610,7 @@ riscv_symbolic_constant_p (rtx x, enum r
 case SYMBOL_ABSOLUTE:
 case SYMBOL_PCREL:
 case SYMBOL_TLS_LE:
+case SYMBOL_GPREL:
   /* GAS rejects offsets outside the range [-2^31, 2^31-1].  */
   return sext_hwi (INTVAL (offset), 32) == INTVAL (offset);
 
@@ -622,6 +629,7 @@ static int riscv_symbol_insns (enum risc
 case SYMBOL_ABSOLUTE: return 2; /* LUI + the reference.  */
 case SYMBOL_PCREL: return 2; /* AUIPC + the reference.  */
 case SYMBOL_TLS_LE: return 3; /* LUI + ADD TP + the reference.  */
+case SYMBOL_GPREL: return 3; /* LUI + ADD GP + the reference.  */
 case SYMBOL_GOT_DISP: return 3; /* AUIPC + LD GOT + the reference.  */
 default: gcc_unreachable ();
 }
@@ -735,7 +743,9 @@ riscv_split_symbol_type (enum riscv_symb
   if (!TARGET_EXPLICIT_RELOCS)
 return false;
 
-  return symbol_type == SYMBOL_ABSOLUTE || symbol_type == SYMBOL_PCREL;
+  return (symbol_type == SYMBOL_ABSOLUTE
+ || symbol_type == SYMBOL_PCREL
+ || symbol_type == SYMBOL_GPREL);
 }
 
 /* Return true if a LO_SUM can address a value of mode MODE when the
@@ -1241,6 +1251,17 @@ riscv_split_symbol (rtx temp, rtx addr,
}
break;
 
+  case SYMBOL_GPREL:
+   {
+ rtx high = gen_rtx_HIGH (Pmode, copy_rtx (addr));
+ rtx gp = gen_rtx_REG (Pmode, GP_REGNUM);
+ high = riscv_force_temporary (temp, high, in_splitter);
+ rtx reg = gen_rtx_PLUS (Pmode, high, gp);
+ reg = riscv_force_temporary (temp, reg, in_splitter);
+ *low_out = 

Re: [Patch] OpenMP/Fortran: Handle polymorphic scalars in data-sharing FIRSTPRIVATE (PR86470)

2020-08-31 Thread Tobias Burnus

Hi Andre,

On 8/31/20 12:55 PM, Andre Vehreschild wrote:

+gfc_is_unlimited_polymorphic_nonptr (tree type)
+  tree field = TYPE_FIELDS (type); /* _data */
+  if (!field)

^^^ here you don't . So theoretically this routine could match a type which
has a _len as its third field, but that is not a unlim-poly class. Maybe factor
out the test from the above routine and unify with this one to reuse the test
for a BT_CLASS?!


Granted. The reason was the code use:
if (polymophic)
  {
  ...
  if (unlimited_polymorphic)

Hence, I assumed that that check was already done, reducing
code size (but having less universality) and increasing
(cold-code) performance.

My new idea is to unify the two functions and add an
"bool only_unlimited" flag.


Btw, I believe the first routine can be better replaced by:

static bool
gfc_is_polymorphic_nonptr (tree type)
{
   if (POINTER_TYPE_P (type))
 type = TREE_TYPE (type);
   return GFC_CLASS_TYPE_P (type);
}


Maybe. However, when looking into the check for polymorphic
arrays, the DECL_LANG_SPECIFIC (and I think TYPE_LANG_SPECIFIC)
were present but contained only garbage. Thus, it might not work.
(I have to check.) – If it works, I will use your nicer suggestion.
If it doesn't work, I would go for my proposal above.
(Eventually, in a follow-up patch for polymorphic arrays, it has
to be fixed properly to avoid the following hack.)


+  /* TODO: implement support for polymorphic arrays; reject for now.  */
+  /* Void arrays appear as var.0 = var._data.data. A bit hackish to
+ distinguish from 'type(c_ptr) :: var(5)' by scanning for '.';
+ this assumes that ASM_FORMAT_PRIVATE_NAME uses a '.', which most
+ systems do. */
...
I totally agree that this is hackish and I don't like for that. But I can't
come up with a better solution at the moment.


I think some changes at multiple places are needed to implement this
properly – but for the 'sorry' I did not want to do non-local changes;
for the real version, it should use some nicer code!

Thanks for the suggestions and review.

Tobias

PS: I want to first finish working on some other tasks before coming back
to this patch.



Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Richard Biener via Gcc-patches
On Mon, Aug 31, 2020 at 11:10 AM Aldy Hernandez  wrote:
>
>
>
> On 8/31/20 10:33 AM, Richard Biener wrote:
> > On Mon, Aug 31, 2020 at 10:20 AM Aldy Hernandez  wrote:
> >>
> >> As discussed in the PR, the type of a switch operand may be different
> >> than the type of the individual cases.  This is causing us to attempt to
> >> intersect incompatible ranges.
> >>
> >> This inconsistent IL seems wrong to me, but it also seems pervasive
> >> throughout GCC.  Jason, as one of the original gimple designers, do you
> >> remember if this was an oversight, or was there a reason for this?
> >>
> >> Richard, you mention in the PR that normalizing the IL would cause
> >> propagation of widening cast sources into switches harder.  Couldn't we
> >> just cast all labels to the switch operand type upon creation?  What
> >> would be the problem with that?  Would we generate sub-optimal code?
> >>
> >> In either case, I'd hate to leave trunk in a broken case while this is
> >> being discussed.  The attached patch fixes the PRs.
> >>
> >> OK?
> >
> > widest_irange label_range (CASE_LOW (min_label), case_high);
> > +  if (!types_compatible_p (label_range.type (), range_of_op->type ()))
> > +   range_cast (label_range, range_of_op->type ());
> > label_range.intersect (range_of_op);
> >
> > so label_range is of 'widest_int' type but then instead of casting
> > range_of_op to widest_irange you cast that widest_irange to the
> > type of the range op?  Why not build label_range in the type
> > of range_op immediately?
>
> The "widest" in widest_irange does not denote the type of the range, but
> the number of sub-ranges that can maximally fit.  It has nothing to do
> with the underlying type of its elements.  Instead, it is supposed to
> indicate that label_range can fit an infinite amount of sub-ranges.  In
> practice this is 255 sub-ranges, but we can adjust that if needed.

That's definitely confusing naming then :/

> So, in the constructor:
>
> widest_irange label_range (CASE_LOW (min_label), case_high);
>
> ...the type for the sub-ranges in label_range is the type of
> CASE_LOW(min_label) and case_high.  They must match IIRC.
>
> I am working on an irange best practices document that should go into
> detail on all this, and will post it later this week.
>
> >
> > Using widest_int for both would have been obvious to me, you're
> > indicating that switch (type op) { case type2 lab: } is supposed to
> > match as (type)lab == op rather than (type2)op == lab.  I think
> > the IL will be consistent in a way that sign-extending both
> > op and lab to a common larger integer type is correct
> > (thus using widest_int), but no truncation should happen here
> > (such trucation should be applied by the frontend).
>
> I suppose we could convert both label_range and range_of_op to a wider
> type and do the intersection on these results.  Is there a tree type
> that indicates a widest possible int?

Btw, just looked up how we expand GIMPLE_SWITCH and there we
convert case label values to the type of the index expression
(what your patch did).

So the patch is OK.

Richard.



> >
> > In any case type mismatches here are of course unfortunate
> > and both more verification and documentation would be
> > nice.  verify_gimple_switch only verifies all case labels
> > have the same type, the type of the switch argument is
> > not verified in any way against that type.
>
> Is the verification good enough with what Jakub posted?  I can adjust
> the documentation, but first I'd like a nod from Jason that this was
> indeed expected behavior.
>
> Thanks.
> Aldy
>


Re: [PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-08-31 Thread Richard Biener via Gcc-patches
On Mon, Aug 31, 2020 at 11:09 AM Xiong Hu Luo via Gcc-patches
 wrote:
>
> vec_insert accepts 3 arguments, arg0 is input vector, arg1 is the value
> to be insert, arg2 is the place to insert arg1 to arg0.  This patch adds
> __builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] for vec_insert to
> not expand too early in gimple stage if arg2 is variable, to avoid generate
> store hit load instructions.
>
> For Power9 V4SI:
> addi 9,1,-16
> rldic 6,6,2,60
> stxv 34,-16(1)
> stwx 5,9,6
> lxv 34,-16(1)
> =>
> addis 9,2,.LC0@toc@ha
> addi 9,9,.LC0@toc@l
> mtvsrwz 33,5
> lxv 32,0(9)
> sradi 9,6,2
> addze 9,9
> sldi 9,9,2
> subf 9,9,6
> subfic 9,9,3
> sldi 9,9,2
> subfic 9,9,20
> lvsl 13,0,9
> xxperm 33,33,45
> xxperm 32,32,45
> xxsel 34,34,33,32
>
> Though instructions increase from 5 to 15, the performance is improved
> 60% in typical cases.

Not sure if it is already possible but maybe use internal functions for
those purely internal builtins instead?  That makes it possible
to overload with a single IFN.

Richard.

> gcc/ChangeLog:
>
> * config/rs6000/altivec.md (altivec_lvsl_reg_2): Extend to
> SDI mode.
> * config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
> macros for vec_insert built-in functions.
> * config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
> Generate built-in calls for vec_insert.
> * config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
> New function.
> (altivec_expand_builtin): Add case entry for
> VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
> VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
> VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.
> (altivec_init_builtins):
> * config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
> New declear.
> * config/rs6000/rs6000.c (rs6000_expand_vector_insert):
> New function.
> * config/rs6000/rs6000.md (FQHS): New mode iterator.
> (FD): New mode iterator.
> p8_mtvsrwz_v16qi2: New define_insn.
> p8_mtvsrd_v16qi2: New define_insn.
> * config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/powerpc/pr79251.c: New test.
> ---
>  gcc/config/rs6000/altivec.md   |   4 +-
>  gcc/config/rs6000/rs6000-builtin.def   |   6 +
>  gcc/config/rs6000/rs6000-c.c   |  61 +
>  gcc/config/rs6000/rs6000-call.c|  74 +++
>  gcc/config/rs6000/rs6000-protos.h  |   1 +
>  gcc/config/rs6000/rs6000.c | 146 +
>  gcc/config/rs6000/rs6000.md|  19 +++
>  gcc/config/rs6000/vsx.md   |   2 +-
>  gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 
>  9 files changed, 333 insertions(+), 3 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c
>
> diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
> index 0a2e634d6b0..66b636059a6 100644
> --- a/gcc/config/rs6000/altivec.md
> +++ b/gcc/config/rs6000/altivec.md
> @@ -2772,10 +2772,10 @@
>DONE;
>  })
>
> -(define_insn "altivec_lvsl_reg"
> +(define_insn "altivec_lvsl_reg_2"
>[(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
> (unspec:V16QI
> -   [(match_operand:DI 1 "gpc_reg_operand" "b")]
> +   [(match_operand:SDI 1 "gpc_reg_operand" "b")]
> UNSPEC_LVSL_REG))]
>"TARGET_ALTIVEC"
>"lvsl %0,0,%1"
> diff --git a/gcc/config/rs6000/rs6000-builtin.def 
> b/gcc/config/rs6000/rs6000-builtin.def
> index f9f0fece549..d095b365c14 100644
> --- a/gcc/config/rs6000/rs6000-builtin.def
> +++ b/gcc/config/rs6000/rs6000-builtin.def
> @@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,  "vec_init_v2di",   CONST)
>  BU_VSX_X (VEC_SET_V1TI,  "vec_set_v1ti",   CONST)
>  BU_VSX_X (VEC_SET_V2DF,  "vec_set_v2df",   CONST)
>  BU_VSX_X (VEC_SET_V2DI,  "vec_set_v2di",   CONST)
> +BU_VSX_X (VEC_INSERT_V16QI,  "vec_insert_v16qi",   CONST)
> +BU_VSX_X (VEC_INSERT_V8HI,   "vec_insert_v8hi",CONST)
> +BU_VSX_X (VEC_INSERT_V4SI,   "vec_insert_v4si",CONST)
> +BU_VSX_X (VEC_INSERT_V4SF,   "vec_insert_v4sf",CONST)
> +BU_VSX_X (VEC_INSERT_V2DI,   "vec_insert_v2di",CONST)
> +BU_VSX_X (VEC_INSERT_V2DF,   "vec_insert_v2df",CONST)
>  BU_VSX_X (VEC_EXT_V1TI,  "vec_ext_v1ti",   CONST)
>  BU_VSX_X (VEC_EXT_V2DF,  "vec_ext_v2df",   CONST)
>  BU_VSX_X (VEC_EXT_V2DI,  "vec_ext_v2di",   CONST)
> diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
> index 2fad3d94706..03b00738a5e 100644
> --- a/gcc/config/rs6000/rs6000-c.c
> +++ 

Re: [r11-1851 Regression] FAIL: gcc.dg/vect/slp-46.c scan-tree-dump-times vect "vectorizing stmts using SLP" 2 on Linux/x86_64 (-m64 -march=cascadelake)

2020-08-31 Thread H.J. Lu via Gcc-patches
On Mon, Aug 31, 2020 at 12:25 AM Richard Biener  wrote:
>
> On Sat, 29 Aug 2020, sunil.k.pandey wrote:
>
> > On Linux/x86_64,
> >
> > dccbf1e2a6e544f71b4a5795f0c79015db019fc3 is the first bad commit
> > commit dccbf1e2a6e544f71b4a5795f0c79015db019fc3
> > Author: Richard Biener 
> > Date:   Mon Jul 6 16:26:50 2020 +0200
> >
> > tree-optimization/96075 - fix bogus misalignment calculation
> >
> > caused
> >
> > FAIL: gcc.dg/vect/slp-46.c -flto -ffat-lto-objects  scan-tree-dump-times 
> > vect "vectorizing stmts using SLP" 2
> > FAIL: gcc.dg/vect/slp-46.c scan-tree-dump-times vect "vectorizing stmts 
> > using SLP" 2
>
> It might be useful to point out that the commit in question added the
> testcase that "regressed" plus that you are using a non-standard
> configuration (x86 vectorization unfortunately is too fragmented to
> produce fully attributed or generic testcases that will pass with
> any configuration).
>
> So the appropriate report would be "FAIL: gcc.dg/vect/slp-46.c
> with -march=cascadelake" which isn't a regression.
>

We analyze and report any new failures.  When a testcase fails
with -march=cascadelake, it can be

1. The testcase doesn't expect AVX512.
2. Compiler fails to handle AVX512.

In any case, there are failures which didn't exist before the commit.

-- 
H.J.


[PATCH] tree-optimization/96854 - SLP reduction of two-operator is broken

2020-08-31 Thread Richard Biener
This fixes SLP reduction of two-operator operations by marking those
not supported.  In fact any live lane out of such an operation cannot
be code-generated correctly.

Bootstrapped on x86_64-unknown-linux-gnu, testing in progress.

This is for GCC 10 only, trunk gets the new testcase only.

Richard.

2020-08-31  Richard Biener  

PR tree-optimization/96854
* tree-vect-loop.c (vectorizable_live_operation): Disallow
SLP_TREE_TWO_OPERATORS nodes.

* gcc.dg/vect/pr96854.c: New testcase.
---
 gcc/testsuite/gcc.dg/vect/pr96854.c | 20 
 gcc/tree-vect-loop.c|  5 +
 2 files changed, 25 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/vect/pr96854.c

diff --git a/gcc/testsuite/gcc.dg/vect/pr96854.c 
b/gcc/testsuite/gcc.dg/vect/pr96854.c
new file mode 100644
index 000..e3980d41303
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr96854.c
@@ -0,0 +1,20 @@
+/* { dg-additional-options "-ffast-math" } */
+
+double _Complex __attribute__((noipa))
+foo (double _Complex acc, const double _Complex *x, const double _Complex* y, 
int N)
+{
+  for (int c = 0; c < N; ++c)
+acc -= x[c] * y[c];
+  return acc;
+}
+
+int
+main()
+{
+  static const double _Complex y[] = { 1, 2, };
+  static const double _Complex x[] = { 1, 3, };
+  double _Complex ref = foo (0, x, y, 2);
+  if (__builtin_creal (ref) != -7.)
+__builtin_abort ();
+  return 0;
+}
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index b6c3faeae51..29071630327 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -7867,6 +7867,11 @@ vectorizable_live_operation (stmt_vec_info stmt_info,
 
   gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
 
+  /* Due to how we generate code for SLP_TREE_TWO_OPERATORS we cannot
+ vectorize live operations out of it.  */
+  if (slp_node && SLP_TREE_TWO_OPERATORS (slp_node))
+return false;
+
   /* If a stmt of a reduction is live, vectorize it via
  vect_create_epilog_for_reduction.  vectorizable_reduction assessed
  validity so just trigger the transform here.  */
-- 
2.26.2


Re: [PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-31 Thread Jan Hubicka
> 
> Yeah.  Or even refactor the output machinery so that in theory
> we can create asm fragments [into memory] for functions and variables
> and only at the end concat/output them in a desired order  (cf.
> -fno-toplevel-reorder
> wrt toporder code-generation we'd prefer).  That would also be one (small)
> step towards eventually being able to thread the RTL pipeline.

That would be nice.  We also have WIP Martin's code layout pass
that would benefit from this.
> 
> Note that in theory the auto-parallel work could be leveraged to
> elide LTRANS streaming if we'd drive LTRANS compile from WPA
> instead of from LTO wrapper.  We could simply do the forking,
> apply the partition and then load bodies from the original IL files
> (with the caveat of needing GC to truncate the decls and types
> memory from WPA).

Yep, as mentioned in other mail, I implemented that at one point, but
it was memory hungry and not faster than ltrans streaming, so put it on
hold :)
I suppose same way as for non-WPA build, there is a size of unit until
which one thread works best, from some point partitioning help and for
even bigger units streaming start to help, just because it repickles
data with better locality.

Honza
> 
> But yes, avoiding the partial link would be nice - it would also
> be possible to support parallelizing -S compiles.  And it would
> avoid the symbol renaming.
> 
> Richard.
> 
> > Honza
> > >
> > > >
> > > > > Bootstrapped and Regtested on Linux x86_64.
> > > > >
> > > > > Giuliano Belinassi (6):
> > > > >   Modify gcc driver for parallel compilation
> > > > >   Implement a new partitioner for parallel compilation
> > > > >   Implement fork-based parallelism engine
> > > > >   Add `+' for Jobserver Integration
> > > > >   Add invoke documentation
> > > > >   New tests for parallel compilation feature
> > > > >
> > > > >  gcc/Makefile.in   |6 +-
> > > > >  gcc/cgraph.c  |   16 +
> > > > >  gcc/cgraph.h  |   13 +
> > > > >  gcc/cgraphunit.c  |  198 ++-
> > > > >  gcc/common.opt|4 +
> > > > >  gcc/doc/invoke.texi   |   32 +-
> > > > >  gcc/gcc.c | 1219 
> > > > > +
> > > > >  gcc/ipa-fnsummary.c   |2 +-
> > > > >  gcc/ipa-icf.c |3 +-
> > > > >  gcc/ipa-visibility.c  |3 +-
> > > > >  gcc/ipa.c |4 +-
> > > > >  gcc/jobserver.cc  |  168 +++
> > > > >  gcc/jobserver.h   |   33 +
> > > > >  gcc/lto-cgraph.c  |  172 +++
> > > > >  gcc/{lto => }/lto-partition.c |  463 ++-
> > > > >  gcc/{lto => }/lto-partition.h |4 +-
> > > > >  gcc/lto-streamer.h|4 +
> > > > >  gcc/lto/Make-lang.in  |4 +-
> > > > >  gcc/lto/lto.c |2 +-
> > > > >  gcc/params.opt|8 +
> > > > >  gcc/symtab.c  |   46 +-
> > > > >  gcc/testsuite/driver/a.c  |6 +
> > > > >  gcc/testsuite/driver/b.c  |6 +
> > > > >  gcc/testsuite/driver/driver.exp   |   80 ++
> > > > >  gcc/testsuite/driver/empty.c  |0
> > > > >  gcc/testsuite/driver/foo.c|7 +
> > > > >  .../gcc.dg/parallel-early-constant.c  |   22 +
> > > > >  gcc/testsuite/gcc.dg/parallel-static-1.c  |   21 +
> > > > >  gcc/testsuite/gcc.dg/parallel-static-2.c  |   21 +
> > > > >  .../gcc.dg/parallel-static-clash-1.c  |   23 +
> > > > >  .../gcc.dg/parallel-static-clash-aux.c|   14 +
> > > > >  gcc/toplev.c  |   58 +-
> > > > >  gcc/toplev.h  |3 +
> > > > >  gcc/tree.c|   23 +-
> > > > >  gcc/varasm.c  |   26 +-
> > > > >  intl/Makefile.in  |2 +-
> > > > >  libbacktrace/Makefile.in  |2 +-
> > > > >  libcpp/Makefile.in|2 +-
> > > > >  libdecnumber/Makefile.in  |2 +-
> > > > >  libiberty/Makefile.in |  212 +--
> > > > >  zlib/Makefile.in  |   64 +-
> > > > >  41 files changed, 2539 insertions(+), 459 deletions(-)
> > > > >  create mode 100644 gcc/jobserver.cc
> > > > >  create mode 100644 gcc/jobserver.h
> > > > >  rename gcc/{lto => }/lto-partition.c (78%)
> > > > >  rename gcc/{lto => }/lto-partition.h (89%)
> > > > >  create mode 100644 gcc/testsuite/driver/a.c
> > > > >  create mode 100644 gcc/testsuite/driver/b.c
> > > > >  create 

Re: [PATCH 3/n] ipa: Simplify interface of ipa_call_context::estimate_size_and_time

2020-08-31 Thread Jan Hubicka
> 
> All right, but let's start from the basic objective of the patch.   Do
> we want to have something like ipa_call_arg_values?

OK, i suppose you patch was motivated by two things:
 1) combine results of the estimates to one place (reducing number
of parameters in estimate_size_and_time) which can be done by
inventing ipa_estimates datastructure
 2) combine the different things we track on values into one
datastructure (ipa_call_agg_values).
Which essentially will reduce API for
estimate_ipcp_clone_size_and_time
and evaluate_properties_for_edge
and the construtor of ipa_call_context

ipa_call_context is meant to do the following
 1) hold precisely the info on which estimates depends on. "unnecesary"
info is thrown away to make comparsions quick and memory use down.
However the notion of unnecesariness is closely tied into what
summary contains. I.e. the info still can be useful for propagation.
 2) be able to produce estimates from that info
 3) be able to compare itself with other context to handle the context
cache.
In longer term I want to keep ability to have multiple contextes per
function (i.e. add hash method) and update corresponding estimates in
cache incrementally/recompute if function changes.  I implemented that
but it did not seem to pay back in compile time tests enough to be
merged. However I epxpect this to change in future as we track more
useful info and units keep growing.
> 
> I hope that the answer to this question is yes.  I certainly hope that
> we want to get rid of passing around each vector as an individual
> parameter.  The one alternative I can think of would be to make each
> function that now receives the three or four vectors either a method of
> ipa_call_context or receive ipa_call_context as the parameter.  That
> however does not fit naturally to uses in 1) ipa_fn_summary_t::duplicate,
> 2) ipa_merge_fn_summary_after_inlining and 3) anywhere in IPA-CP where
> I'd like the pass to continue with the pass owning the vectors and
> constructing contexts for each change.

Yep, because these functions compute info context is build from :)

I understnad that you want to have a common datastructure to handle
known information about parameters. Have way to construct it using the
two ways we use (in inliner and in ipa-cp) and feed it into context.

If you merge the four vectors and clauses into ipa_call_agg_values which
has auto_vecs of prealocated size, so if it is homed on stack, it will
mostly avoid calling malloc, then you can pass it to the constructor of
context and turn evaluate_properties_for_edge and
estimate_ipcp_clone_size_and_time to perhaps member function used to
fill them up.

We can still play the games about ownership of the vectors needed to
keep malloc calls down.
> 
> If the answer to the above question is yes, then we can have
> ipa_call_arg_values containing pointers to the vectors - which would be
> part of ipa_call_context or it can be even derived from it - and which
> would be constructible from ipa_auto_call_arg_values, which would
> contain the vectors as autovecs.  This would also help me unify the
> information for IPA-CP clones.
> 
> Then ipa_cached_call_context would not need to be a different type, it
> would only allocate the vectors almost like before although I'd prefer
> it to be, just so that the release function is specific to this kind of
> context.  Just the vec structures themselves would be on the heap.
> Would that work?
> 
> Note that, however, I still think that the most context-consumer
> friendly interface would be to have static function
> ipa_call_context::for_inlined_edge (see my 2nd patch) which would fetch
> a context either from a cache or create it without the user knowing
> anything about the cache at all.  But then in order to cache the
> estimates, they would have to be part of the context... but I do not
> insist, we can leave the caching explicit.

It seems to me that keeping the context separate from info about
arguments is doable and would be cleaner (since it is not meant to be
holder for info about arguments).

Does that seem to make sense?
Honza
> 
> Anyway, please let me know what you think about the above plan.
> 
> Martin
> 
> 
> 
> >
> > Honza
> >> 
> >> > What
> >> > about keeping them separate and inventing ipa_call_estimates structure
> >> > to hold the reults?
> >> 
> >> I can but unless you do not like the first patch and want me to re-write
> >> it or just not do anything like it, I don't think it matters because the
> >> structures will almost always lie next to each other on the user's
> >> stack.
> >> 
> >> Martin
> >> 
> >> >> 
> >> >> 
> >> >> gcc/ChangeLog:
> >> >> 
> >> >> 2020-08-28  Martin Jambor  
> >> >> 
> >> >> * ipa-fnsummary.h (class ipa_call_context): Changed declaration 
> >> >> of
> >> >> estimate_size_and_time to accept two booleans.  Added an 
> >> >> overload
> >> >> of the method without any parameters.  New fields 

Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Jan Hubicka
> Hi,
> 
> On Mon, Aug 31 2020, Feng Xue OS wrote:
> > This patch is to fix a bug that cost that is used to evaluate clone 
> > candidate
> > becomes negative due to integer overflow.
> >
> > Feng
> > ---
> > 2020-08-31  Feng Xue  
> >
> > gcc/
> > PR tree-optimization/96806
> 
> the component is "ipa," please change that when you commit the patch.
> 
> > * ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
> > overflow.
> 
> assuming you have bootstrapped and tested it, it is OK for both trunk
> and all affected release branches.

I have already added caps on things that come from profile counts so
things do not overflow, but I think in longer run we want to simply use
sreals here..
> >&& !good_cloning_opportunity_p (node,
> > - val->local_time_benefit
> > - + val->prop_time_benefit,
> > + safe_add (val->local_time_benefit,
> > +   val->prop_time_benefit),
> >   freq_sum, count_sum,
> > - val->local_size_cost
> > - + val->prop_size_cost))
> > + safe_add (val->local_size_cost,
> > +   val->prop_size_cost)))

Is it also size cost that may overflow? That seem bit odd ;)

Honza
> >  return false;
> >  
> >if (dump_file)
> 
> [...]


Re: [Patch] OpenMP/Fortran: Handle polymorphic scalars in data-sharing FIRSTPRIVATE (PR86470)

2020-08-31 Thread Andre Vehreschild
Hi Tobias,

in (look for ^^^):

diff --git a/gcc/fortran/trans-openmp.c b/gcc/fortran/trans-openmp.c
index 063d4c145e2..705cdc7749f 100644
--- a/gcc/fortran/trans-openmp.c
+++ b/gcc/fortran/trans-openmp.c
@@ -355,6 +355,51 @@ gfc_has_alloc_comps (tree type, tree decl)
   return false;
 }

+/* Return true if TYPE is polymorphic but not with pointer attribute.  */
+
+static bool
+gfc_is_polymorphic_nonptr (tree type)
+{
+  if (POINTER_TYPE_P (type))
+type = TREE_TYPE (type);
+  if (TREE_CODE (type) != RECORD_TYPE)
+return false;
+
+  tree field = TYPE_FIELDS (type);
+  if (!field || 0 != strcmp ("_data", IDENTIFIER_POINTER (DECL_NAME (field

^^^ here you are comparing the field - name

+return false;
+  field = DECL_CHAIN (field);
+  if (!field || 0 != strcmp ("_vptr", IDENTIFIER_POINTER (DECL_NAME (field
+return false;
+
+  return true;
+}
+
+/* Return true if TYPE is unlimited polymorphic but not with pointer attribute;
+   unlimited means also intrinsic types are handled and _len is used.  */
+
+static bool
+gfc_is_unlimited_polymorphic_nonptr (tree type)
+{
+  if (POINTER_TYPE_P (type))
+type = TREE_TYPE (type);
+  if (TREE_CODE (type) != RECORD_TYPE)
+return false;
+
+  tree field = TYPE_FIELDS (type); /* _data */
+  if (!field)

^^^ here you don't . So theoretically this routine could match a type which
has a _len as its third field, but that is not a unlim-poly class. Maybe factor
out the test from the above routine and unify with this one to reuse the test
for a BT_CLASS?!

+return false;
+  field = DECL_CHAIN (field); /* _vptr */
+  if (!field)
+return false;
+  field = DECL_CHAIN (field);
+  if (!field || 0 != strcmp ("_len", IDENTIFIER_POINTER (DECL_NAME (field
+return false;
+
+  return true;
+}
+
+

---

Btw, I believe the first routine can be better replaced by:

static bool
gfc_is_polymorphic_nonptr (tree type)
{
  if (POINTER_TYPE_P (type))
type = TREE_TYPE (type);
  return GFC_CLASS_TYPE_P (type);
}

I have no better solution for learning whether a tree's type is unlimited poly
yet.


@@ -740,6 +785,87 @@ gfc_omp_clause_copy_ctor (tree clause, tree dest, tree src)
   gcc_assert (OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_FIRSTPRIVATE
  || OMP_CLAUSE_CODE (clause) == OMP_CLAUSE_LINEAR);

+  /* TODO: implement support for polymorphic arrays; reject for now.  */
+  /* Void arrays appear as var.0 = var._data.data. A bit hackish to
+ distinguish from 'type(c_ptr) :: var(5)' by scanning for '.';
+ this assumes that ASM_FORMAT_PRIVATE_NAME uses a '.', which most
+ systems do. */
+  if (TREE_CODE (type) == ARRAY_TYPE
+  && TREE_TYPE (type) == pvoid_type_node
+  && TREE_CODE (dest) == MEM_REF
+  && strchr (IDENTIFIER_POINTER (DECL_NAME (TREE_OPERAND (dest, 0))), '.'))
+fatal_error (input_location,
+"Sorry, polymorphic arrays not yet supported for "
+"firstprivate");

I totally agree that this is hackish and I don't like for that. But I can't
come up with a better solution at the moment.

The remainder looks ok to me.

Regards,
Andre
--
Andre Vehreschild * Email: vehre ad gmx dot de


Re: [PATCH] separate reading past the end from -Wstringop-overflow

2020-08-31 Thread Christophe Lyon via Gcc-patches
Hi,

On Wed, 26 Aug 2020 at 22:36, Jeff Law via Gcc-patches
 wrote:
>
> On Tue, 2020-06-23 at 20:05 -0600, Martin Sebor wrote:
> > -Wstringop-overflow is issued for both writing and reading past
> > the end, even though the latter problem is often viewed as being
> > lower severity than the former, or at least sufficiently different
> > to triage separately.  In CWE, for example, each of the two kinds
> > of problems has its own classification (CWE-787: Out-of-bounds
> > Write, and CWE-125: Out-of-bounds Read).
> >
> > To make this easier with GCC, the attached patch introduces
> > a new option called -Wstringop-overread and splits the current
> > indiscriminate warning into the respective two categories.  Other
> > than the new option the improvements also exposed a few instances
> > of reading past the end that GCC doesn't detect that the new code
> > does thanks to more consistent checking.
> >
> > As usual, I tested the patch by building Binutils/GDB, Glibc, and
> > the Linux kernel with no unexpected (or any, in fact) instances
> > of the two warnings.
> >
> > The work involved more churn that I expected, and maintaining
> > the expected precedence of warnings (missing terminating nul,
> > excessive bounds, etc.) turned out to be more delicate and time
> > consuming than I would have liked.  I think the result is cleaner
> > code, but I'm quite sure it can still stand to be made better.
> > That's also my plan for the next step when I'd like to move this
> > checking out of builtins.c and calls.c and into a file of its own.
> > Ultimately, Jeff and have would like to integrate it into the path
> > isolation pass.
> >
> > Martin
> >
> > PS There's a FIXME comment in expand_builtin_memory_chk as
> > a reminder of a future change.  It currently has no bearing
> > on anything.
>
> > Add -Wstringop-overread for reading past the end by string functions.
> >
> > gcc/ChangeLog:
> >
> >   * attribs.c (init_attr_rdwr_indices): Use global access_mode.
> >   * attribs.h (struct attr_access): Same.
> >   * builtins.c (fold_builtin_strlen): Add argument.
> >   (compute_objsize): Declare.
> >   (get_range): Declare.
> >   (check_read_access): New function.
> >   (access_ref::access_ref): Define ctor.
> >   (warn_string_no_nul): Add arguments.  Handle -Wstrintop-overread.
> >   (check_nul_terminated_array): Handle source strings of different
> >   ranges of sizes.
> >   (expand_builtin_strlen): Remove warning code, call check_read_access
> >   instead.  Declare locals closer to their initialization.
> >   (expand_builtin_strnlen): Same.
> >   (maybe_warn_for_bound): New function.
> >   (warn_for_access): Remove argument.  Handle -Wstrintop-overread.
> >   (inform_access): Change argument type.
> >   (get_size_range): New function.
> >   (check_access): Remove unused arguments.  Add new arguments.  Handle
> >   -Wstrintop-overread.  Move warning code to helpers and call them.
> >   Call check_nul_terminated_array.
> >   (check_memop_access): Remove unnecessary and provide additional
> >   arguments in calls.
> >   (expand_builtin_memchr): Call check_read_access.
> >   (expand_builtin_strcat): Remove unnecessary and provide additional
> >   arguments in calls.
> >   (expand_builtin_strcpy): Same.
> >   (expand_builtin_strcpy_args): Same.  Avoid testing no-warning bit.
> >   (expand_builtin_stpcpy_1): Remove unnecessary and provide additional
> >   arguments in calls.
> >   (expand_builtin_stpncpy): Same.
> >   (check_strncat_sizes): Same.
> >   (expand_builtin_strncat): Remove unnecessary and provide additional
> >   arguments in calls.  Adjust comments.
> >   (expand_builtin_strncpy): Remove unnecessary and provide additional
> >   arguments in calls.
> >   (expand_builtin_memcmp): Remove warning code.  Call check_access.
> >   (expand_builtin_strcmp): Call check_access instead of
> >   check_nul_terminated_array.
> >   (expand_builtin_strncmp): Handle -Wstrintop-overread.
> >   (expand_builtin_fork_or_exec): Call check_access instead of
> >   check_nul_terminated_array.
> >   (expand_builtin): Same.
> >   (fold_builtin_1): Pass additional argument.
> >   (fold_builtin_n): Same.
> >   (fold_builtin_strpbrk): Remove calls to check_nul_terminated_array.
> >   (expand_builtin_memory_chk): Add comments.
> >   (maybe_emit_chk_warning): Remove unnecessary and provide additional
> >   arguments in calls.
> >   (maybe_emit_sprintf_chk_warning): Same.  Adjust comments.
> >   * builtins.h (warn_string_no_nul): Add arguments.
> >   (struct access_ref): Add member and ctor argument.
> >   (struct access_data): Add members and ctor.
> >   (check_access): Adjust signature.
> >   * calls.c (maybe_warn_nonstring_arg): Return an indication of
> >   whether a warning was issued.  Issue -Wstrintop-overread instead
> >   

Re: [Patch, fortran] PR96495 - [gfortran] Composition of user-defined operators does not copy ALLOCATABLE property of derived type

2020-08-31 Thread Andre Vehreschild
Hi Paul,

your patch looks fine to me. Ok for trunk.

Thanks for the patch.

Regards,
Andre

On Sat, 29 Aug 2020 12:50:20 +0100
Paul Richard Thomas via Fortran  wrote:

> This patch detects a scalar function result that has allocatable components
> and is being used inside a scalarization loop. Before this patch, the
> components would be deallocated and nullified within the scalarization loop
> and so would cause a segfault on the second cycle of the loop.
>
> The stored result has to be found by identifying the expression in the loop
> ss chain. This is then used for the deallocation of the allocatable
> components in the loop post block, which keeps gimple happy and prevents
> the segfault.
>
> Regtests on FC31/x86_64 - OK for master?
>
> Paul
>
> This patch fixes PR96495 - frees result components outside loop.
>
> 2020-29-08  Paul Thomas  
>
> gcc/fortran
> PR fortran/96495
> * trans-expr.c (gfc_conv_procedure_call): Take the deallocation
> of allocatable result components of a scalar result outside the
> scalarization loop. Find and use the stored result.
>
> gcc/testsuite/
> PR fortran/96495
> * gfortran.dg/alloc_comp_result_2.f90 : New test.


--
Andre Vehreschild * Email: vehre ad gmx dot de


Re: [PATCH 3/6] Implement fork-based parallelism engine

2020-08-31 Thread Richard Biener via Gcc-patches
On Thu, Aug 27, 2020 at 8:27 PM Giuliano Belinassi
 wrote:
>
> Hi, Honza.
>
> Thank you for your detailed review!
>
> On 08/27, Jan Hubicka wrote:
> > > diff --git a/gcc/cgraph.c b/gcc/cgraph.c
> > > index c0b45795059..22405098dc5 100644
> > > --- a/gcc/cgraph.c
> > > +++ b/gcc/cgraph.c
> > > @@ -226,6 +226,22 @@ cgraph_node::delete_function_version_by_decl (tree 
> > > decl)
> > >decl_node->remove ();
> > >  }
> > >
> > > +/* Release function dominator info if present.  */
> > > +
> > > +void
> > > +cgraph_node::maybe_release_dominators (void)
> > > +{
> > > +  struct function *fun = DECL_STRUCT_FUNCTION (decl);
> > > +
> > > +  if (fun && fun->cfg)
> > > +{
> > > +  if (dom_info_available_p (fun, CDI_DOMINATORS))
> > > +   free_dominance_info (fun, CDI_DOMINATORS);
> > > +  if (dom_info_available_p (fun, CDI_POST_DOMINATORS))
> > > +   free_dominance_info (fun, CDI_POST_DOMINATORS);
> > > +}
> > > +}
> >
> > I am not sure if that needs to be member function, but if so we want to
> > merge it with other places in cgraph.c and cgraphunit.c where dominators
> > are freed.  I do not think you need to check avalability.
>
> This is necessary to remove some nodes from the callgraph.  For some
> reason, if I node->remove () and it still have the dominance info
> available, it will fail some assertions on the compiler.
>
> However, with regard to code layout, this can be moved to lto-cgraph.c,
> as it is only used there.
>
> > > +
> > >  /* Record that DECL1 and DECL2 are semantically identical function
> > > versions.  */
> > >  void
> > > diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> > > index b4a7871bd3d..72ac19f9672 100644
> > > --- a/gcc/cgraph.h
> > > +++ b/gcc/cgraph.h
> > > @@ -463,6 +463,15 @@ public:
> > >   Return NULL if there's no such node.  */
> > >static symtab_node *get_for_asmname (const_tree asmname);
> > >
> > > +  /* Get symtab node by order.  */
> > > +  static symtab_node *find_by_order (int order);
> >
> > This is quadratic and moreover seems unused. Why do you add it?
>
> I added this for debugging, since I used this a lot inside GDB.
> Sure, I can remove this without any problems, or print a warning
> for the developer to avoid calling this in production code.
>
> > > +
> > > +  /* Get symtab_node by its name.  */
> > > +  static symtab_node *find_by_name (const char *);
> >
> > Similarly here, note that names are not really very meaningful as lookup
> > things, since they get duplicated.
> > > +
> > > +  /* Get symtab_node by its ASM name.  */
> > > +  static symtab_node *find_by_asm_name (const char *);
> >
> > For this we have get_for_asmname (which also populates asm name hash as
> > needed and is not quadratic)
>
> Cool. I will surely remove this then :)
>
> > > diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
> > > index d10d635e942..73e4bed3b61 100644
> > > --- a/gcc/cgraphunit.c
> > > +++ b/gcc/cgraphunit.c
> > > @@ -2258,6 +2258,11 @@ cgraph_node::expand (void)
> > >  {
> > >location_t saved_loc;
> > >
> > > +  /* FIXME: Find out why body-removed nodes are marked for output.  */
> > > +  if (body_removed)
> > > +return;
> >
> > Indeed, we should know :)
>
> Looks like this was due an early problem. I removed this and bootstrap
> is working OK.
>
> > > +
> > > +
> > >/* We ought to not compile any inline clones.  */
> > >gcc_assert (!inlined_to);
> > >
> > > @@ -2658,6 +2663,7 @@ ipa_passes (void)
> > >
> > >execute_ipa_summary_passes
> > > ((ipa_opt_pass_d *) passes->all_regular_ipa_passes);
> > > +
> > This seems accidental.
>
> Yes.
>
> > >  }
> > >
> > >/* Some targets need to handle LTO assembler output specially.  */
> > > @@ -2687,10 +2693,17 @@ ipa_passes (void)
> > >if (flag_generate_lto || flag_generate_offload)
> > >  targetm.asm_out.lto_end ();
> > >
> > > -  if (!flag_ltrans
> > > +  if (split_outputs)
> > > +flag_ltrans = true;
> > > +
> > > +  if ((!flag_ltrans || split_outputs)
> > >&& ((in_lto_p && flag_incremental_link != INCREMENTAL_LINK_LTO)
> > >   || !flag_lto || flag_fat_lto_objects))
> > >  execute_ipa_pass_list (passes->all_regular_ipa_passes);
> > > +
> > > +  if (split_outputs)
> > > +flag_ltrans = false;
> > > +
> > >invoke_plugin_callbacks (PLUGIN_ALL_IPA_PASSES_END, NULL);
> > >
> > >bitmap_obstack_release (NULL);
> > > @@ -2742,6 +2755,185 @@ symbol_table::output_weakrefs (void)
> > >}
> > >  }
> > >
> > > +static bool is_number (const char *str)
> > > +{
> > > +  while (*str != '\0')
> > > +switch (*str++)
> > > +  {
> > > +   case '0':
> > > +   case '1':
> > > +   case '2':
> > > +   case '3':
> > > +   case '4':
> > > +   case '5':
> > > +   case '6':
> > > +   case '7':
> > > +   case '8':
> > > +   case '9':
> > > + continue;
> > > +   default:
> > > + return false;
> > > +  }
> > > +
> > > +  return true;
> > > +}
> >
> > This looks odd, we have other places where we parse number from command
> > line :)
>
> 

Re: [PATCH 2/6] Implement a new partitioner for parallel compilation

2020-08-31 Thread Richard Biener via Gcc-patches
On Fri, Aug 21, 2020 at 12:00 AM Giuliano Belinassi
 wrote:
>
> When using the LTO infrastructure to compile files in parallel, we
> can't simply use any of the LTO partitioner, once extra dependency
> analysis is required to ensure that some nodes are correctly
> partitioned together.
>
> Therefore, here we implement a new partitioner called
> "lto_merge_comdat_map" that does all these required analysis.
> The partitioner works as follows:
>
> 1. We create a number of disjoint sets and inserts each node into a
>separate set, which may be merged together in the future.
>
> 2. Find COMDAT groups, and mark them to be partitioned together.
>
> 3. Check all nodes that would require any COMDAT group to be
>copied to its partition (which we name "COMDAT frontier"),
>and mark them to be partitioned together.
>This avoids duplication of COMDAT groups and crashes on the LTO
>partitioning infrastructure.
>
> 4. Check if the user allows the partitioner to promote non-public
>functions or variables to global to improve parallelization
>opportunity with a cost of modifying the output code layout.
>
> 5. Balance generated partitions for performance unless not told to.
>
> The choice of 1. was by design, so we could use a union-find
> data structure, which are know for being very fast on set unite
> operations.
>
> For 3. to work properly, we also had to modify
> lto_promote_cross_file_statics to handle this case.
>
> The parameters --param=promote-statics and --param=balance-partitions
> control 4. and 5., respectively

Just a few comments ontop of Honzas remarks:

> gcc/ChangeLog:
> 2020-08-20  Giuliano Belinassi  
>
> * Makefile.in: Add lto-partition.o
> * cgraph.h (struct symtab_node::aux2): New variable.
> * lto-partition.c: Move from gcc/lto/lto-partition.c
> (add_symbol_to_partition_1): Only compute insn size
> if information is available.
> (node_cmp): Same as above.
> (class union_find): New.
> (ds_print_roots): New function.
> (balance_partitions): New function.
> (build_ltrans_partitions): New function.
> (merge_comdat_nodes): New function.
> (merge_static_calls): New function.
> (merge_contained_symbols): New function.
> (lto_merge_comdat_map): New function.
> (privatize_symbol_name_1): Handle when WPA is not enabled.
> (privatize_symbol_name): Same as above.
> (lto_promote_cross_file_statics): New parameter to select when
> to promote to global.
> (lto_check_usage_from_other_partitions): New function.
> * lto-partition.h: Move from gcc/lto/lto-partition.h
> (lto_promote_cross_file_statics): Update prototype.
> (lto_check_usage_from_other_partitions): Declare.
> (lto_merge_comdat_map): Declare.
>
> gcc/lto/ChangeLog:
> 2020-08-20  Giuliano Belinassi  
>
> * lto-partition.c: Move to gcc/lto-partition.c.
> * lto-partition.h: Move to gcc/lto-partition.h.
> * lto.c: Update call to lto_promote_cross_file_statics.
> * Makefile.in: Remove lto-partition.o.
> ---
>  gcc/Makefile.in   |   1 +
>  gcc/cgraph.h  |   1 +
>  gcc/{lto => }/lto-partition.c | 463 +-
>  gcc/{lto => }/lto-partition.h |   4 +-
>  gcc/lto/Make-lang.in  |   4 +-
>  gcc/lto/lto.c |   2 +-
>  gcc/params.opt|   8 +
>  gcc/tree.c|  23 +-
>  8 files changed, 489 insertions(+), 17 deletions(-)
>  rename gcc/{lto => }/lto-partition.c (78%)
>  rename gcc/{lto => }/lto-partition.h (89%)
>
> diff --git a/gcc/Makefile.in b/gcc/Makefile.in
> index 79e854aa938..be42b15f4ff 100644
> --- a/gcc/Makefile.in
> +++ b/gcc/Makefile.in
> @@ -1457,6 +1457,7 @@ OBJS = \
> lra-spills.o \
> lto-cgraph.o \
> lto-streamer.o \
> +   lto-partition.o \
> lto-streamer-in.o \
> lto-streamer-out.o \
> lto-section-in.o \
> diff --git a/gcc/cgraph.h b/gcc/cgraph.h
> index 0211f08964f..b4a7871bd3d 100644
> --- a/gcc/cgraph.h
> +++ b/gcc/cgraph.h
> @@ -615,6 +615,7 @@ public:
>struct lto_file_decl_data * lto_file_data;
>
>PTR GTY ((skip)) aux;
> +  int aux2;
>
>/* Comdat group the symbol is in.  Can be private if GGC allowed that.  */
>tree x_comdat_group;
> diff --git a/gcc/lto/lto-partition.c b/gcc/lto-partition.c
> similarity index 78%
> rename from gcc/lto/lto-partition.c
> rename to gcc/lto-partition.c
> index 8e0488ab13e..ca962e69b5d 100644
> --- a/gcc/lto/lto-partition.c
> +++ b/gcc/lto-partition.c
> @@ -170,7 +170,11 @@ add_symbol_to_partition_1 (ltrans_partition part, 
> symtab_node *node)
>  {
>struct cgraph_edge *e;
>if (!node->alias && c == SYMBOL_PARTITION)
> -   part->insns += ipa_size_summaries->get (cnode)->size;
> +   {
> + /* FIXME: Find out why this is being returned NULL in some cases.  
> */
> + 

Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Feng Xue OS via Gcc-patches
>>> the component is "ipa," please change that when you commit the patch.
>> Mistake has been made, I'v pushed it. Is there a way to correct it? git push 
>> --force?
>
> There is.  You need to wait until tomorrow (after the commit message
> gets copied to gcc/ChangeLog by a script) and then push a commit that
> modifies nothing else but the ChangeLog. IIUC.
> 
> Thanks again for taking care of this,

I will. Thanks.

Feng


Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Martin Jambor
Hi,

On Mon, Aug 31 2020, Feng Xue OS wrote:
>>> gcc/
>>> PR tree-optimization/96806
>
>> the component is "ipa," please change that when you commit the patch.
> Mistake has been made, I'v pushed it. Is there a way to correct it? git push 
> --force?

There is.  You need to wait until tomorrow (after the commit message
gets copied to gcc/ChangeLog by a script) and then push a commit that
modifies nothing else but the ChangeLog. IIUC.

Thanks again for taking care of this,

Martin


Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Aldy Hernandez via Gcc-patches




On 8/31/20 10:33 AM, Richard Biener wrote:

On Mon, Aug 31, 2020 at 10:20 AM Aldy Hernandez  wrote:


As discussed in the PR, the type of a switch operand may be different
than the type of the individual cases.  This is causing us to attempt to
intersect incompatible ranges.

This inconsistent IL seems wrong to me, but it also seems pervasive
throughout GCC.  Jason, as one of the original gimple designers, do you
remember if this was an oversight, or was there a reason for this?

Richard, you mention in the PR that normalizing the IL would cause
propagation of widening cast sources into switches harder.  Couldn't we
just cast all labels to the switch operand type upon creation?  What
would be the problem with that?  Would we generate sub-optimal code?

In either case, I'd hate to leave trunk in a broken case while this is
being discussed.  The attached patch fixes the PRs.

OK?


widest_irange label_range (CASE_LOW (min_label), case_high);
+  if (!types_compatible_p (label_range.type (), range_of_op->type ()))
+   range_cast (label_range, range_of_op->type ());
label_range.intersect (range_of_op);

so label_range is of 'widest_int' type but then instead of casting
range_of_op to widest_irange you cast that widest_irange to the
type of the range op?  Why not build label_range in the type
of range_op immediately?


The "widest" in widest_irange does not denote the type of the range, but 
the number of sub-ranges that can maximally fit.  It has nothing to do 
with the underlying type of its elements.  Instead, it is supposed to 
indicate that label_range can fit an infinite amount of sub-ranges.  In 
practice this is 255 sub-ranges, but we can adjust that if needed.


So, in the constructor:

widest_irange label_range (CASE_LOW (min_label), case_high);

...the type for the sub-ranges in label_range is the type of 
CASE_LOW(min_label) and case_high.  They must match IIRC.


I am working on an irange best practices document that should go into 
detail on all this, and will post it later this week.




Using widest_int for both would have been obvious to me, you're
indicating that switch (type op) { case type2 lab: } is supposed to
match as (type)lab == op rather than (type2)op == lab.  I think
the IL will be consistent in a way that sign-extending both
op and lab to a common larger integer type is correct
(thus using widest_int), but no truncation should happen here
(such trucation should be applied by the frontend).


I suppose we could convert both label_range and range_of_op to a wider 
type and do the intersection on these results.  Is there a tree type 
that indicates a widest possible int?




In any case type mismatches here are of course unfortunate
and both more verification and documentation would be
nice.  verify_gimple_switch only verifies all case labels
have the same type, the type of the switch argument is
not verified in any way against that type.


Is the verification good enough with what Jakub posted?  I can adjust 
the documentation, but first I'd like a nod from Jason that this was 
indeed expected behavior.


Thanks.
Aldy



Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Feng Xue OS via Gcc-patches
>> gcc/
>> PR tree-optimization/96806

> the component is "ipa," please change that when you commit the patch.
Mistake has been made, I'v pushed it. Is there a way to correct it? git push 
--force?

Thanks,
Feng

[PATCH] rs6000: Expand vec_insert in expander instead of gimple [PR79251]

2020-08-31 Thread Xiong Hu Luo via Gcc-patches
vec_insert accepts 3 arguments, arg0 is input vector, arg1 is the value
to be insert, arg2 is the place to insert arg1 to arg0.  This patch adds
__builtin_vec_insert_v4si[v4sf,v2di,v2df,v8hi,v16qi] for vec_insert to
not expand too early in gimple stage if arg2 is variable, to avoid generate
store hit load instructions.

For Power9 V4SI:
addi 9,1,-16
rldic 6,6,2,60
stxv 34,-16(1)
stwx 5,9,6
lxv 34,-16(1)
=>
addis 9,2,.LC0@toc@ha
addi 9,9,.LC0@toc@l
mtvsrwz 33,5
lxv 32,0(9)
sradi 9,6,2
addze 9,9
sldi 9,9,2
subf 9,9,6
subfic 9,9,3
sldi 9,9,2
subfic 9,9,20
lvsl 13,0,9
xxperm 33,33,45
xxperm 32,32,45
xxsel 34,34,33,32

Though instructions increase from 5 to 15, the performance is improved
60% in typical cases.

gcc/ChangeLog:

* config/rs6000/altivec.md (altivec_lvsl_reg_2): Extend to
SDI mode.
* config/rs6000/rs6000-builtin.def (BU_VSX_X): Add support
macros for vec_insert built-in functions.
* config/rs6000/rs6000-c.c (altivec_resolve_overloaded_builtin):
Generate built-in calls for vec_insert.
* config/rs6000/rs6000-call.c (altivec_expand_vec_insert_builtin):
New function.
(altivec_expand_builtin): Add case entry for
VSX_BUILTIN_VEC_INSERT_V16QI, VSX_BUILTIN_VEC_INSERT_V8HI,
VSX_BUILTIN_VEC_INSERT_V4SF,  VSX_BUILTIN_VEC_INSERT_V4SI,
VSX_BUILTIN_VEC_INSERT_V2DF,  VSX_BUILTIN_VEC_INSERT_V2DI.
(altivec_init_builtins):
* config/rs6000/rs6000-protos.h (rs6000_expand_vector_insert):
New declear.
* config/rs6000/rs6000.c (rs6000_expand_vector_insert):
New function.
* config/rs6000/rs6000.md (FQHS): New mode iterator.
(FD): New mode iterator.
p8_mtvsrwz_v16qi2: New define_insn.
p8_mtvsrd_v16qi2: New define_insn.
* config/rs6000/vsx.md: Call gen_altivec_lvsl_reg_di2.

gcc/testsuite/ChangeLog:

* gcc.target/powerpc/pr79251.c: New test.
---
 gcc/config/rs6000/altivec.md   |   4 +-
 gcc/config/rs6000/rs6000-builtin.def   |   6 +
 gcc/config/rs6000/rs6000-c.c   |  61 +
 gcc/config/rs6000/rs6000-call.c|  74 +++
 gcc/config/rs6000/rs6000-protos.h  |   1 +
 gcc/config/rs6000/rs6000.c | 146 +
 gcc/config/rs6000/rs6000.md|  19 +++
 gcc/config/rs6000/vsx.md   |   2 +-
 gcc/testsuite/gcc.target/powerpc/pr79251.c |  23 
 9 files changed, 333 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/pr79251.c

diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 0a2e634d6b0..66b636059a6 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -2772,10 +2772,10 @@
   DONE;
 })
 
-(define_insn "altivec_lvsl_reg"
+(define_insn "altivec_lvsl_reg_2"
   [(set (match_operand:V16QI 0 "altivec_register_operand" "=v")
(unspec:V16QI
-   [(match_operand:DI 1 "gpc_reg_operand" "b")]
+   [(match_operand:SDI 1 "gpc_reg_operand" "b")]
UNSPEC_LVSL_REG))]
   "TARGET_ALTIVEC"
   "lvsl %0,0,%1"
diff --git a/gcc/config/rs6000/rs6000-builtin.def 
b/gcc/config/rs6000/rs6000-builtin.def
index f9f0fece549..d095b365c14 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -2047,6 +2047,12 @@ BU_VSX_X (VEC_INIT_V2DI,  "vec_init_v2di",   CONST)
 BU_VSX_X (VEC_SET_V1TI,  "vec_set_v1ti",   CONST)
 BU_VSX_X (VEC_SET_V2DF,  "vec_set_v2df",   CONST)
 BU_VSX_X (VEC_SET_V2DI,  "vec_set_v2di",   CONST)
+BU_VSX_X (VEC_INSERT_V16QI,  "vec_insert_v16qi",   CONST)
+BU_VSX_X (VEC_INSERT_V8HI,   "vec_insert_v8hi",CONST)
+BU_VSX_X (VEC_INSERT_V4SI,   "vec_insert_v4si",CONST)
+BU_VSX_X (VEC_INSERT_V4SF,   "vec_insert_v4sf",CONST)
+BU_VSX_X (VEC_INSERT_V2DI,   "vec_insert_v2di",CONST)
+BU_VSX_X (VEC_INSERT_V2DF,   "vec_insert_v2df",CONST)
 BU_VSX_X (VEC_EXT_V1TI,  "vec_ext_v1ti",   CONST)
 BU_VSX_X (VEC_EXT_V2DF,  "vec_ext_v2df",   CONST)
 BU_VSX_X (VEC_EXT_V2DI,  "vec_ext_v2di",   CONST)
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 2fad3d94706..03b00738a5e 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -1563,6 +1563,67 @@ altivec_resolve_overloaded_builtin (location_t loc, tree 
fndecl,
  return build_call_expr (call, 3, arg1, arg0, arg2);
}
 
+  else if (VECTOR_MEM_VSX_P (mode))
+   {
+ tree call = NULL_TREE;
+
+ arg2 = fold_for_warn (arg2);
+
+ /* If the second argument is variable, we can optimize it if we are
+generating 64-bit code on a machine with direct move.  */
+   

Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Martin Jambor
Hi,

On Mon, Aug 31 2020, Feng Xue OS wrote:
> This patch is to fix a bug that cost that is used to evaluate clone candidate
> becomes negative due to integer overflow.
>
> Feng
> ---
> 2020-08-31  Feng Xue  
>
> gcc/
> PR tree-optimization/96806

the component is "ipa," please change that when you commit the patch.

> * ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
> overflow.

assuming you have bootstrapped and tested it, it is OK for both trunk
and all affected release branches.

Thank you,

Martin

>
> gcc/testsuite/
> PR tree-optimization/96806
> * g++.dg/ipa/pr96806.C: New test.
> From 8d92b4ca4be2303a73f0a2441e57564488ca1c23 Mon Sep 17 00:00:00 2001
> From: Feng Xue 
> Date: Mon, 31 Aug 2020 15:00:52 +0800
> Subject: [PATCH] ipa/96806 - Fix ICE in ipa-cp due to integer addition
>  overflow
>
> 2020-08-31  Feng Xue  
>
> gcc/
> PR tree-optimization/96806
> * ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
>   overflow.
>
> gcc/testsuite/
> PR tree-optimization/96806
> * g++.dg/ipa/pr96806.C: New test.
> ---
>  gcc/ipa-cp.c   |  8 ++---
>  gcc/testsuite/g++.dg/ipa/pr96806.C | 53 ++
>  2 files changed, 57 insertions(+), 4 deletions(-)
>  create mode 100644 gcc/testsuite/g++.dg/ipa/pr96806.C
>
> diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
> index e4910a04ffa..8e5d6e2a393 100644
> --- a/gcc/ipa-cp.c
> +++ b/gcc/ipa-cp.c
> @@ -5480,11 +5480,11 @@ decide_about_value (struct cgraph_node *node, int 
> index, HOST_WIDE_INT offset,
>  freq_sum, count_sum,
>  val->local_size_cost)
>&& !good_cloning_opportunity_p (node,
> -   val->local_time_benefit
> -   + val->prop_time_benefit,
> +   safe_add (val->local_time_benefit,
> + val->prop_time_benefit),
> freq_sum, count_sum,
> -   val->local_size_cost
> -   + val->prop_size_cost))
> +   safe_add (val->local_size_cost,
> + val->prop_size_cost)))
>  return false;
>  
>if (dump_file)

[...]


Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Richard Biener via Gcc-patches
On Mon, Aug 31, 2020 at 10:39 AM Jakub Jelinek  wrote:
>
> On Mon, Aug 31, 2020 at 10:33:20AM +0200, Richard Biener wrote:
> > In any case type mismatches here are of course unfortunate
> > and both more verification and documentation would be
> > nice.  verify_gimple_switch only verifies all case labels
> > have the same type, the type of the switch argument is
> > not verified in any way against that type.
>
> When looking at the verification, I have noticed a bug in it.
>
> The verification that CASE_HIGH (if present) has the same type as CASE_LOW
> is only performed for the case label 2 and higher, case label 1 (the first
> one after the default label) isn't checked.
>
> The following patch fixes that, it will uselessly also compare
> TREE_TYPE (CASE_LOW (elt)) != elt_type for the case label 1, but I think
> that isn't that expensive and helps readability of the code.
>
> Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

OK.

> 2020-08-31  Jakub Jelinek  
>
> * tree-cfg.c (verify_gimple_switch): If the first non-default case
> label has CASE_HIGH, verify it has the same type as CASE_LOW.
>
> --- gcc/tree-cfg.c.jj   2020-08-27 18:42:35.660711349 +0200
> +++ gcc/tree-cfg.c  2020-08-27 22:46:34.574154370 +0200
> @@ -4809,17 +4809,7 @@ verify_gimple_switch (gswitch *stmt)
>   return true;
> }
>
> -  if (elt_type)
> -   {
> - if (TREE_TYPE (CASE_LOW (elt)) != elt_type
> - || (CASE_HIGH (elt) && TREE_TYPE (CASE_HIGH (elt)) != elt_type))
> -   {
> - error ("type mismatch for case label in switch statement");
> - debug_generic_expr (elt);
> - return true;
> -   }
> -   }
> -  else
> +  if (! elt_type)
> {
>   elt_type = TREE_TYPE (CASE_LOW (elt));
>   if (TYPE_PRECISION (index_type) < TYPE_PRECISION (elt_type))
> @@ -4828,6 +4818,13 @@ verify_gimple_switch (gswitch *stmt)
>   return true;
> }
> }
> +  if (TREE_TYPE (CASE_LOW (elt)) != elt_type
> +  || (CASE_HIGH (elt) && TREE_TYPE (CASE_HIGH (elt)) != elt_type))
> +   {
> + error ("type mismatch for case label in switch statement");
> + debug_generic_expr (elt);
> + return true;
> +   }
>
>if (prev_upper_bound)
> {
>
>
> Jakub
>


Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Jakub Jelinek via Gcc-patches
On Mon, Aug 31, 2020 at 10:33:20AM +0200, Richard Biener wrote:
> In any case type mismatches here are of course unfortunate
> and both more verification and documentation would be
> nice.  verify_gimple_switch only verifies all case labels
> have the same type, the type of the switch argument is
> not verified in any way against that type.

When looking at the verification, I have noticed a bug in it.

The verification that CASE_HIGH (if present) has the same type as CASE_LOW
is only performed for the case label 2 and higher, case label 1 (the first
one after the default label) isn't checked.

The following patch fixes that, it will uselessly also compare
TREE_TYPE (CASE_LOW (elt)) != elt_type for the case label 1, but I think
that isn't that expensive and helps readability of the code.

Bootstrapped/regtested on x86_64-linux and i686-linux, ok for trunk?

2020-08-31  Jakub Jelinek  

* tree-cfg.c (verify_gimple_switch): If the first non-default case
label has CASE_HIGH, verify it has the same type as CASE_LOW.

--- gcc/tree-cfg.c.jj   2020-08-27 18:42:35.660711349 +0200
+++ gcc/tree-cfg.c  2020-08-27 22:46:34.574154370 +0200
@@ -4809,17 +4809,7 @@ verify_gimple_switch (gswitch *stmt)
  return true;
}
 
-  if (elt_type)
-   {
- if (TREE_TYPE (CASE_LOW (elt)) != elt_type
- || (CASE_HIGH (elt) && TREE_TYPE (CASE_HIGH (elt)) != elt_type))
-   {
- error ("type mismatch for case label in switch statement");
- debug_generic_expr (elt);
- return true;
-   }
-   }
-  else
+  if (! elt_type)
{
  elt_type = TREE_TYPE (CASE_LOW (elt));
  if (TYPE_PRECISION (index_type) < TYPE_PRECISION (elt_type))
@@ -4828,6 +4818,13 @@ verify_gimple_switch (gswitch *stmt)
  return true;
}
}
+  if (TREE_TYPE (CASE_LOW (elt)) != elt_type
+  || (CASE_HIGH (elt) && TREE_TYPE (CASE_HIGH (elt)) != elt_type))
+   {
+ error ("type mismatch for case label in switch statement");
+ debug_generic_expr (elt);
+ return true;
+   }
 
   if (prev_upper_bound)
{


Jakub



Re: [patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Richard Biener via Gcc-patches
On Mon, Aug 31, 2020 at 10:20 AM Aldy Hernandez  wrote:
>
> As discussed in the PR, the type of a switch operand may be different
> than the type of the individual cases.  This is causing us to attempt to
> intersect incompatible ranges.
>
> This inconsistent IL seems wrong to me, but it also seems pervasive
> throughout GCC.  Jason, as one of the original gimple designers, do you
> remember if this was an oversight, or was there a reason for this?
>
> Richard, you mention in the PR that normalizing the IL would cause
> propagation of widening cast sources into switches harder.  Couldn't we
> just cast all labels to the switch operand type upon creation?  What
> would be the problem with that?  Would we generate sub-optimal code?
>
> In either case, I'd hate to leave trunk in a broken case while this is
> being discussed.  The attached patch fixes the PRs.
>
> OK?

   widest_irange label_range (CASE_LOW (min_label), case_high);
+  if (!types_compatible_p (label_range.type (), range_of_op->type ()))
+   range_cast (label_range, range_of_op->type ());
   label_range.intersect (range_of_op);

so label_range is of 'widest_int' type but then instead of casting
range_of_op to widest_irange you cast that widest_irange to the
type of the range op?  Why not build label_range in the type
of range_op immediately?

Using widest_int for both would have been obvious to me, you're
indicating that switch (type op) { case type2 lab: } is supposed to
match as (type)lab == op rather than (type2)op == lab.  I think
the IL will be consistent in a way that sign-extending both
op and lab to a common larger integer type is correct
(thus using widest_int), but no truncation should happen here
(such trucation should be applied by the frontend).

In any case type mismatches here are of course unfortunate
and both more verification and documentation would be
nice.  verify_gimple_switch only verifies all case labels
have the same type, the type of the switch argument is
not verified in any way against that type.

Richard.


> Aldy


Re: [Patch] OpenMP/Fortran: Handle polymorphic scalars in data-sharing FIRSTPRIVATE (PR86470)

2020-08-31 Thread Tobias Burnus

*PING* — For this part 1/n patch series.

On 8/25/20 12:50 PM, Tobias Burnus wrote:

This patch adds support for polymorphic variables ("CLASS")
to OpenMP's data-sharing clause FIRSTPRIVATE.

While the patch should be okay, there is more follow-up
work required, but one has to make a start :-)

* PRIVATE – as used in the testcase of the PR is not yet supported,
  only FIRSTPRIVATE.
* polymorphic arrays are not supported (see 'sorry').
– For nonallocatable arrays, the decl passed to the function
  does contain much information; the LANG_SPECIFIC is non-NULL
  its the pointer components contain garbage :-(
– Handling noncharacter polymorphic arrays (hence: non-unlimited
  polymorphic) seems to be simpler; the current patch seems to
  work for some cases already, if the "sorry" is commented.
* ...

OK for mainline?

Tobias

PS: Supporting *map*ing of polymorphic variables is another matter,
which is unfortunately even harder.


-
Mentor Graphics (Deutschland) GmbH, Arnulfstraße 201, 80634 München / Germany
Registergericht München HRB 106955, Geschäftsführer: Thomas Heurung, Alexander 
Walter


Re: [PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Richard Biener via Gcc-patches
On Mon, Aug 31, 2020 at 10:06 AM Feng Xue OS via Gcc-patches
 wrote:
>
> This patch is to fix a bug that cost that is used to evaluate clone candidate
> becomes negative due to integer overflow.

OK.

Richard.

> Feng
> ---
> 2020-08-31  Feng Xue  
>
> gcc/
> PR tree-optimization/96806
> * ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
> overflow.
>
> gcc/testsuite/
> PR tree-optimization/96806
> * g++.dg/ipa/pr96806.C: New test.


[patch] PR tree-optimization/96818 - cast label range to type of switch operand

2020-08-31 Thread Aldy Hernandez via Gcc-patches
As discussed in the PR, the type of a switch operand may be different 
than the type of the individual cases.  This is causing us to attempt to 
intersect incompatible ranges.


This inconsistent IL seems wrong to me, but it also seems pervasive 
throughout GCC.  Jason, as one of the original gimple designers, do you 
remember if this was an oversight, or was there a reason for this?


Richard, you mention in the PR that normalizing the IL would cause 
propagation of widening cast sources into switches harder.  Couldn't we 
just cast all labels to the switch operand type upon creation?  What 
would be the problem with that?  Would we generate sub-optimal code?


In either case, I'd hate to leave trunk in a broken case while this is 
being discussed.  The attached patch fixes the PRs.


OK?

Aldy
>From 3c2db553c22ffbf014564d374d3d2c9210b5b83b Mon Sep 17 00:00:00 2001
From: Aldy Hernandez 
Date: Fri, 28 Aug 2020 18:44:58 +0200
Subject: [PATCH] PR tree-optimization/96818 - cast label range to type of
 switch operand

	PR tree-optimization/96818
	* tree-vrp.c (find_case_label_range): Cast label range to
	type of switch operand.
---
 gcc/testsuite/g++.dg/pr96818.C | 28 
 gcc/testsuite/gcc.dg/pr96818.c | 14 ++
 gcc/tree-vrp.c |  2 ++
 3 files changed, 44 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/pr96818.C
 create mode 100644 gcc/testsuite/gcc.dg/pr96818.c

diff --git a/gcc/testsuite/g++.dg/pr96818.C b/gcc/testsuite/g++.dg/pr96818.C
new file mode 100644
index 000..134bf8b6980
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr96818.C
@@ -0,0 +1,28 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+bool operatorY ();
+
+struct l
+{
+  int m;
+  int k;
+  void n ();
+l ()
+  {
+while (operatorY ())
+  switch ((unsigned char) k)
+	case 0:
+	{
+	  n ();
+	  case 1:if (m)
+	;
+	}
+  }
+};
+
+void
+p ()
+{
+  l ();
+}
diff --git a/gcc/testsuite/gcc.dg/pr96818.c b/gcc/testsuite/gcc.dg/pr96818.c
new file mode 100644
index 000..ea2ac540d39
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/pr96818.c
@@ -0,0 +1,14 @@
+// { dg-do compile }
+// { dg-options "-O2" }
+
+int a, b, c;
+void d() {
+  unsigned short e;
+  while (b)
+;
+  e = (e + 5) / a;
+  switch (e)
+  case 0:
+  case 3:
+c = a;
+}
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 8c1a1854daa..a165164bb40 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -3850,6 +3850,8 @@ find_case_label_range (gswitch *switch_stmt, const irange *range_of_op)
   if (!case_high)
 	case_high = CASE_LOW (max_label);
   widest_irange label_range (CASE_LOW (min_label), case_high);
+  if (!types_compatible_p (label_range.type (), range_of_op->type ()))
+	range_cast (label_range, range_of_op->type ());
   label_range.intersect (range_of_op);
   if (label_range.undefined_p ())
 	return gimple_switch_label (switch_stmt, 0);
-- 
2.26.2



Re: [PATCH] [AVX512] [PR87767] Optimize memory broadcast for constant vector under AVX512

2020-08-31 Thread Richard Biener via Gcc-patches
On Sun, Aug 30, 2020 at 11:24 AM Jakub Jelinek  wrote:
>
> On Fri, Aug 28, 2020 at 06:25:46PM +0200, Jakub Jelinek via Gcc-patches wrote:
> > You're right, thanks for spotting it, I've missed native_encode_rtx will do
> > quick_push rather than safe_push.
> >
> > Updated patch below, it shouldn't be needed in the second loop, because
> > the first loop should already grow it to the largest size.
>
> Testing beyond a bug in i386.md revealed also that I've lost a cast to long
> to avoid breaking 32-bit bootstrap.
>
> This is the version that passed bootstrap/regtest on both x86_64-linux and
> i686-linux.  In both bootstraps/regtests together, it saved (from the
> statistics I've gathered) 63104 .rodata bytes (before constant merging),
> in 6814 hits of the data->desc->mark = ~(*slot)->desc->labelno;.
>
> Ok for trunk?

OK.

Thanks,
Richard.

> 2020-08-30  Jakub Jelinek  
>
> PR middle-end/54201
> * varasm.c: Include alloc-pool.h.
> (output_constant_pool_contents): Emit desc->mark < 0 entries as
> aliases.
> (struct constant_descriptor_rtx_data): New type.
> (constant_descriptor_rtx_data_cmp): New function.
> (struct const_rtx_data_hasher): New type.
> (const_rtx_data_hasher::hash, const_rtx_data_hasher::equal): New
> methods.
> (optimize_constant_pool): New function.
> (output_shared_constant_pool): Call it if TARGET_SUPPORTS_ALIASES.
>
> --- gcc/varasm.c.jj 2020-07-28 15:39:10.091755086 +0200
> +++ gcc/varasm.c2020-08-28 18:21:58.943759578 +0200
> @@ -57,6 +57,7 @@ along with GCC; see the file COPYING3.
>  #include "asan.h"
>  #include "rtl-iter.h"
>  #include "file-prefix-map.h" /* remap_debug_filename()  */
> +#include "alloc-pool.h"
>
>  #ifdef XCOFF_DEBUGGING_INFO
>  #include "xcoffout.h"  /* Needed for external data declarations.  */
> @@ -4198,7 +4199,27 @@ output_constant_pool_contents (struct rt
>class constant_descriptor_rtx *desc;
>
>for (desc = pool->first; desc ; desc = desc->next)
> -if (desc->mark)
> +if (desc->mark < 0)
> +  {
> +#ifdef ASM_OUTPUT_DEF
> +   const char *name = targetm.strip_name_encoding (XSTR (desc->sym, 0));
> +   char label[256];
> +   char buffer[256 + 32];
> +   const char *p;
> +
> +   ASM_GENERATE_INTERNAL_LABEL (label, "LC", ~desc->mark);
> +   p = targetm.strip_name_encoding (label);
> +   if (desc->offset)
> + {
> +   sprintf (buffer, "%s+%ld", p, (long) (desc->offset));
> +   p = buffer;
> + }
> +   ASM_OUTPUT_DEF (asm_out_file, name, p);
> +#else
> +   gcc_unreachable ();
> +#endif
> +  }
> +else if (desc->mark)
>{
> /* If the constant is part of an object_block, make sure that
>the constant has been positioned within its block, but do not
> @@ -4216,6 +4237,160 @@ output_constant_pool_contents (struct rt
>}
>  }
>
> +struct constant_descriptor_rtx_data {
> +  constant_descriptor_rtx *desc;
> +  target_unit *bytes;
> +  unsigned short size;
> +  unsigned short offset;
> +  unsigned int hash;
> +};
> +
> +/* qsort callback to sort constant_descriptor_rtx_data * vector by
> +   decreasing size.  */
> +
> +static int
> +constant_descriptor_rtx_data_cmp (const void *p1, const void *p2)
> +{
> +  constant_descriptor_rtx_data *const data1
> += *(constant_descriptor_rtx_data * const *) p1;
> +  constant_descriptor_rtx_data *const data2
> += *(constant_descriptor_rtx_data * const *) p2;
> +  if (data1->size > data2->size)
> +return -1;
> +  if (data1->size < data2->size)
> +return 1;
> +  if (data1->hash < data2->hash)
> +return -1;
> +  gcc_assert (data1->hash > data2->hash);
> +  return 1;
> +}
> +
> +struct const_rtx_data_hasher : nofree_ptr_hash
> +{
> +  static hashval_t hash (constant_descriptor_rtx_data *);
> +  static bool equal (constant_descriptor_rtx_data *,
> +constant_descriptor_rtx_data *);
> +};
> +
> +/* Hash and compare functions for const_rtx_data_htab.  */
> +
> +hashval_t
> +const_rtx_data_hasher::hash (constant_descriptor_rtx_data *data)
> +{
> +  return data->hash;
> +}
> +
> +bool
> +const_rtx_data_hasher::equal (constant_descriptor_rtx_data *x,
> + constant_descriptor_rtx_data *y)
> +{
> +  if (x->hash != y->hash || x->size != y->size)
> +return 0;
> +  unsigned int align1 = x->desc->align;
> +  unsigned int align2 = y->desc->align;
> +  unsigned int offset1 = (x->offset * BITS_PER_UNIT) & (align1 - 1);
> +  unsigned int offset2 = (y->offset * BITS_PER_UNIT) & (align2 - 1);
> +  if (offset1)
> +align1 = least_bit_hwi (offset1);
> +  if (offset2)
> +align2 = least_bit_hwi (offset2);
> +  if (align2 > align1)
> +return 0;
> +  if (memcmp (x->bytes, y->bytes, x->size * sizeof (target_unit)) != 0)
> +return 0;
> +  return 1;
> +}
> +
> +/* Attempt to optimize constant pool POOL.  If it contains both CONST_VECTOR
> +   constants 

Re: [PATCH 0/6] Parallelize Intra-Procedural Optimizations using the LTO Engine.

2020-08-31 Thread Richard Biener via Gcc-patches
On Sat, Aug 29, 2020 at 1:31 PM Jan Hubicka  wrote:
>
> >
> > It not only creates hidden symbols, but it also changes the original
> > symbol name to avoid clashses with other object files. It could be
> > very nice to avoid doing this at all.
> >
> > There was once an idea (I don't remember if from Richi or Honza) to
> > avoid using partial linking, but instead concatenate the generated
> > assembler files into a single assembler file and assemble it.  This
> > would remove the requirement of symbol promotion, as they would be
> > in the same assembler file, but I am not sure if this would work out
> > of the box (i.e. if GCC generates assembler that could be concatenated
> > together).
>
> Not out of the box, because we number labels which will clash but we
> also do smarter tihngs like producing constant pools based on whole unit
> knowledge.
>
> But it should not be technically that hard: simply initialize asm output
> before fork to one file, in each fork arrange separate file and avoid
> priting the end of file stuff and then concatenate in the main compiler
> and work out the places where this breaks...

Yeah.  Or even refactor the output machinery so that in theory
we can create asm fragments [into memory] for functions and variables
and only at the end concat/output them in a desired order  (cf.
-fno-toplevel-reorder
wrt toporder code-generation we'd prefer).  That would also be one (small)
step towards eventually being able to thread the RTL pipeline.

Note that in theory the auto-parallel work could be leveraged to
elide LTRANS streaming if we'd drive LTRANS compile from WPA
instead of from LTO wrapper.  We could simply do the forking,
apply the partition and then load bodies from the original IL files
(with the caveat of needing GC to truncate the decls and types
memory from WPA).

But yes, avoiding the partial link would be nice - it would also
be possible to support parallelizing -S compiles.  And it would
avoid the symbol renaming.

Richard.

> Honza
> >
> > >
> > > > Bootstrapped and Regtested on Linux x86_64.
> > > >
> > > > Giuliano Belinassi (6):
> > > >   Modify gcc driver for parallel compilation
> > > >   Implement a new partitioner for parallel compilation
> > > >   Implement fork-based parallelism engine
> > > >   Add `+' for Jobserver Integration
> > > >   Add invoke documentation
> > > >   New tests for parallel compilation feature
> > > >
> > > >  gcc/Makefile.in   |6 +-
> > > >  gcc/cgraph.c  |   16 +
> > > >  gcc/cgraph.h  |   13 +
> > > >  gcc/cgraphunit.c  |  198 ++-
> > > >  gcc/common.opt|4 +
> > > >  gcc/doc/invoke.texi   |   32 +-
> > > >  gcc/gcc.c | 1219 +
> > > >  gcc/ipa-fnsummary.c   |2 +-
> > > >  gcc/ipa-icf.c |3 +-
> > > >  gcc/ipa-visibility.c  |3 +-
> > > >  gcc/ipa.c |4 +-
> > > >  gcc/jobserver.cc  |  168 +++
> > > >  gcc/jobserver.h   |   33 +
> > > >  gcc/lto-cgraph.c  |  172 +++
> > > >  gcc/{lto => }/lto-partition.c |  463 ++-
> > > >  gcc/{lto => }/lto-partition.h |4 +-
> > > >  gcc/lto-streamer.h|4 +
> > > >  gcc/lto/Make-lang.in  |4 +-
> > > >  gcc/lto/lto.c |2 +-
> > > >  gcc/params.opt|8 +
> > > >  gcc/symtab.c  |   46 +-
> > > >  gcc/testsuite/driver/a.c  |6 +
> > > >  gcc/testsuite/driver/b.c  |6 +
> > > >  gcc/testsuite/driver/driver.exp   |   80 ++
> > > >  gcc/testsuite/driver/empty.c  |0
> > > >  gcc/testsuite/driver/foo.c|7 +
> > > >  .../gcc.dg/parallel-early-constant.c  |   22 +
> > > >  gcc/testsuite/gcc.dg/parallel-static-1.c  |   21 +
> > > >  gcc/testsuite/gcc.dg/parallel-static-2.c  |   21 +
> > > >  .../gcc.dg/parallel-static-clash-1.c  |   23 +
> > > >  .../gcc.dg/parallel-static-clash-aux.c|   14 +
> > > >  gcc/toplev.c  |   58 +-
> > > >  gcc/toplev.h  |3 +
> > > >  gcc/tree.c|   23 +-
> > > >  gcc/varasm.c  |   26 +-
> > > >  intl/Makefile.in  |2 +-
> > > >  libbacktrace/Makefile.in  |2 +-
> > > >  libcpp/Makefile.in|2 +-
> > > >  libdecnumber/Makefile.in  |2 +-
> > > >  libiberty/Makefile.in 

Re: [PATCH] c: Silently ignore pragma region [PR85487]

2020-08-31 Thread Richard Biener via Gcc-patches
On Fri, Aug 28, 2020 at 10:04 PM Austin Morton  wrote:
>
> > The patch misses documentation of the pragma.
>
> This was an intentional omission - when looking for documentation of
> the pragma in clang I found none.
>
> If we do want to document the pragmas in GCC:
>  - what section of the documentation would that go in?
> - gcc/doc/cpp.texi, section 7, "Pragmas"
> - this section states: "This manual documents the pragmas
> which are meaningful to the preprocessor itself. Other pragmas are
> meaningful to the C or C++ compilers. They are documented in the GCC
> manual."
> - these pragmas aren't exactly meaningful to the preprocessor
> _or_ the C/C++ compiler, so it's not exactly clear where they fit.
> - gcc/doc/extend.texi, section 6.62, "Pragmas Accepted by GCC"
>  - what exactly would it say?
> - I guess just mention the existence and that they won't trigger
> unknown pragma warnings? They don't _do_ anything

Yes, documenting their existance and their zero effect.

Richard.

>
> Austin


[PATCH] Fix ICE in ipa-cp due to cost addition overflow (PR 96806)

2020-08-31 Thread Feng Xue OS via Gcc-patches
This patch is to fix a bug that cost that is used to evaluate clone candidate
becomes negative due to integer overflow.

Feng
---
2020-08-31  Feng Xue  

gcc/
PR tree-optimization/96806
* ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
overflow.

gcc/testsuite/
PR tree-optimization/96806
* g++.dg/ipa/pr96806.C: New test.From 8d92b4ca4be2303a73f0a2441e57564488ca1c23 Mon Sep 17 00:00:00 2001
From: Feng Xue 
Date: Mon, 31 Aug 2020 15:00:52 +0800
Subject: [PATCH] ipa/96806 - Fix ICE in ipa-cp due to integer addition
 overflow

2020-08-31  Feng Xue  

gcc/
PR tree-optimization/96806
* ipa-cp.c (decide_about_value): Use safe_add to avoid cost addition
	overflow.

gcc/testsuite/
PR tree-optimization/96806
* g++.dg/ipa/pr96806.C: New test.
---
 gcc/ipa-cp.c   |  8 ++---
 gcc/testsuite/g++.dg/ipa/pr96806.C | 53 ++
 2 files changed, 57 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr96806.C

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index e4910a04ffa..8e5d6e2a393 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -5480,11 +5480,11 @@ decide_about_value (struct cgraph_node *node, int index, HOST_WIDE_INT offset,
    freq_sum, count_sum,
    val->local_size_cost)
   && !good_cloning_opportunity_p (node,
-  val->local_time_benefit
-  + val->prop_time_benefit,
+  safe_add (val->local_time_benefit,
+		val->prop_time_benefit),
   freq_sum, count_sum,
-  val->local_size_cost
-  + val->prop_size_cost))
+  safe_add (val->local_size_cost,
+		val->prop_size_cost)))
 return false;
 
   if (dump_file)
diff --git a/gcc/testsuite/g++.dg/ipa/pr96806.C b/gcc/testsuite/g++.dg/ipa/pr96806.C
new file mode 100644
index 000..28fdf7787a1
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr96806.C
@@ -0,0 +1,53 @@
+/* { dg-do compile } */
+/* { dg-options "-std=c++11 -O -fipa-cp -fipa-cp-clone --param=ipa-cp-max-recursive-depth=94 --param=logical-op-non-short-circuit=0" } */
+
+enum a {};
+struct m;
+struct n {
+  a d;
+};
+int o(int, int);
+struct p {
+  char d;
+  char aa;
+  p *ab;
+  bool q() const {
+int h = d & 4;
+return h;
+  }
+  char r() const { return aa; }
+  int s(const m *, bool) const;
+} l;
+struct t {
+  p *ac;
+  p *u() { return ac; }
+  p *v(int);
+};
+int w(const p *, const p *, const m *, int = 0);
+struct m : n {
+  struct {
+t *ad;
+  } ae;
+  char x() const;
+  p *y(int z) const { return ae.ad ? nullptr : ae.ad->v(z); }
+} j;
+int w(const p *z, const p *af, const m *ag, int ah) {
+  int a, g = z->s(ag, true), i = af->s(ag, true);
+  if (af->q()) {
+if (ag->x())
+  return 0;
+ah++;
+char b = af->r();
+p *c = ag->y(b), *e = ag->ae.ad->u();
+int d = w(z, c, ag, ah), f = w(z, af ? e : af->ab, ag, ah);
+a = f ? d : f;
+return a;
+  }
+  if (g || i == 1)
+return ag->d ? o(g, i) : o(g, i);
+  return 0;
+}
+void ai() {
+  for (p k;;)
+w(, , );
+}
-- 
2.17.1



Re: [PATCH v3] libgcc: Use `-fasynchronous-unwind-tables' for LIB2_DIVMOD_FUNCS

2020-08-31 Thread Richard Biener via Gcc-patches
On Fri, Aug 28, 2020 at 5:47 PM Ramana Radhakrishnan
 wrote:
>
> On Wed, Aug 26, 2020 at 12:08 PM Richard Biener via Gcc-patches
>  wrote:
> >
> > On Tue, Aug 25, 2020 at 6:32 PM Maciej W. Rozycki  wrote:
> > >
> > > Hi Kito,
> > >
> > > > I just found the mail thread about div mod with -fnon-call-exceptions,
> > > > I think keeping the default LIB2_DIVMOD_EXCEPTION_FLAGS unchanged
> > > > should be the best way to go.
> > > >
> > > > Non-call exceptions and libcalls
> > > > https://gcc.gnu.org/legacy-ml/gcc/2001-06/msg01108.html
> > > >
> > > > Non-call exceptions and libcalls Part 2
> > > > https://gcc.gnu.org/legacy-ml/gcc/2001-07/msg00402.html
> > >
> > >  Thank you for your input.  I believe I had a look at these commits before
> > > I posted my original proposal.  Please note however that they both predate
> > > the addition of `-fasynchronous-unwind-tables', so clearly the option
> > > could not have been considered at the time the changes were accepted into
> > > GCC.
> > >
> > >  Please note that, as observed by Andreas and Richard here:
> > >  in no case we
> > > want to have full exception handling here, so we clearly need no
> > > `-fexceptions'; this libcall code won't itself ever call `throw'.
> > >
> > >  Now it might be a bit unclear from documentation as to whether we want
> > > `-fnon-call-exceptions' or `-fasynchronous-unwind-tables', as it says that
> > > the former option makes GCC:
> > >
> > > "Generate code that allows trapping instructions to throw
> > >  exceptions.  Note that this requires platform-specific runtime
> > >  support that does not exist everywhere.  Moreover, it only allows
> > >  _trapping_ instructions to throw exceptions, i.e. memory references
> > >  or floating-point instructions.  It does not allow exceptions to be
> > >  thrown from arbitrary signal handlers such as 'SIGALRM'."
> > >
> > > Note the observation that arbitrary signal handlers (invoked at more inner
> > > a frame level, and necessarily built with `-fexceptions') are still not
> > > allowed to throw exceptions.  For that, as far as I understand it, you
> > > actually need `-fasynchronous-unwind-tables', which makes GCC:
> > >
> > > "Generate unwind table in DWARF format, if supported by target
> > >  machine.  The table is exact at each instruction boundary, so it
> > >  can be used for stack unwinding from asynchronous events (such as
> > >  debugger or garbage collector)."
> > >
> > > and therefore allows arbitrary signal handlers to throw exceptions,
> > > effectively making the option a superset of `-fexceptions'.  As libcall
> > > code can generally be implicitly invoked everywhere, we want people not to
> > > be restrained by it and let a exception thrown by e.g. a user-supplied
> > > SIGALRM handler propagate through the relevant libcall's stack frame,
> > > rather than just those exceptions the libcall itself might indirectly
> > > cause.
> > >
> > >  Maybe I am missing something here, especially as `-fexceptions' mentions
> > > code generation, while `-fasynchronous-unwind-tables' only refers to
> > > unwind table generation, but then what would be the option to allow
> > > exceptions to be thrown from arbitrary signal handlers rather than those
> > > for memory references or floating-point instructions (where by a special
> > > provision integer division falls as well)?
> > >
> > >  My understanding has been it is `-fasynchronous-unwind-tables', but I'll
> > > be gladly straightened out otherwise.  If I am indeed right, then perhaps
> > > the documentation could be clarified and expanded a bit.
> > >
> > >  Barring evidence to the contrary I maintain the change I have proposed is
> > > correct, and not only removes the RISC-V `ld.so' build issue, but it fixes
> > > the handling of asynchronous events arriving in the middle of the relevant
> > > libcalls for all platforms as well.
> > >
> > >  Please let me know if you have any further questions, comments or
> > > concerns.
> >
> > You only need -fexceptions for that, then you can throw; from a signal 
> > handler
> > for example.  If you want to be able to catch the exception somewhere up
> > the call chain all intermediate code needs to be compiled so that unwinding
> > from asynchronous events is possible - -fasynchronous-unwind-tables.
> >
> > So -fasynchronous-unwind-tables is about unwinding.  -f[non-call]-exceptions
> > is about throw/catch.  Clearly libgcc does neither throw nor catch but with
> > async events we might need to unwind from inside it.
> >
> > Now I don't know about the arm situation but if arm cannot do async 
> > unwinding
> > then even -fexceptions won't help it here - libgcc still does not throw.
>
> On Arm as in the AArch32 port,  async unwinding will not work as those
> can't be expressed in the EH format tables.

And surely building libgcc with -fexceptions does not change that either.

Richard.

> regards
> Ramana

Re: [PATCH] VEC_COND_EXPR: fix ICE in gimple_expand_vec_cond_expr

2020-08-31 Thread Richard Biener via Gcc-patches
On Fri, Aug 28, 2020 at 4:18 PM Martin Liška  wrote:
>
> Hey.
>
> The patch is about VEC_COND_EXP comparison of a SSA_NAME with a constant
> that is artifact of -fno-tree-ccp.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

Err, no - we shouldn't do this at RTL expansion time.  And piecewise
expansion is gross - in this the case is a constant boolean vector IIRC
(not sure what mode?) and ISEL only tries matching up with
a comparison operand.  But ISEL should also handle matching up with
a constant "boolean vector" ("boolean vector"s are artifacts of VEC_COND_EXPRs,
but some targets allow bitwise arithmetic on them...).  So the compensation
needs to happen in ISEL, recognizing

 _1 = {0}
 _2 = _1 ? ...;

as

 _2 = .VCOND (0 == 0, ...)

or so.

Richard.

> Thanks,
> Martin
>
> gcc/ChangeLog:
>
> PR tree-optimization/96466
> * gimple-fold.c (expand_cmp_piecewise): New.
> * gimple-fold.h (nunits_for_known_piecewise_op): New.
> (expand_cmp_piecewise): Moved from ...
> * tree-vect-generic.c (expand_vector_comparison): ... here.
> (nunits_for_known_piecewise_op): Moved to gimple-fold.h.
> * gimple-isel.cc (gimple_expand_vec_cond_expr): Use
> expand_cmp_piecewise fallback for constants.
>
> gcc/testsuite/ChangeLog:
>
> PR tree-optimization/96466
> * gcc.dg/vect/pr96466.c: New test.
> ---
>   gcc/gimple-fold.c   | 28 
>   gcc/gimple-fold.h   | 14 ++
>   gcc/gimple-isel.cc  | 10 ---
>   gcc/testsuite/gcc.dg/vect/pr96466.c | 18 +
>   gcc/tree-vect-generic.c | 41 ++---
>   5 files changed, 69 insertions(+), 42 deletions(-)
>   create mode 100644 gcc/testsuite/gcc.dg/vect/pr96466.c
>
> diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
> index dcc1b56a273..86d5d0ed7d8 100644
> --- a/gcc/gimple-fold.c
> +++ b/gcc/gimple-fold.c
> @@ -8056,3 +8056,31 @@ gimple_stmt_integer_valued_real_p (gimple *stmt, int 
> depth)
> return false;
>   }
>   }
> +
> +tree
> +expand_cmp_piecewise (gimple_stmt_iterator *gsi, tree type, tree op0, tree 
> op1)
> +{
> +  tree inner_type = TREE_TYPE (TREE_TYPE (op0));
> +  tree part_width = vector_element_bits_tree (TREE_TYPE (op0));
> +  tree index = bitsize_int (0);
> +  int nunits = nunits_for_known_piecewise_op (TREE_TYPE (op0));
> +  int prec = GET_MODE_PRECISION (SCALAR_TYPE_MODE (type));
> +  tree ret_type = build_nonstandard_integer_type (prec, 1);
> +  tree ret_inner_type = boolean_type_node;
> +  int i;
> +  tree t = build_zero_cst (ret_type);
> +
> +  if (TYPE_PRECISION (ret_inner_type) != 1)
> +ret_inner_type = build_nonstandard_integer_type (1, 1);
> +  for (i = 0; i < nunits;
> +   i++, index = int_const_binop (PLUS_EXPR, index, part_width))
> +{
> +  tree a = tree_vec_extract (gsi, inner_type, op0, part_width, index);
> +  tree b = tree_vec_extract (gsi, inner_type, op1, part_width, index);
> +  tree result = gimplify_build2 (gsi, NE_EXPR, ret_inner_type, a, b);
> +  t = gimplify_build3 (gsi, BIT_INSERT_EXPR, ret_type, t, result,
> +  bitsize_int (i));
> +}
> +
> +  return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
> +}
> diff --git a/gcc/gimple-fold.h b/gcc/gimple-fold.h
> index 0ed1d1ffe83..7e843b34f53 100644
> --- a/gcc/gimple-fold.h
> +++ b/gcc/gimple-fold.h
> @@ -147,6 +147,20 @@ gimple_build_vector (gimple_seq *seq, 
> tree_vector_builder *builder)
>   extern bool gimple_stmt_nonnegative_warnv_p (gimple *, bool *, int = 0);
>   extern bool gimple_stmt_integer_valued_real_p (gimple *, int = 0);
>
> +/* Return the number of elements in a vector type TYPE that we have
> +   already decided needs to be expanded piecewise.  We don't support
> +   this kind of expansion for variable-length vectors, since we should
> +   always check for target support before introducing uses of those.  */
> +
> +static inline unsigned int
> +nunits_for_known_piecewise_op (const_tree type)
> +{
> +  return TYPE_VECTOR_SUBPARTS (type).to_constant ();
> +}
> +
> +extern tree expand_cmp_piecewise (gimple_stmt_iterator *gsi, tree lhs,
> + tree op0, tree op1);
> +
>   /* In gimple-match.c.  */
>   extern tree gimple_simplify (enum tree_code, tree, tree,
>  gimple_seq *, tree (*)(tree));
> diff --git a/gcc/gimple-isel.cc b/gcc/gimple-isel.cc
> index b330cf4c20e..32e3bc31f7f 100644
> --- a/gcc/gimple-isel.cc
> +++ b/gcc/gimple-isel.cc
> @@ -33,8 +33,8 @@ along with GCC; see the file COPYING3.  If not see
>   #include "gimplify-me.h"
>   #include "gimplify.h"
>   #include "tree-cfg.h"
> -#include "bitmap.h"
>   #include "tree-ssa-dce.h"
> +#include "gimple-fold.h"
>
>   /* Expand all VEC_COND_EXPR gimple assignments into calls to internal
>  function based on type of selected expansion.  */
> @@ -119,8 +119,12 

Re: [PATCH] [AVX512]For vector compare to mask register, UNSPEC is needed instead of comparison operator [PR96243]

2020-08-31 Thread Jakub Jelinek via Gcc-patches
On Tue, Aug 11, 2020 at 05:43:48PM +0800, Hongtao Liu via Gcc-patches wrote:
> Hi:
>   The issue is described in the bugzilla.
>   Bootstrap is ok, regression test for i386/x86-64 backend is ok.
>  Ok for trunk?
> 
> ChangeLog
> gcc/
> PR target/96551
> * config/i386/sse.md (vec_unpacku_float_hi_v16si): For vector
> compare to integer mask, don't use gen_rtx_LT , use

Please remove the space before comma.

> ix86_expand_mask_vec_cmp instead.
> (vec_unpacku_float_hi_v16si): Ditto.
> 
> gcc/testsuite
> * gcc.target/i386/pr96551-1.c: New test.
> * gcc.target/i386/pr96551-2.c: New test.

And please rename the testcases to avx512f-pr96551-{1,2}.c.

> +  for (int i = 0; i != 256; i++)
> +if (exp[i] != b[i])
> +  __builtin_abort ();

You can use just abort (); here, given that avx512-check.h includes
stdlib.h.

Ok for trunk with those nits changed.

Jakub



Re: [r11-1851 Regression] FAIL: gcc.dg/vect/slp-46.c scan-tree-dump-times vect "vectorizing stmts using SLP" 2 on Linux/x86_64 (-m64 -march=cascadelake)

2020-08-31 Thread Richard Biener
On Sat, 29 Aug 2020, sunil.k.pandey wrote:

> On Linux/x86_64,
> 
> dccbf1e2a6e544f71b4a5795f0c79015db019fc3 is the first bad commit
> commit dccbf1e2a6e544f71b4a5795f0c79015db019fc3
> Author: Richard Biener 
> Date:   Mon Jul 6 16:26:50 2020 +0200
> 
> tree-optimization/96075 - fix bogus misalignment calculation
> 
> caused
> 
> FAIL: gcc.dg/vect/slp-46.c -flto -ffat-lto-objects  scan-tree-dump-times vect 
> "vectorizing stmts using SLP" 2
> FAIL: gcc.dg/vect/slp-46.c scan-tree-dump-times vect "vectorizing stmts using 
> SLP" 2

It might be useful to point out that the commit in question added the
testcase that "regressed" plus that you are using a non-standard
configuration (x86 vectorization unfortunately is too fragmented to
produce fully attributed or generic testcases that will pass with
any configuration).

So the appropriate report would be "FAIL: gcc.dg/vect/slp-46.c
with -march=cascadelake" which isn't a regression.

Richard.


Re: [PATCH] sra: Avoid SRAing if there is an aout-of-bounds access (PR 96820)

2020-08-31 Thread Richard Biener
On Fri, 28 Aug 2020, Martin Jambor wrote:

> Hi,
> 
> the testcase causes and ICE in the SRA verifier on x86_64 when
> compiling with -m32 because build_user_friendly_ref_for_offset looks
> at an out-of-bounds array_ref within an array_ref which accesses an
> offset which does not fit into a signed 32bit integer and turns it
> into an array-ref with a negative index.
> 
> The best thing is probably to bail out early when encountering an out
> of bounds access to a local stack-allocated aggregate (and let the DSE
> just delete such statements) which is what the patch does.
> 
> I also glanced over to the initial candidate vetting routine to make
> sure the size would fit into HWI and noticed that it uses unsigned
> variants whereas the rest of SRA operates on signed offsets and
> sizes (because get_ref_and_extent does) and so changed that for the
> sake of consistency.  These ancient checks operate on sizes of types
> as opposed to DECLs but I hope that any issues potentially arising
> from that are basically hypothetical.
> 
> Bootstrapped and tested on x86_64-linux.  OK for master and then for
> gcc-10 branch?

OK.

Richard.

> Thanks,
> 
> Martin
> 
> 
> gcc/ChangeLog:
> 
> 2020-08-28  Martin Jambor  
> 
>   PR tree-optimization/96820
>   * tree-sra.c (create_access): Disqualify candidates with accesses
>   beyond the end of the original aggregate.
>   (maybe_add_sra_candidate): Check that candidate type size fits
>   signed uhwi for the sake of consistency.
> 
> gcc/testsuite/ChangeLog:
> 
> 2020-08-28  Martin Jambor  
> 
>   PR tree-optimization/96820
>   * gcc.dg/tree-ssa/pr96820.c: New test.
> ---
>  gcc/testsuite/gcc.dg/tree-ssa/pr96820.c | 12 
>  gcc/tree-sra.c  |  9 +++--
>  2 files changed, 19 insertions(+), 2 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr96820.c
> 
> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr96820.c 
> b/gcc/testsuite/gcc.dg/tree-ssa/pr96820.c
> new file mode 100644
> index 000..f5c2195f310
> --- /dev/null
> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr96820.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O1" } */
> +
> +struct a {
> +  int b;
> +};
> +int main() {
> +  struct a d[][6] = {4};
> +  struct a e;
> +  d[1955249013][1955249013] = e;
> +  return e.b;
> +}
> diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
> index 754f41302fc..98a6cacbe2a 100644
> --- a/gcc/tree-sra.c
> +++ b/gcc/tree-sra.c
> @@ -941,6 +941,11 @@ create_access (tree expr, gimple *stmt, bool write)
>disqualify_candidate (base, "Encountered an unconstrained access.");
>return NULL;
>  }
> +  if (offset + size > tree_to_shwi (DECL_SIZE (base)))
> +{
> +  disqualify_candidate (base, "Encountered an access beyond the base.");
> +  return NULL;
> +}
>  
>access = create_access_1 (base, offset, size);
>access->expr = expr;
> @@ -1880,12 +1885,12 @@ maybe_add_sra_candidate (tree var)
>reject (var, "has incomplete type");
>return false;
>  }
> -  if (!tree_fits_uhwi_p (TYPE_SIZE (type)))
> +  if (!tree_fits_shwi_p (TYPE_SIZE (type)))
>  {
>reject (var, "type size not fixed");
>return false;
>  }
> -  if (tree_to_uhwi (TYPE_SIZE (type)) == 0)
> +  if (tree_to_shwi (TYPE_SIZE (type)) == 0)
>  {
>reject (var, "type size is zero");
>return false;
> 

-- 
Richard Biener 
SUSE Software Solutions Germany GmbH, Maxfeldstrasse 5, 90409 Nuernberg,
Germany; GF: Felix Imendörffer; HRB 36809 (AG Nuernberg)


[PATCH] test/rs6000: Add Power9 and up as vect_len target

2020-08-31 Thread Kewen.Lin via Gcc-patches
Hi,

Power9 supports vector with length in bytes load/store, this patch
is to teach check_effective_target_vect_len_load_store to take it
and its laters as effective vector with length targets.

Also supplement the documents for has_arch_pwr*.

Bootstrapped/regtested on powerpc64le-linux-gnu P8.

Is it ok for trunk?

BR,
Kewen
--

gcc/ChangeLog:

* doc/sourcebuild.texi (has_arch_pwr5, has_arch_pwr6, has_arch_pwr7,
has_arch_pwr8, has_arch_pwr9): Document.

gcc/testsuite/ChangeLog:

* lib/target-supports.exp
(check_effective_target_vect_len_load_store): Call check function
check_effective_target_has_arch_pwr9.
diff --git a/gcc/doc/sourcebuild.texi b/gcc/doc/sourcebuild.texi
index f7d24752da0..4a08f8e9ecd 100644
--- a/gcc/doc/sourcebuild.texi
+++ b/gcc/doc/sourcebuild.texi
@@ -2174,6 +2174,26 @@ PowerPC target supports executing AltiVec instructions.
 
 @item vsx_hw
 PowerPC target supports executing VSX instructions (ISA 2.06).
+
+@item has_arch_pwr5
+PowerPC target pre-defines macro _ARCH_PWR5 which means the @code{-mcpu}
+setting is Power5 or later.
+
+@item has_arch_pwr6
+PowerPC target pre-defines macro _ARCH_PWR6 which means the @code{-mcpu}
+setting is Power6 or later.
+
+@item has_arch_pwr7
+PowerPC target pre-defines macro _ARCH_PWR7 which means the @code{-mcpu}
+setting is Power7 or later.
+
+@item has_arch_pwr8
+PowerPC target pre-defines macro _ARCH_PWR8 which means the @code{-mcpu}
+setting is Power8 or later.
+
+@item has_arch_pwr9
+PowerPC target pre-defines macro _ARCH_PWR9 which means the @code{-mcpu}
+setting is Power9 or later.
 @end table
 
 @subsubsection Other hardware attributes
diff --git a/gcc/testsuite/lib/target-supports.exp 
b/gcc/testsuite/lib/target-supports.exp
index a1e4799a404..6f886fd1425 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -7066,7 +7066,7 @@ proc check_effective_target_vect_fully_masked { } {
 # @code{len_store} optabs.
 
 proc check_effective_target_vect_len_load_store { } {
-return 0
+return [expr { [check_effective_target_has_arch_pwr9] }]
 }
 
 # Return the value of parameter vect-partial-vector-usage specified for


Re: [PATCH] RISC-V/libgcc: Use `-fasynchronous-unwind-tables' for LIB2_DIVMOD_FUNCS

2020-08-31 Thread Kito Cheng via Gcc-patches
Hi Maciej:

LGTM, thanks for your patch!

On Sat, Aug 29, 2020 at 9:19 PM Maciej W. Rozycki via Gcc-patches
 wrote:
>
> Use `-fasynchronous-unwind-tables' rather than `-fexceptions
> -fnon-call-exceptions' in LIB2_DIVMOD_FUNCS compilation flags so as to
> provide unwind tables for the affected functions while not pulling the
> unwinder proper, which is not required here.
>
> Beyond saving program space it fixes a RISC-V glibc build error due to
> unsatisfied `malloc' and `free' references from the unwinder causing
> link errors with `ld.so' where libgcc has been built at -O0.
>
> libgcc/
> * config/riscv/t-elf (LIB2_DIVMOD_EXCEPTION_FLAGS): New
> variable.
> ---
> Hi,
>
>  As Mon, Aug 31st (a bank holiday in England) will be my last day at
> Western Digital and I won't be able to submit patches on behalf of the
> company afterwards here is a replacement change for RISC-V only in case
> the generic one discussed here:
>
> 
> does not go through.  While I won't be able to submit changes I will
> continue watching the discussion and I will be able to commit either
> change once there is the final outcome, just as anyone would.
>
>  This change has passed full GCC regression testing with the
> `riscv64-linux-gnu' target, RV64/lp64d and RV32/ilp32d multilibs, using
> QEMU in the Linux user emulation mode.
>
>   Maciej
> ---
>  libgcc/config/riscv/t-elf |2 ++
>  1 file changed, 2 insertions(+)
>
> gcc-riscv-libgcc-divmod-asynchronous-unwind-tables.diff
> Index: gcc/libgcc/config/riscv/t-elf
> ===
> --- gcc.orig/libgcc/config/riscv/t-elf
> +++ gcc/libgcc/config/riscv/t-elf
> @@ -4,3 +4,5 @@ LIB2ADD += $(srcdir)/config/riscv/save-r
>$(srcdir)/config/riscv/div.S \
>$(srcdir)/config/riscv/atomic.c \
>
> +# Avoid the full unwinder being pulled along with the division libcalls.
> +LIB2_DIVMOD_EXCEPTION_FLAGS := -fasynchronous-unwind-tables


[PATCH] Adjust testcase

2020-08-31 Thread Hongtao Liu via Gcc-patches
Hi:
  This patch is to adjust testcases which failed the regression test
when gcc is built with -march=skylake-avx512.
  Also add runtime check for AVX512 tests.

gcc/testsuite/ChangeLog:
PR target/96246
PR target/96855
PR target/96856
PR target/96857
* g++.target/i386/avx512bw-pr96246-2.C: Add runtime check for
AVX512BW.
* g++.target/i386/avx512vl-pr96246-2.C: Add runtime check for
AVX512BW and AVX512VL
* g++.target/i386/avx512f-helper.h: New header.
* gcc.target/i386/pr92658-avx512f.c: Add
-mprefer-vector-width=512 to avoid impact of different default
mtune which gcc is built with.
* gcc.target/i386/avx512bw-pr95488-1.c: Ditto.
* gcc.target/i386/pr92645-4.c: Add -mno-avx512f to avoid
impact of different default march which gcc is built with.


-- 
BR,
Hongtao
From 80effa00835d53962608a3607ef79da243a6dc5a Mon Sep 17 00:00:00 2001
From: liuhongt 
Date: Mon, 31 Aug 2020 10:54:13 +0800
Subject: [PATCH] Adjust testcase.

gcc/testsuite/ChangeLog:
	PR target/96246
	PR target/96855
	PR target/96856
	PR target/96857
	* g++.target/i386/avx512bw-pr96246-2.C: Add runtime check for
	AVX512BW.
	* g++.target/i386/avx512vl-pr96246-2.C: Add runtime check for
	AVX512BW and AVX512VL
	* g++.target/i386/avx512f-helper.h: New header.
	* gcc.target/i386/pr92658-avx512f.c: Add
	-mprefer-vector-width=512 to avoid impact of different default
	mtune which gcc is built with.
	* gcc.target/i386/avx512bw-pr95488-1.c: Ditto.
	* gcc.target/i386/pr92645-4.c: Add -mno-avx512f to avoid
	impact of different default march which gcc is built with.
---
 .../g++.target/i386/avx512bw-pr96246-2.C  |  9 +---
 .../g++.target/i386/avx512f-helper.h  |  1 +
 .../g++.target/i386/avx512vl-pr96246-2.C  | 21 +--
 .../gcc.target/i386/avx512bw-pr95488-1.c  |  2 +-
 gcc/testsuite/gcc.target/i386/pr92645-4.c |  2 +-
 .../gcc.target/i386/pr92658-avx512f.c |  2 +-
 6 files changed, 25 insertions(+), 12 deletions(-)
 create mode 100644 gcc/testsuite/g++.target/i386/avx512f-helper.h

diff --git a/gcc/testsuite/g++.target/i386/avx512bw-pr96246-2.C b/gcc/testsuite/g++.target/i386/avx512bw-pr96246-2.C
index b96b7c7c932..30a1b959573 100644
--- a/gcc/testsuite/g++.target/i386/avx512bw-pr96246-2.C
+++ b/gcc/testsuite/g++.target/i386/avx512bw-pr96246-2.C
@@ -3,6 +3,10 @@
 /* { dg-require-effective-target avx512bw } */
 /* { dg-options "-O2 -std=c++14 -mavx512bw" } */
 
+#define AVX512BW
+
+#include "avx512f-helper.h"
+
 #include "avx512bw-pr96246-1.C"
 
 #define RUNTIME_TEST(vtype, num)			\
@@ -24,8 +28,8 @@
 }			\
   while (0)
 
-int
-main (void)
+void
+test_512 (void)
 {
   RUNTIME_TEST (v64qi, 64);
   RUNTIME_TEST (v32hi, 32);
@@ -33,5 +37,4 @@ main (void)
   RUNTIME_TEST (v8di, 8);
   RUNTIME_TEST (v16sf, 16);
   RUNTIME_TEST (v8df, 8);
-  return 0;
 }
diff --git a/gcc/testsuite/g++.target/i386/avx512f-helper.h b/gcc/testsuite/g++.target/i386/avx512f-helper.h
new file mode 100644
index 000..09b6bcbf77a
--- /dev/null
+++ b/gcc/testsuite/g++.target/i386/avx512f-helper.h
@@ -0,0 +1 @@
+#include "../../gcc.target/i386/avx512f-helper.h"
diff --git a/gcc/testsuite/g++.target/i386/avx512vl-pr96246-2.C b/gcc/testsuite/g++.target/i386/avx512vl-pr96246-2.C
index 9a16f0d2c9e..db9dce2caef 100644
--- a/gcc/testsuite/g++.target/i386/avx512vl-pr96246-2.C
+++ b/gcc/testsuite/g++.target/i386/avx512vl-pr96246-2.C
@@ -4,6 +4,11 @@
 /* { dg-require-effective-target avx512vl } */
 /* { dg-options "-O2 -std=c++14 -mavx512bw -mavx512vl" } */
 
+#define AVX512VL
+#define AVX512BW
+
+#include "avx512f-helper.h"
+
 #include "avx512vl-pr96246-1.C"
 
 #define RUNTIME_TEST(vtype, num)			\
@@ -25,17 +30,21 @@
 }			\
   while (0)
 
-int
-main (void)
+void
+test_256 (void)
 {
-  RUNTIME_TEST (v16qi, 16);
   RUNTIME_TEST (v32qi, 32);
   RUNTIME_TEST (v16hi, 16);
-  RUNTIME_TEST (v4si, 4);
   RUNTIME_TEST (v8si, 8);
-  RUNTIME_TEST (v4sf, 4);
   RUNTIME_TEST (v8sf, 8);
   RUNTIME_TEST (v4di, 4);
   RUNTIME_TEST (v4df, 4);
-  return 0;
+}
+
+void
+test_128 (void)
+{
+  RUNTIME_TEST (v16qi, 16);
+  RUNTIME_TEST (v4si, 4);
+  RUNTIME_TEST (v4sf, 4);
 }
diff --git a/gcc/testsuite/gcc.target/i386/avx512bw-pr95488-1.c b/gcc/testsuite/gcc.target/i386/avx512bw-pr95488-1.c
index 594e511868d..e6e0ac2fd82 100644
--- a/gcc/testsuite/gcc.target/i386/avx512bw-pr95488-1.c
+++ b/gcc/testsuite/gcc.target/i386/avx512bw-pr95488-1.c
@@ -1,6 +1,6 @@
 /* PR target/95488  */
 /* { dg-do compile } */
-/* { dg-options "-O2 -mavx512bw" }  */
+/* { dg-options "-O2 -mavx512bw -mprefer-vector-width=512" }  */
 /* { dg-final { scan-assembler-times "vpmovzxbw" 4 } } */
 /* { dg-final { scan-assembler-times "vpmullw\[^\n\]*zmm" 2 } } */
 /* { dg-final { scan-assembler-times "vpmovwb" 2 } } */
diff --git a/gcc/testsuite/gcc.target/i386/pr92645-4.c b/gcc/testsuite/gcc.target/i386/pr92645-4.c
index 5d459040846..28a3f9a3527 100644
---