Re: [patch, fortran] Fix PR 90813

2019-07-29 Thread Thomas Koenig

Hi Paul,


That is very well done. Thanks for picking it up and running with it.
OK on both the fix and the dumping of the gsymbols.


Committed, thanks.


You might consider back porting both this patch and my fix for the
original bug to 9-branch.


Both patches apply cleanly together on gcc-9 (well, except for the
whitespace fix of a comment that didn't exist yet in gcc-9, so
I think we can ignore that), and pass regression tests on POWER9.

So, I will commit this tonight.

Regards

Thomas


[PATCH, i386]: Some further movstrict cleanups

2019-07-29 Thread Uros Bizjak
The attached patch allows only SUBREGs as output operands of the movstrict
expander and further allows only register operand outputs in the various
corresponding splitters. This enforces the restriction documented
for the STRICT_LOW_PART RTX:

 This expression code is used in only one context: as the
 destination operand of a 'set' expression.  In addition, the
 operand of this expression must be a non-paradoxical 'subreg'
 expression.

Additionally, the patch removes post-reload calls to the movstrict
expander and consequently constructs RTXes involving STRICT_LOW_PART
with hard registers "by hand".

2019-07-30  Uroš Bizjak  

* config/i386/i386.md (movstrict): Use register_operand
predicate for operand 0.  Add expander condition.  Assert that
operand 0 is a SUBREG RTX.
(*movstrict_1): Use register_operand predicate for operand 0.
Update operand constraints and insn condition.
(zero_extendsi2_and): Do not call gen_movstrict.
(zero_extendqihi2_and): Do not call gen_movstrictqi.
(*setcc_qi_slp): Use register_operand predicate for operand 0.
Update operand 0 constraints.
(setcc_qi_slp splitters): Use register_operand predicate for operand 0.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.md
===
--- config/i386/i386.md (revision 273873)
+++ config/i386/i386.md (working copy)
@@ -2786,26 +2786,20 @@
(set_attr "bdver1_decode" "double")])
 
 (define_expand "movstrict"
-  [(set (strict_low_part (match_operand:SWI12 0 "nonimmediate_operand"))
+  [(set (strict_low_part (match_operand:SWI12 0 "register_operand"))
(match_operand:SWI12 1 "general_operand"))]
-  ""
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
 {
-  if (TARGET_PARTIAL_REG_STALL && optimize_function_for_speed_p (cfun))
+  gcc_assert (SUBREG_P (operands[0]));
+  if (GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT)
 FAIL;
-  if (SUBREG_P (operands[0])
-  && GET_MODE_CLASS (GET_MODE (SUBREG_REG (operands[0]))) != MODE_INT)
-FAIL;
-  /* Don't generate memory->memory moves, go through a register */
-  if (MEM_P (operands[0]) && MEM_P (operands[1]))
-operands[1] = force_reg (mode, operands[1]);
 })
 
 (define_insn "*movstrict_1"
   [(set (strict_low_part
- (match_operand:SWI12 0 "nonimmediate_operand" "+m,"))
-   (match_operand:SWI12 1 "general_operand" "n,m"))]
-  "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
-   && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
+ (match_operand:SWI12 0 "register_operand" "+"))
+   (match_operand:SWI12 1 "general_operand" "mn"))]
+  "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
   "mov{}\t{%1, %0|%0, %1}"
   [(set_attr "type" "imov")
(set_attr "mode" "")])
@@ -4011,8 +4005,10 @@
   ix86_expand_clear (operands[0]);
 
   gcc_assert (!TARGET_PARTIAL_REG_STALL);
-  emit_insn (gen_movstrict
- (gen_lowpart (mode, operands[0]), operands[1]));
+  emit_insn (gen_rtx_SET
+(gen_rtx_STRICT_LOW_PART
+ (VOIDmode, gen_lowpart (mode, operands[0])),
+ operands[1]));
   DONE;
 }
 
@@ -4063,8 +4059,10 @@
   ix86_expand_clear (operands[0]);
 
   gcc_assert (!TARGET_PARTIAL_REG_STALL);
-  emit_insn (gen_movstrictqi
- (gen_lowpart (QImode, operands[0]), operands[1]));
+  emit_insn (gen_rtx_SET
+(gen_rtx_STRICT_LOW_PART
+ (VOIDmode, gen_lowpart (QImode, operands[0])),
+ operands[1]));
   DONE;
 }
 
@@ -11835,7 +11833,7 @@
(set_attr "mode" "QI")])
 
 (define_insn "*setcc_qi_slp"
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand" "+qm"))
+  [(set (strict_low_part (match_operand:QI 0 "register_operand" "+q"))
(match_operator:QI 1 "ix86_comparison_operator"
  [(reg FLAGS_REG) (const_int 0)]))]
   ""
@@ -11864,7 +11862,7 @@
 })
 
 (define_split
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand"))
+  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
(ne:QI (match_operator 1 "ix86_comparison_operator"
 [(reg FLAGS_REG) (const_int 0)])
(const_int 0)))]
@@ -11896,7 +11894,7 @@
 })
 
 (define_split
-  [(set (strict_low_part (match_operand:QI 0 "nonimmediate_operand"))
+  [(set (strict_low_part (match_operand:QI 0 "register_operand"))
(eq:QI (match_operator 1 "ix86_comparison_operator"
 [(reg FLAGS_REG) (const_int 0)])
(const_int 0)))]


Re: [PATCH] Fix simd attribute handling on aarch64 (version 2)

2019-07-29 Thread Steve Ellcey
Ping.

Steve Ellcey
sell...@marvell.com

On Mon, 2019-07-22 at 11:25 -0700, Steve Ellcey wrote:
> On Fri, 2019-07-19 at 19:24 +0100, Richard Sandiford wrote:
> > 
> > You can probably also remove:
> > 
> >   tree new_type = build_distinct_type_copy (TREE_TYPE (node-
> > >decl));
> >   ...
> >   TREE_TYPE (node->decl) = new_type;
> > 
> > in simd_clone_adjust_argument_types.
> > 
> > I'm happy doing it this way or doing the copy in the AArch64 hook.
> > It's really Jakub's call.
> 
> You are right, that is no longer needed with the current patch.  I
> removed it and retested with no regressions.  Jakub, do you have
> any preference?  I have attached a new version of the patch to this
> email.
> 
> > I don't think the tests need:
> > 
> > /* { dg-require-effective-target aarch64_variant_pcs } */
> > 
> > since they're only dg-do compile.  Leaving the line out would get
> > more
> > coverage for people using older binutils.
> > 
> > The tests are OK with that change, thanks.
> 
> OK, I made that change to the tests.
> 
> 
> Latest version of the patch:
> 
> 2019-07-22  Steve Ellcey  
> 
>   * omp-simd-clone.c (simd_clone_adjust_return_type): Remove call
> to
>   build_distinct_type_copy.
>   (simd_clone_adjust_argument_types): Ditto.
>   (simd_clone_adjust): Call build_distinct_type_copy here.
>   (expand_simd_clones): Ditto.
> 
> 
> 2019-07-22  Steve Ellcey  
> 
>   * gcc.target/aarch64/simd_pcs_attribute.c: New test.
>   * gcc.target/aarch64/simd_pcs_attribute-2.c: Ditto.
>   * gcc.target/aarch64/simd_pcs_attribute-3.c: Ditto.
> 
> 


Re: [PATCH 4/5, OpenACC] Allow optional arguments to be used in the use_device OpenACC clause

2019-07-29 Thread Kwok Cheung Yeung

On 12/07/2019 12:38 pm, Kwok Cheung Yeung wrote:
This patch fixes a similar situation that occurs with the use_device
clause, where the lowering would result in a null dereference if applied
to a non-present optional argument. It builds a conditional
check that skips the dereference if the argument is non-present, and
ensures that optional arguments are treated like references.


With the updated patch in part 2 of this series, this patch can be 
simplified slightly by reducing the number of calls to 
omp_is_optional_argument.


Kwok

gcc/
* omp-low.c (lower_omp_target): For use_device clauses that take
optional arguments, generate conditional statements to avoid
dereferences of non-present arguments.
---
 gcc/omp-low.c | 75 
+--

 1 file changed, 73 insertions(+), 2 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 1c7b43b..9c40100 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11636,9 +11636,40 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)

  tkind = GOMP_MAP_FIRSTPRIVATE_INT;
type = TREE_TYPE (ovar);
if (TREE_CODE (type) == ARRAY_TYPE)
- var = build_fold_addr_expr (var);
+ {
+   var = build_fold_addr_expr (var);
+   gimplify_assign (x, var, );
+ }
else
  {
+   tree opt_arg_label;
+   bool optional_arg_p
+ = TREE_CODE (TREE_TYPE (ovar)) == POINTER_TYPE
+   && omp_is_optional_argument (ovar);
+
+   if (optional_arg_p)
+ {
+   tree null_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   tree notnull_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   opt_arg_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   tree new_x = copy_node (x);
+   gcond *cond = gimple_build_cond (EQ_EXPR, ovar,
+null_pointer_node,
+null_label,
+notnull_label);
+   gimple_seq_add_stmt (, cond);
+   gimple_seq_add_stmt (,
+gimple_build_label (null_label));
+   gimplify_assign (new_x, null_pointer_node, );
+   gimple_seq_add_stmt (,
+gimple_build_goto (opt_arg_label));
+   gimple_seq_add_stmt (,
+gimple_build_label (notnull_label));
+ }
+
if (omp_is_reference (ovar))
  {
type = TREE_TYPE (type);
@@ -11646,8 +11677,12 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)

  var = build_simple_mem_ref (var);
var = fold_convert (TREE_TYPE (x), var);
  }
+
+   gimplify_assign (x, var, );
+   if (optional_arg_p)
+ gimple_seq_add_stmt (,
+  gimple_build_label (opt_arg_label));
  }
-   gimplify_assign (x, var, );
s = size_int (0);
purpose = size_int (map_idx++);
CONSTRUCTOR_APPEND_ELT (vsize, purpose, s);
@@ -11829,11 +11864,43 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
omp_context *ctx)

  {
tree type = TREE_TYPE (var);
tree new_var = lookup_decl (var, ctx);
+   tree opt_arg_label = NULL_TREE;
+
if (omp_is_reference (var))
  {
type = TREE_TYPE (type);
if (TREE_CODE (type) != ARRAY_TYPE)
  {
+   if (omp_is_optional_argument (var))
+ {
+   tree null_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   tree notnull_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   opt_arg_label
+ = create_artificial_label (UNKNOWN_LOCATION);
+   glabel *null_glabel
+ = gimple_build_label (null_label);
+   glabel *notnull_glabel
+ = gimple_build_label (notnull_label);
+   ggoto *opt_arg_ggoto
+ = gimple_build_goto (opt_arg_label);
+   gcond *cond;
+
+   gimplify_expr (, _body, NULL, is_gimple_val,
+  fb_rvalue);
+   cond = 

Re: [PATCH 2/5, OpenACC] Support Fortran optional arguments in the firstprivate clause

2019-07-29 Thread Kwok Cheung Yeung

On 12/07/2019 12:41 pm, Jakub Jelinek wrote:

+/* Return true if DECL is a Fortran optional argument.  */
+
+bool
+omp_is_optional_argument (tree decl)
+{
+  /* A passed-by-reference Fortran optional argument is similar to
+ a normal argument, but since it can be null the type is a
+ POINTER_TYPE rather than a REFERENCE_TYPE.  */
+  return lang_GNU_Fortran ()
+&& TREE_CODE (decl) == PARM_DECL
+&& DECL_BY_REFERENCE (decl)
+&& TREE_CODE (TREE_TYPE (decl)) == POINTER_TYPE;
+}


This should be done through a langhook.
Are really all PARM_DECLs wtih DECL_BY_REFERENCE and pointer type optional
arguments?  I mean, POINTER_TYPE is used for a lot of cases.



I have now added a language hook for omp_is_optional_argument, implemented only 
for Fortran. In the Fortran FE, I have introduced a language-specific decl flag 
that is set to true during the construction of the PARM_DECLs for a function if 
the symbol for the parameter has attr.optional set. The implementation of 
omp_is_optional_argument checks this flag - this way, there should be no false 
positives.


This implementation of omp_is_optional_argument returns true for both 
by-reference and by-value optional arguments, so an extra check of the type is 
needed to differentiate between the two. I have also made by-reference optional 
arguments return true for gfc_omp_privatize_by_reference (since they should 
behave like regular arguments if present), which simplifies the code a little.


>if (omp_is_reference (new_var)
>&& TREE_CODE (TREE_TYPE (new_var)) != POINTER_TYPE)

As is, this code in lower_omp_target will always reject optional arguments, so 
it must be changed. This was introduced in commit r261025 (SVN trunk) as a fix 
for PR 85879, but it also breaks support for arrays in firstprivate, which was 
probably an unintended side-effect. I have found that allowing POINTER_TYPEs 
that are also DECL_BY_REFERENCE through in this condition allows both optional 
arguments and arrays to work without regressing the tests in r261025.


Okay for trunk?

Kwok

gcc/fortran/
* f95-lang.c (LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT): Define to
gfc_omp_is_optional_argument.
* trans-decl.c (create_function_arglist): Set
GFC_DECL_OPTIONAL_ARGUMENT in the generated decl if the parameter is
optional.
* trans-openmp.c (gfc_omp_is_optional_argument): New.
(gfc_omp_privatize_by_reference): Return true if the decl is an
optional pass-by-reference argument.
* trans.h (gfc_omp_is_optional_argument): New declaration.
(lang_decl): Add new optional_arg field.
(GFC_DECL_OPTIONAL_ARGUMENT): New macro.

gcc/
* langhooks-def.h (LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT): Default to
false.
(LANG_HOOKS_DECLS): Add LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT.
* langhooks.h (omp_is_optional_argument): New hook.
* omp-general.c (omp_is_optional_argument): New.
* omp-general.h (omp_is_optional_argument): New declaration.
* omp-low.c (lower_omp_target): Create temporary for received value
and take the address for new_var if the original variable was a
DECL_BY_REFERENCE.  Use size of referenced object when a
pass-by-reference optional argument used as argument to firstprivate.
---
 gcc/fortran/f95-lang.c |  2 ++
 gcc/fortran/trans-decl.c   |  5 +
 gcc/fortran/trans-openmp.c | 13 +
 gcc/fortran/trans.h|  4 
 gcc/langhooks-def.h|  2 ++
 gcc/langhooks.h|  3 +++
 gcc/omp-general.c  |  8 
 gcc/omp-general.h  |  1 +
 gcc/omp-low.c  |  7 +--
 9 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/gcc/fortran/f95-lang.c b/gcc/fortran/f95-lang.c
index 6b9f490..2467cd9 100644
--- a/gcc/fortran/f95-lang.c
+++ b/gcc/fortran/f95-lang.c
@@ -113,6 +113,7 @@ static const struct attribute_spec gfc_attribute_table[] =
 #undef LANG_HOOKS_TYPE_FOR_MODE
 #undef LANG_HOOKS_TYPE_FOR_SIZE
 #undef LANG_HOOKS_INIT_TS
+#undef LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENT
 #undef LANG_HOOKS_OMP_PRIVATIZE_BY_REFERENCE
 #undef LANG_HOOKS_OMP_PREDETERMINED_SHARING
 #undef LANG_HOOKS_OMP_REPORT_DECL
@@ -145,6 +146,7 @@ static const struct attribute_spec gfc_attribute_table[] =
 #define LANG_HOOKS_TYPE_FOR_MODE   gfc_type_for_mode
 #define LANG_HOOKS_TYPE_FOR_SIZE   gfc_type_for_size
 #define LANG_HOOKS_INIT_TS gfc_init_ts
+#define LANG_HOOKS_OMP_IS_OPTIONAL_ARGUMENTgfc_omp_is_optional_argument
 #define LANG_HOOKS_OMP_PRIVATIZE_BY_REFERENCE  gfc_omp_privatize_by_reference
 #define LANG_HOOKS_OMP_PREDETERMINED_SHARING   gfc_omp_predetermined_sharing
 #define LANG_HOOKS_OMP_REPORT_DECL gfc_omp_report_decl
diff --git a/gcc/fortran/trans-decl.c b/gcc/fortran/trans-decl.c
index 64ce4bb..14f4d21 100644
--- a/gcc/fortran/trans-decl.c
+++ b/gcc/fortran/trans-decl.c
@@ -2632,6 +2632,11 

Re: [PATCH][MSP430] Reject -m{code,data}-region options unless large memory model is selected

2019-07-29 Thread Jozef Lawrynowicz
On Wed, 24 Jul 2019 09:31:11 -0600
Jeff Law  wrote:

> On 7/23/19 7:22 AM, Jozef Lawrynowicz wrote:
> > 
> > gcc/ChangeLog:
> > 
> > 2019-07-23  Jozef Lawrynowicz  
> > 
> > * config/msp430/msp430.h (DRIVER_SELF_SPECS): Define and emit errors
> > when -m{code,data}-region are used without -mlarge.
> > * config/msp430/msp430.c (msp430_option_override): Error when a
> > non-default code or data region is used without -mlarge.
> > (msp430_section_attr): Emit a warning and do not add upper/lower/either
> > attributes when they are used without -mlarge.
> > 
> > gcc/testsuite/ChangeLog:
> > 
> > 2019-07-23  Jozef Lawrynowicz  
> > 
> > * gcc.target/msp430/pr78818-data-region.c: Add -mlarge to dg-options.
> > * gcc.target/msp430/region-misuse-code.c: New test.
> > * gcc.target/msp430/region-misuse-data.c: Likewise.
> > * gcc.target/msp430/region-misuse-code-data.c: Likewise.
> > * gcc.target/msp430/region-attribute-misuse.c: Likewise.  
> OK.  I didn't check all the formatting directives in the error messages.
>  You might consider building a native compiler and using that to then
> build your msp cross to ensure those formatting directives pass the
> checker.  Reasonable adjustments to the error messages to address any
> such issues found should be considered pre-approved.
> 
> jeff

Thanks for the tip. I checked for any format warnings emitted when building
the cross compiler using native trunk, and there weren't any issues with my patch.

There are problems with some of the other strings in the msp430 backend, though;
I'll make a note to fix them.

Applied.

Jozef




Re: Ping agian: [PATCH V2] Loop split upon semi-invariant condition (PR tree-optimization/89134)

2019-07-29 Thread Michael Matz
Hello Feng,

first, sorry for the terrible delay in reviewing, but here is one now :)

Generally I do like the idea of the transformation, and the basic building 
blocks seem to be sound.  But I dislike it being a separate pass, so 
please integrate the code you have written into the existing loop split 
pass.  Most building blocks can be used as is, except the main driver.

The existing loop-split code uses loop->aux as a binary marker and analyses
and transforms loops in one go; you're doing it separately.  That
separation makes sense for you, so the existing code should be changed to
also do it separately.  Some info for the existing loop-split analysis
needs to be stored in the info struct then as well, which can be done if
you add some fields to yours.  Some splitting-out of code from the
existing main driver is probably needed (basically the parts that
determine eligibility and which cond statement to split).

The two routines that actually split the loops (those that call 
loop_version) also have an overlap, so maybe something more can be 
commonized between the two (ultimately the way of splitting in both 
variants is different, so somewhere they'll do something else, but still 
some parts are common).

So, with these general remarks, some more concrete ones about the patch:

> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index eaef4cd63d2..0427fede3d6 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -11352,6 +11352,14 @@ The maximum number of branches unswitched in a 
> single loop.
>  @item lim-expensive
>  The minimum cost of an expensive expression in the loop invariant motion.
> 
> +@item max-cond-loop-split-insns
> +The maximum number of insns to be increased due to loop split on
> +semi-invariant condition statement.

"to be increased" --> "to be generated" (or "added")

> +@item min-cond-loop-split-prob
> +The minimum threshold for probability of semi-invaraint condition
> +statement to trigger loop split.

typo, semi-invariant
I think somewhere in the docs your definition of semi-invariant needs
to be stated in some form (can be short, doesn't need to reproduce the 
diagram or such), so don't just replicate the short info from the 
params.def file.

> diff --git a/gcc/params.def b/gcc/params.def
> index 0db60951413..5384f7d1c4d 100644
> --- a/gcc/params.def
> +++ b/gcc/params.def
> @@ -386,6 +386,20 @@ DEFPARAM(PARAM_MAX_UNSWITCH_LEVEL,
>  "The maximum number of unswitchings in a single loop.",
>  3, 0, 0)
> 
> +/* The maximum number of increased insns due to loop split on semi-invariant
> +   condition statement.  */
> +DEFPARAM(PARAM_MAX_COND_LOOP_SPLIT_INSNS,
> +   "max-cond-loop-split-insns",
> +   "The maximum number of insns to be increased due to loop split on "
> +   "semi-invariant condition statement.",
> +   100, 0, 0)
> +
> +DEFPARAM(PARAM_MIN_COND_LOOP_SPLIT_PROB,
> +   "min-cond-loop-split-prob",
> +   "The minimum threshold for probability of semi-invaraint condition "
> +   "statement to trigger loop split.",

Same typo: "semi-invariant".

> diff --git a/gcc/tree-ssa-loop-split.c b/gcc/tree-ssa-loop-split.c
> index 999c9a30366..7239d0cfb00 100644
> --- a/gcc/tree-ssa-loop-split.c
> +++ b/gcc/tree-ssa-loop-split.c
> -/* This file implements loop splitting, i.e. transformation of loops like
> +/* This file implements two kind of loop splitting.

kind_s_, plural

> +   One transformation of loops like:
> 
> for (i = 0; i < 100; i++)
>   {
> @@ -670,6 +674,782 @@ tree_ssa_split_loops (void)
>return 0;
>  }
> 
> +
> +/* Another transformation of loops like:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> + {
> +   if (expr (a_1, a_2, ..., a_n))
> + a_j = ...;  // change at least one a_j
> +   else
> + S;  // not change any a_j
> + }

You should mention that 'expr' needs to be pure, i.e. once it
becomes false and the inputs don't change, that it remains false.

> +
> +   into:
> +
> +   for (i = INIT (); CHECK (i); i = NEXT ())
> + {
> +   if (expr (a_1, a_2, ..., a_n))
> + a_j = ...;
> +   else
> + {
> +   S;
> +   i = NEXT ();
> +   break;
> + }
> + }
> +
> +   for (; CHECK (i); i = NEXT ())
> + {
> +   S;
> + }
> +
> +   */
> +
...
> +/* Determine when conditional statement never transfers execution to one of 
> its
> +   branch, whether we can remove the branch's leading basic block (BRANCH_BB)
> +   and those basic blocks dominated by BRANCH_BB.  */
> +
> +static bool
> +branch_removable_p (basic_block branch_bb)
> +{
> +  if (single_pred_p (branch_bb))
> +return true;
> +
> +  edge e;
> +  edge_iterator ei;
> +
> +  FOR_EACH_EDGE (e, ei, branch_bb->preds)
> +{
> +  if (dominated_by_p (CDI_DOMINATORS, e->src, branch_bb))
> +   continue;
> +
> +  if (dominated_by_p (CDI_DOMINATORS, branch_bb, e->src))
> +   continue;

My gut feeling is 

Re: [PATCH], PowerPC #11, Add DS offset mode to rs6000.c's reg_addr

2019-07-29 Thread Segher Boessenkool
Please send patch series as patch series.  It is almost impossible to
keep track of a patch series as unrelated emails, even worse than the
other way around.

Please keep subjects to about 50 chars max., distinguishable in the
first 30 or so chars, so that patches can actually be found in mail
clients and in git --oneline logs and the like.

Please keep email bodies to <72 chars wide, just like commit messages
and changelogs are (excluding the leading tab for the latter).  You
cannot use the email message (or parts of it) as commit message
otherwise, and replying to your emails is harder than needed already.

Don't write irrelevant history of other patches in a patch message.

In the subject write the core thing the patch does, and in the first
paragraph expand on that a bit.  Importantly, write *why* this is a
good thing to do.

If you make a series, the patches should be related.  Early in the
series patches are typically clean ups (that can go in separately, but
you need them for later patches), then you often have some stuff
making some infrastructure or utility functions or the like for later
patches, and then you get the meat of the matter, maybe followed by
some extra cleanups that now became possible (or that you just noticed
because of it, even).

Infrastructure additions or code rearrangement or similar stuff that
does not actually have any direct function, should always be followed
by other patches that make use of it.

When I get a patch, I read the message text.  If then I still have any
clue what the patch is about, I look if the patch actually does what it
was said to do.  I might refer to later patches in the series for that,
and also for the last step, which is to see if what the patch series
does is such a good idea at all.  If I cannot do these steps, it takes
hours more than needed to review every single patch.


Segher


[PATCH][ARM] Switch to default sched pressure algorithm

2019-07-29 Thread Wilco Dijkstra
Currently the Arm backend selects the alternative sched pressure algorithm.
The issue is that this doesn't take register pressure into account, and so
it causes significant additional spilling on Arm where there are only 14
allocatable registers.  SPEC2006 shows significant codesize reduction
with the default pressure algorithm, so switch back to that.  PR77308 shows
~800 fewer instructions.

SPECINT2006 is ~0.6% faster on Cortex-A57 together with the other DImode
patches. Overall SPEC codesize is 1.1% smaller.

Bootstrap & regress OK on arm-none-linux-gnueabihf --with-cpu=cortex-a57

ChangeLog:
2019-07-29  Wilco Dijkstra  

* config/arm/arm.c (arm_option_override): Don't override sched
pressure algorithm.

--

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 
81286cadf32f908e045d704128c5e06842e0cc92..628cf02f23fb29392a63d87f561c3ee2fb73a515
 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -3575,11 +3575,6 @@ arm_option_override (void)
   if (use_neon_for_64bits == 1)
  prefer_neon_for_64bits = true;
 
-  /* Use the alternative scheduling-pressure algorithm by default.  */
-  maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
-global_options.x_param_values,
-global_options_set.x_param_values);
-
   /* Look through ready list and all of queue for instructions
  relevant for L2 auto-prefetcher.  */
   int param_sched_autopref_queue_depth;



Re: [PATCH] Builtin function roundeven folding implementation

2019-07-29 Thread Martin Jambor
Hi Joseph,

can you please have a look at this patch from Tejas, to see whether it is
perhaps ready to be committed to trunk or what things still need to be
addressed?

Thanks a lot,

Martin



On Fri, Jun 28 2019, Tejas Joshi wrote:
> Hi.
> This patch includes implementation of new function roundeven along
> with two utility functions. The patch bootstraps on x86_64-linux-gnu
> and passes regression tests.
>
> Thanks,
> Tejas
>
> gcc/ChangeLog:
>
> 2019-06-12  Tejas Joshi  
>
> * builtins.c (mathfn_built_in_2): Added CASE_MATHFN for ROUNDEVEN.
> * builtins.def: Added function definitions for roundeven function
> variants.
> * fold-const-call.c (fold_const_call_ss): Added case for function
> call and fold_const_conversion call for roundeven function.
> * fold-const.c (negate_mathfn_p): Added case for roundeven function.
> (tree_call_nonnegative_warnv_p): Added case for roundeven function.
> (integer_valued_real_call_p): Added case for roundeven function.
> * real.c (is_even): New function. Returns true if real number is
> even, otherwise returns false.
> (is_halfway_below): New function. Returns true if real number is
> halfway between two integers, else return false.
> (real_roundeven): New function. Round real number to nearest
> integer, rounding halfway cases towards even.
> * real.h (real_value): Added descriptive comments.
> Added function declaration for roundeven function.
>
> gcc/testsuite/ChangeLog:
>
> 2019-06-12  Tejas Joshi  
>
> * gcc.dg/torture/builtin-round-roundeven.c: New test.
> * gcc.dg/torture/builtin-round-roundevenf128.c: New test.
> diff --git a/gcc/builtins.c b/gcc/builtins.c
> index 3463ffb1539..85a945877a4 100644
> --- a/gcc/builtins.c
> +++ b/gcc/builtins.c
> @@ -2085,6 +2085,7 @@ mathfn_built_in_2 (tree type, combined_fn fn)
>  CASE_MATHFN (REMQUO)
>  CASE_MATHFN_FLOATN (RINT)
>  CASE_MATHFN_FLOATN (ROUND)
> +CASE_MATHFN (ROUNDEVEN)
>  CASE_MATHFN (SCALB)
>  CASE_MATHFN (SCALBLN)
>  CASE_MATHFN (SCALBN)
> diff --git a/gcc/builtins.def b/gcc/builtins.def
> index 6d41bdb4f44..8bb7027aac7 100644
> --- a/gcc/builtins.def
> +++ b/gcc/builtins.def
> @@ -542,12 +542,18 @@ DEF_C99_BUILTIN(BUILT_IN_RINTL, "rintl", 
> BT_FN_LONGDOUBLE_LONGDOUBLE, AT
>  #define RINT_TYPE(F) BT_FN_##F##_##F
>  DEF_EXT_LIB_FLOATN_NX_BUILTINS (BUILT_IN_RINT, "rint", RINT_TYPE, 
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  #undef RINT_TYPE
> +DEF_EXT_LIB_BUILTIN(BUILT_IN_ROUNDEVEN, "roundeven", 
> BT_FN_DOUBLE_DOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_EXT_LIB_BUILTIN(BUILT_IN_ROUNDEVENF, "roundevenf", 
> BT_FN_FLOAT_FLOAT, ATTR_CONST_NOTHROW_LEAF_LIST)
> +DEF_EXT_LIB_BUILTIN(BUILT_IN_ROUNDEVENL, "roundevenl", 
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN(BUILT_IN_ROUND, "round", BT_FN_DOUBLE_DOUBLE, 
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN(BUILT_IN_ROUNDF, "roundf", BT_FN_FLOAT_FLOAT, 
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  DEF_C99_BUILTIN(BUILT_IN_ROUNDL, "roundl", 
> BT_FN_LONGDOUBLE_LONGDOUBLE, ATTR_CONST_NOTHROW_LEAF_LIST)
>  #define ROUND_TYPE(F) BT_FN_##F##_##F
>  DEF_EXT_LIB_FLOATN_NX_BUILTINS (BUILT_IN_ROUND, "round", ROUND_TYPE, 
> ATTR_CONST_NOTHROW_LEAF_LIST)
>  #undef ROUND_TYPE
> +#define ROUNDEVEN_TYPE(F) BT_FN_##F##_##F
> +DEF_EXT_LIB_FLOATN_NX_BUILTINS (BUILT_IN_ROUNDEVEN, "roundeven", 
> ROUNDEVEN_TYPE, ATTR_CONST_NOTHROW_LEAF_LIST)
> +#undef ROUNDEVEN_TYPE
>  DEF_EXT_LIB_BUILTIN(BUILT_IN_SCALB, "scalb", BT_FN_DOUBLE_DOUBLE_DOUBLE, 
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_EXT_LIB_BUILTIN(BUILT_IN_SCALBF, "scalbf", BT_FN_FLOAT_FLOAT_FLOAT, 
> ATTR_MATHFN_FPROUNDING_ERRNO)
>  DEF_EXT_LIB_BUILTIN(BUILT_IN_SCALBL, "scalbl", 
> BT_FN_LONGDOUBLE_LONGDOUBLE_LONGDOUBLE, ATTR_MATHFN_FPROUNDING_ERRNO)
> diff --git a/gcc/fold-const-call.c b/gcc/fold-const-call.c
> index 702c8b4057a..d9b546e6803 100644
> --- a/gcc/fold-const-call.c
> +++ b/gcc/fold-const-call.c
> @@ -836,6 +836,15 @@ fold_const_call_ss (real_value *result, combined_fn fn,
>   }
>return false;
>  
> +CASE_CFN_ROUNDEVEN:
> +CASE_CFN_ROUNDEVEN_FN:
> +  if (!REAL_VALUE_ISNAN (*arg) || !flag_errno_math)
> +  {
> +real_roundeven (result, format, arg);
> +return true;
> +  }
> +  return false;
> +
>  CASE_CFN_LOGB:
>return fold_const_logb (result, arg, format);
>  
> @@ -898,6 +907,10 @@ fold_const_call_ss (wide_int *result, combined_fn fn,
>return fold_const_conversion (result, real_round, arg,
>   precision, format);
>  
> +CASE_CFN_ROUNDEVEN:
> +CASE_CFN_ROUNDEVEN_FN:
> +  return fold_const_conversion (result, real_roundeven, arg, precision, 
> format);
> +
>  CASE_CFN_IRINT:
>  CASE_CFN_LRINT:
>  CASE_CFN_LLRINT:
> diff --git a/gcc/fold-const.c b/gcc/fold-const.c
> index 0ca472d422f..07d82a17e25 100644
> --- a/gcc/fold-const.c
> +++ b/gcc/fold-const.c

Re: [PATCH] PR91195: fix -Wmaybe-uninitialized warning for conditional store optimization

2019-07-29 Thread Jakub Jelinek
On Wed, Jul 24, 2019 at 12:07:36PM -0600, Martin Sebor wrote:
> > > There are a number of existing instances of setting TREE_NO_WARNING
> > > to suppress -Wuninitialized, and some are the cause of known problems.
> > > Bugs 51545, 58950, 74762, 74765 or 89697 are examples.  They all boil
> > > down to the fact that there's just one bit for all warnings.  Jakub
> > > mentioned adding a hash-map for this.  That seems like a simple and
> > > good solution.
> > I'm not sure how that really helps here.  We're marking the MEM on the LHS
> > and that's not a shared object.  I don't see how it's going to be
> > significantly different using a hash map vs the bit in this circumstance.
> 
> I don't know what Jakub had in mind but the mapping I envision is
> one like hash_map that would make it possible to set
> a bit for each distinct warning for every tree node.  It would let
> us set a bit for -Wuninitialized while leaving the bit for
> -Warray-bounds (and all other warnings) clear.

I had in mind something like a hash_map / hash_map (or just make it a ) where possibly the
bitmaps would be shared, so have a hash table in which one would look up 
existing
bitmaps containing particular sets of warnings and if not create a new one (and
where the bitmaps would be const after creation).
The TREE_NO_WARNING or gimple_no_warning bits would be a flag that one
should look up the hash_map if needed, those bits clear would imply empty
bitmap.  Routines that copy trees or gimple stmts, like copy_node or
gimple_copy would take care of adding the new tree/gimple * into the
hash_map if needed too.
And, because we have a lot of warnings that are only emitted in the FEs, or
say before IPA and not afterwards, we could also have spots where we'd
replace the bitmaps with ones that don't have any of the no longer
interesting warning bits.
Say during gimplification we could drop TREE_NO_WARNING bit or not set
gimple_no_warning if the bitmap only contains FE warning bits, or (perhaps
more questionable whether worth it) replace with a const_bitmap that doesn't
have those).
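
Purely as an illustration of the basic idea (ignoring the bitmap-sharing
refinement described above, and with made-up names; this is a sketch, not
a committed design), such an on-the-side map could look like:

static hash_map<tree, bitmap> *nowarning_map;

static void
set_no_warning_bit (tree t, int opt_index)
{
  if (!nowarning_map)
    nowarning_map = new hash_map<tree, bitmap>;
  bool existed;
  bitmap &b = nowarning_map->get_or_insert (t, &existed);
  if (!existed)
    b = BITMAP_ALLOC (NULL);
  bitmap_set_bit (b, opt_index);
  /* The single flag now only means "consult the map".  */
  TREE_NO_WARNING (t) = 1;
}

static bool
no_warning_bit_p (tree t, int opt_index)
{
  if (!TREE_NO_WARNING (t) || !nowarning_map)
    return false;
  bitmap *b = nowarning_map->get (t);
  return b && bitmap_bit_p (*b, opt_index);
}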

Jakub


Re: [PATCH] PR91195: fix -Wmaybe-uninitialized warning for conditional store optimization

2019-07-29 Thread Jakub Jelinek
On Tue, Jul 23, 2019 at 04:26:24AM +, JiangNing OS wrote:
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,9 @@
> +2019-07-22  Jiangning Liu  
> +
> + PR middle-end/91195
> + * tree-ssa-phiopt.c (cond_store_replacement): Work around
> + -Wmaybe-uninitialized warning.
> +
>  2019-07-22  Stafford Horne  
>  
>   * config/or1k/or1k.c (or1k_expand_compare): Check for int before
> diff --git a/gcc/tree-ssa-phiopt.c b/gcc/tree-ssa-phiopt.c
> index b64bde695f4..7a86007d087 100644
> --- a/gcc/tree-ssa-phiopt.c
> +++ b/gcc/tree-ssa-phiopt.c
> @@ -2240,6 +2240,14 @@ cond_store_replacement (basic_block middle_bb, 
> basic_block join_bb,
>tree base = get_base_address (lhs);
>if (!auto_var_p (base) || TREE_ADDRESSABLE (base))
>   return false;
> +
> +  /* The transformation below will inherently introduce a memory load,
> +  for which LHS may not be initialized yet if it is not in NOTRAP,
> +  so a -Wmaybe-uninitialized warning message could be triggered.
> +  Since it's a bit hard to have a very accurate uninitialization
> +  analysis to memory reference, we disable the warning here to avoid
> +  confusion.  */
> +  TREE_NO_WARNING (lhs) = 1;

I don't like this, but not for the reasons Martin stated, we use
TREE_NO_WARNING not just when we've emitted warnings, but in many places
when we've done something that might trigger false positives.
Yes, it would be nice to do it more selectively.

The problem I see with the above though is that lhs might very well be
a decl, and setting TREE_NO_WARNING on it then doesn't affect only the
hoisted load, but also all other code that refers to the decl.

If the TREE_NO_WARNING bit is set on something that isn't shareable, that is
fine with me, like a MEM_REF, TARGET_MEM_REF or handled component.  If lhs
is a decl, can we force a MEM_REF around it (and won't we fold it back to
the decl?)?  Or perhaps better, can we gimple_set_no_warning on the load
stmt instead?
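
For reference, that latter alternative would amount to something like the
following at the point where cond_store_replacement builds the hoisted load
('name' and 'load_stmt' are illustrative names; this is a sketch, not the
final fix):

  gassign *load_stmt = gimple_build_assign (name, unshare_expr (lhs));
  gimple_set_no_warning (load_stmt, true);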

Jakub


[PATCH][arm][committed] Make ACLE builtins use arm_* namespace for expanders

2019-07-29 Thread Kyrill Tkachov

Hi all,

The builtins from  use fairly general expander names such as 
"crc", "mcr" etc.

These run the risk of being reserved by the midend in the future.
Let's namespace them to arm_* as is convention.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Committing to trunk.
Thanks,
Kyrill

2019-07-29  Kyrylo Tkachov  

    * config/arm/arm-builtins.c (acle_builtin_data): Expand VAR1 to
    CODE_FOR_arm_##.
    * config/arm/arm.md (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.
    (): Rename to...
    (arm_): ... This.

diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index 8f2c93743792bbf8ee9a421c408501b3a3051b2b..07da55e10a36fd50adbac9b69eb9c77c6513465f 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -376,7 +376,7 @@ static arm_builtin_datum neon_builtin_data[] =
 #undef CF
 #undef VAR1
 #define VAR1(T, N, A) \
-  {#N, UP (A), CODE_FOR_##N, 0, T##_QUALIFIERS},
+  {#N, UP (A), CODE_FOR_arm_##N, 0, T##_QUALIFIERS},
 
 static arm_builtin_datum acle_builtin_data[] =
 {
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index f11745ba855957da4acec197f2df9c931708062a..0cc1f6e46c5ff703391403f8919279d6f71143ba 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -12081,7 +12081,7 @@
(set_attr "predicable" "yes")])
 
 ;; ARMv8 CRC32 instructions.
-(define_insn ""
+(define_insn "arm_"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 (unspec:SI [(match_operand:SI 1 "s_register_operand" "r")
 (match_operand: 2 "s_register_operand" "r")]
@@ -12197,7 +12197,7 @@
   DONE;
 })
 
-(define_insn ""
+(define_insn "arm_"
   [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "n")
 		 (match_operand:SI 1 "immediate_operand" "n")
 		 (match_operand:SI 2 "immediate_operand" "n")
@@ -12243,19 +12243,19 @@
   [(set_attr "length" "4")
(set_attr "type" "coproc")])
 
-(define_expand ""
+(define_expand "arm_"
   [(unspec_volatile [(match_operand:SI 0 "immediate_operand")
 		 (match_operand:SI 1 "immediate_operand")
 		 (mem:SI (match_operand:SI 2 "s_register_operand"))] LDCI)]
   "arm_coproc_builtin_available (VUNSPEC_)")
 
-(define_expand ""
+(define_expand "arm_"
   [(unspec_volatile [(match_operand:SI 0 "immediate_operand")
 		 (match_operand:SI 1 "immediate_operand")
 		 (mem:SI (match_operand:SI 2 "s_register_operand"))] STCI)]
   "arm_coproc_builtin_available (VUNSPEC_)")
 
-(define_insn ""
+(define_insn "arm_"
   [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "n")
 		 (match_operand:SI 1 "immediate_operand" "n")
 		 (match_operand:SI 2 "s_register_operand" "r")
@@ -12275,7 +12275,7 @@
   [(set_attr "length" "4")
(set_attr "type" "coproc")])
 
-(define_insn ""
+(define_insn "arm_"
   [(set (match_operand:SI 0 "s_register_operand" "=r")
 	(unspec_volatile:SI [(match_operand:SI 1 "immediate_operand" "n")
 			  (match_operand:SI 2 "immediate_operand" "n")
@@ -12294,7 +12294,7 @@
   [(set_attr "length" "4")
(set_attr "type" "coproc")])
 
-(define_insn ""
+(define_insn "arm_"
   [(unspec_volatile [(match_operand:SI 0 "immediate_operand" "n")
 		 (match_operand:SI 1 "immediate_operand" "n")
 		 (match_operand:DI 2 "s_register_operand" "r")
@@ -12310,7 +12310,7 @@
   [(set_attr "length" "4")
(set_attr "type" "coproc")])
 
-(define_insn ""
+(define_insn "arm_"
   [(set (match_operand:DI 0 "s_register_operand" "=r")
 	(unspec_volatile:DI [(match_operand:SI 1 "immediate_operand" "n")
 			  (match_operand:SI 2 "immediate_operand" "n")


Re: [Mingw-w64-public] Fwd: [patch] Reimplement GNU threads library on native Windows

2019-07-29 Thread Liu Hao
On 2019/7/29 22:43, JonY wrote:
> 
> Any updates?
> 

No. I am still under the impression that using thread handles as
`std::thread::id`s:

0) complexifies comparison of thread IDs without obvious benefits, and
1) does not work reliably because handles can be duplicated, and
2) makes `__gthread_self()` return invalid handles in detached threads.


-- 
Best regards,
LH_Mouse





Re: [Mingw-w64-public] Fwd: [patch] Reimplement GNU threads library on native Windows

2019-07-29 Thread JonY
On 7/3/19 12:55 PM, Liu Hao wrote:
> On 2019/7/2 8:27 PM, Jonathan Wakely wrote:
>>
>> What do you mean by "unclosed thread"? If I read it correctly, the MSDN
>> page
>> refers to closing a handle (which makes sense), not closing a thread.
>>
> 
> Yes, it meant a thread which has terminated but not deleted due to some
> handles left open.
> 
> 
>>> This could also mean that there is no effect way to denote a thread
>>> uniquely. As a consequence libstdc++ may have to its own bookkeeping
>>> mechanism.
>>
>> As I said in my last mail, libstdc++ does not need a way to denote a
>> thread uniquely.
>>
> 
> At my last glance at the `__gthread_` interfaces, libstdc++ requires
> thread IDs to be LessThanComparable, which would require retrieval of
> thread IDs by handle, as in `__gthread_equal()`.
> 
> More than that, if my previous vision was correct (a terminated thread
> has no ID associated) then `GetThreadId()` on a thread that has
> terminated would not return a valid thread ID. Fortunately, this seems
> not the case:
> 
> ```c
> #include 
> #include 
> 
> DWORD __stdcall ThreadProc(void* pParam)
>   {
> printf("thread %lu running\n", GetCurrentThreadId());
> return 0;
>   }
> 
> int main(void)
>   {
> HANDLE hThread = CreateThread(0, 0, ThreadProc, 0, CREATE_SUSPENDED, 0);
> printf("thread %lu created\n", GetThreadId(hThread));
> 
> ResumeThread(hThread);
> WaitForSingleObject(hThread, INFINITE);
> printf("thread %lu terminated\n", GetThreadId(hThread));
> 
> CloseHandle(hThread);
> // `hThread` is now invalid; DO NOT PLAY WITH THIS AT HOME!
> printf("thread %lu closed\n", GetThreadId(hThread));
>   }
> ```
> 
> This program outputs
> 
> ```text
> E:\Desktop>gcc test.c -Wall -Wextra -Wpedantic && a.exe
> test.c: In function 'ThreadProc':
> test.c:4:34: warning: unused parameter 'pParam' [-Wunused-parameter]
> 4 | DWORD __stdcall ThreadProc(void* pParam)
>   |~~^~
> thread 9172 created
> thread 9172 running
> thread 9172 terminated
> thread 0 closed
> 
> E:\Desktop>
> ```
> 
> Despite Microsoft's documentation, the identifier of a thread seems
> uncollected as long as there are still handles to the thread. So it
> might be safe to assume that the identifier of an `std::thread` *cannot*
> be reused before it is `join()`'d or `detach()`'d which closes the
> handle stored in the `std::thread` object.
> 
> 

Any updates?





[PATCH] Improve PTA for PR91257, new bitmap_ior_into_and_free

2019-07-29 Thread Richard Biener


The following patch addresses apparent quadratic behavior in PTA's
SCC merging algorithm, which does (simplified)

 for (node N in SCC)
   bitmap_ior_into (pred(leader), pred(N));

With the linked-list implementation this leads to an increasingly large
amount of walking for pred(leader).

The patch changes this to

 while (split /= 2)
   for (node N in lower-half-of-SCC)
 bitmap_ior_into_and_free (pred (N), pred (N in upper half of SCC));

so in each outer iteration reducing the number of bitmaps by a factor
of two and thus hopefully (and in practice for the testcase) reducing
the intermediate bitmap sizes we work on.

This improves

 tree PTA               :   8.38 ( 23%)   0.22 ( 33%)   8.62 ( 24%)    7679 kB (  1%)

to

 tree PTA               :   5.30 ( 16%)   0.22 ( 32%)   5.54 ( 16%)   28081 kB (  4%)

for the reduced testcase.

I somehow didn't manage a 1:1 conversion, so I still need to re-check the
details here, but I'd like to hear comments about the new bitmap API,
which is a source-destructive variant of bitmap_ior_into: instead of
copying elements not in A, it moves them from B and in the end releases
the rest.  Note that using this API while keeping the merging as-is doesn't
help PTA time.
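
In pseudo-code, the merging scheme sketched above boils down to something
like this ('preds' and 'n' are illustrative stand-ins for the SCC members'
predecessor bitmaps and their count, not names from the patch):

  while (n > 1)
    {
      unsigned half = (n + 1) / 2;
      /* Merge the upper half of the bitmaps into the lower half,
         halving the number of live bitmaps each round.  */
      for (unsigned i = 0; i + half < n; ++i)
        bitmap_ior_into_and_free (preds[i], &preds[i + half]);
      n = half;
    }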

Richard.

2019-07-29  Richard Biener  

PR tree-optimization/91257
* bitmap.h (bitmap_ior_into_and_free): Declare.
* bitmap.c (bitmap_list_unlink_element): Add defaulted param
whether to add the unliked element to the freelist.
(bitmap_list_insert_element_after): Add defaulted param for
an already allocated element.
(bitmap_ior_into_and_free): New function.
* tree-ssa-structalias.c (condense_visit): Reduce the
predecessor and edge bitmaps of the SCC members in a
logarithmic fashion rather than all to one.

Index: gcc/bitmap.c
===
--- gcc/bitmap.c(revision 273795)
+++ gcc/bitmap.c(working copy)
@@ -252,7 +252,8 @@ bitmap_list_link_element (bitmap head, b
and return it to the freelist.  */
 
 static inline void
-bitmap_list_unlink_element (bitmap head, bitmap_element *element)
+bitmap_list_unlink_element (bitmap head, bitmap_element *element,
+   bool to_freelist = true)
 {
   bitmap_element *next = element->next;
   bitmap_element *prev = element->prev;
@@ -279,18 +280,21 @@ bitmap_list_unlink_element (bitmap head,
head->indx = 0;
 }
 
-  bitmap_elem_to_freelist (head, element);
+  if (to_freelist)
+bitmap_elem_to_freelist (head, element);
 }
 
-/* Insert a new uninitialized element into bitmap HEAD after element
-   ELT.  If ELT is NULL, insert the element at the start.  Return the
-   new element.  */
+/* Insert a new uninitialized element (or NODE if not NULL) into bitmap
+   HEAD after element ELT.  If ELT is NULL, insert the element at the start.
+   Return the new element.  */
 
 static bitmap_element *
 bitmap_list_insert_element_after (bitmap head,
- bitmap_element *elt, unsigned int indx)
+ bitmap_element *elt, unsigned int indx,
+ bitmap_element *node = NULL)
 {
-  bitmap_element *node = bitmap_element_allocate (head);
+  if (!node)
+node = bitmap_element_allocate (head);
   node->indx = indx;
 
   gcc_checking_assert (!head->tree_form);
@@ -2009,6 +2013,56 @@ bitmap_ior_into (bitmap a, const_bitmap
   return changed;
 }
 
+/* A |= B.  Return true if A changes.  Free B (re-using its storage
+   for the result).  */
+
+bool
+bitmap_ior_into_and_free (bitmap a, bitmap *b_)
+{
+  bitmap b = *b_;
+  bitmap_element *a_elt = a->first;
+  bitmap_element *b_elt = b->first;
+  bitmap_element *a_prev = NULL;
+  bitmap_element **a_prev_pnext = >first;
+  bool changed = false;
+
+  gcc_checking_assert (!a->tree_form && !b->tree_form);
+  gcc_assert (a->obstack == b->obstack);
+  if (a == b)
+return false;
+
+  while (b_elt)
+{
+  /* If A lags behind B, just advance it.  */
+  if (!a_elt || a_elt->indx == b_elt->indx)
+   {
+ changed = bitmap_elt_ior (a, a_elt, a_prev, a_elt, b_elt, changed);
+ b_elt = b_elt->next;
+   }
+  else if (a_elt->indx > b_elt->indx)
+   {
+ bitmap_element *b_elt_next = b_elt->next;
+ bitmap_list_unlink_element (b, b_elt, false);
+ bitmap_list_insert_element_after (a, a_prev, b_elt->indx, b_elt);
+ b_elt = b_elt_next;
+   }
+
+  a_prev = *a_prev_pnext;
+  a_prev_pnext = _prev->next;
+  a_elt = *a_prev_pnext;
+}
+
+  gcc_checking_assert (!a->current == !a->first);
+  if (a->current)
+a->indx = a->current->indx;
+
+  if (b->obstack)
+BITMAP_FREE (*b_);
+  else
+bitmap_clear (b);
+  return changed;
+}
+
 /* DST = A ^ B  */
 
 void
Index: gcc/bitmap.h
===
--- gcc/bitmap.h(revision 273795)
+++ 

Re: [PATCH] Remove also 2nd argument for unused delete operator (PR tree-optimization/91270).

2019-07-29 Thread Richard Biener
On Mon, Jul 29, 2019 at 12:37 PM Martin Liška  wrote:
>
> On 7/29/19 11:59 AM, Richard Biener wrote:
> > On Sun, Jul 28, 2019 at 4:57 PM Martin Liška  wrote:
> >>
> >> Hi.
> >>
> >> And there's one more patch that deals with delete operator
> >> which has a second argument (size). That definition GIMPLE
> >> statement of the size must be also properly marked.
> >>
> >> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
> >>
> >> Ready to be installed?
> >
> > This isn't a proper fix.  You can't mark a random operand as necessary
> > during elimination - this only works in very constraint cases.  Here
> > it will break down if the stmt you just marked depends on another stmt
> > only used by the stmt you just marked (and thus also not necessary).
>
> Ah, I see.
>
> >
> > The fix is to propagate_necessity where you either have to excempt
> > the two-operator delete from handling
>
> We want to process also these delete operators.
>
> > or to mark its second operand
> > as necessary despite eventually deleting the call.
>
> Really marking that as necessary? Why?
>
> > You can avoid
> > this in case the allocation function used the exact same size argument.
>
> That's not the case as the operator new happens in a loop:
>
>:
>   # iftmp.0_15 = PHI 
>   _23 = operator new [] (iftmp.0_15);
>   _24 = _23;
>   MEM[(sizetype *)_24] = _19;
>   _26 = _24 + 8;
>   _27 = _26;
>   _2 = _19 + 18446744073709551615;
>   _28 = (long int) _2;
>
>:
>   # _12 = PHI <_27(5), _29(7)>
>   # _13 = PHI <_28(5), _30(7)>
>   if (_13 < 0)
> goto ; [INV]
>   else
> goto ; [INV]
>
> Btw. is the hunk moved to propagate_necessity still wrong:
>
> diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
> index cf507fa0453..1c890889932 100644
> --- a/gcc/tree-ssa-dce.c
> +++ b/gcc/tree-ssa-dce.c
> @@ -803,7 +803,23 @@ propagate_necessity (bool aggressive)
>|| DECL_FUNCTION_CODE (def_callee) == 
> BUILT_IN_MALLOC
>|| DECL_FUNCTION_CODE (def_callee) == 
> BUILT_IN_CALLOC))
>   || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (def_callee)))
> -   continue;
> +   {
> + /* Some delete operators have 2 arguments, where
> +the second argument is size of the deallocated memory.  
> */
> + if (gimple_call_num_args (stmt) == 2)

Please make sure this only matches operator delete (just make the check
we already do above also set a bool flag).

> +   {
> + tree ptr = gimple_call_arg (stmt, 1);
> + if (TREE_CODE (ptr) == SSA_NAME)

you can use mark_operand_necessary (ptr) here (but please name 'ptr'
differently ;))

And note you can elide this in case the size arguments to new and delete
match.  I notice the above also matches

   ptr = malloc (4);
   delete ptr;

not sure if intended (or harmful).

Richard.

> +   {
> + gimple *def_stmt = SSA_NAME_DEF_STMT (ptr);
> + if (!gimple_nop_p (def_stmt)
> + && !gimple_plf (def_stmt, STMT_NECESSARY))
> +   gimple_set_plf (stmt, STMT_NECESSARY, false);
> +   }
> +   }
> +
> + continue;
> +   }
> }
>
>   FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)
>
> Thanks,
> Martin
>
> >
> > Richard.
> >
> >> Thanks,
> >> Martin
>


[PATCH] PR libstdc++/51333 Define recursive_init_error constructor non-inline

2019-07-29 Thread Jonathan Wakely

The recursive_init_error class is defined in a header, with an inline
constructor, but the definition of the vtable and destructor are not
exported from the shared library. With -fkeep-inline-functions the
constructor gets emitted in user code, and requires the (non-exported)
vtable. This fails to link.

As far as I can tell, the recursive_init_error class definition was
moved into  so it could be documented with Doxygen, not for
any technical reason. But now it's there (and documented), somebody
could be relying on it, by catching that type and possibly performing
derived-to-base conversions to the std::exception base class. So the
conservative fix is to leave the class definition in the header but make
the constructor non-inline. This still allows the type to be caught and
still defines its base class. User code can no longer construct objects
of that type, but that's not something we need to support.
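
For illustration, the kind of user code that keeps working after this change
(recursive_init below is a hypothetical function whose guarded initialization
re-enters itself; this is a sketch, not a test from the patch):

#include <cxxabi.h>
#include <exception>
#include <cstdio>

void recursive_init ();   // hypothetical trigger of __cxa_guard_acquire

void handle ()
{
  try
    {
      recursive_init ();
    }
  catch (const __gnu_cxx::recursive_init_error &e)
    {
      // Catching the type and converting to the base class still works;
      // only constructing recursive_init_error directly is no longer possible.
      const std::exception &base = e;
      std::puts (base.what ());
    }
}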

PR libstdc++/51333
* libsupc++/cxxabi.h (__gnu_cxx::recursive_init_error): Do not define
constructor inline.
* libsupc++/guard_error.cc (__gnu_cxx::recursive_init_error): Define
constructor.
* testsuite/18_support/51333.cc: New test.

Tested x86_64-linux, committed to trunk.

commit 853fb49def8642dd687cd053dda5b8167082934f
Author: redi 
Date:   Mon Jul 29 14:27:19 2019 +

PR libstdc++/51333 Define recursive_init_error constructor non-inline

The recursive_init_error class is defined in a header, with an inline
constructor, but the definition of the vtable and destructor are not
exported from the shared library. With -fkeep-inline-functions the
constructor gets emitted in user code, and requires the (non-exported)
vtable. This fails to link.

As far as I can tell, the recursive_init_error class definition was
moved into  so it could be documented with Doxygen, not for
any technical reason. But now it's there (and documented), somebody
could be relying on it, by catching that type and possibly performing
derived-to-base conversions to the std::exception base class. So the
conservative fix is to leave the class definition in the header but make
the constructor non-inline. This still allows the type to be caught and
still defines its base class. User code can no longer construct objects
of that type, but that's not something we need to support.

PR libstdc++/51333
* libsupc++/cxxabi.h (__gnu_cxx::recursive_init_error): Do not 
define
constructor inline.
* libsupc++/guard_error.cc (__gnu_cxx::recursive_init_error): Define
constructor.
* testsuite/18_support/51333.cc: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@273878 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/libsupc++/cxxabi.h b/libstdc++-v3/libsupc++/cxxabi.h
index 3562d4c2271..4d9f829ffed 100644
--- a/libstdc++-v3/libsupc++/cxxabi.h
+++ b/libstdc++-v3/libsupc++/cxxabi.h
@@ -684,8 +684,9 @@ namespace __gnu_cxx
*  @brief Exception thrown by __cxa_guard_acquire.
*  @ingroup exceptions
*
-   *  6.7[stmt.dcl]/4: If control re-enters the declaration (recursively)
-   *  while the object is being initialized, the behavior is undefined.
+   *  C++ 2011 6.7 [stmt.dcl]/4: If control re-enters the declaration
+   *  recursively while the variable is being initialized, the behavior
+   *  is undefined.
*
*  Since we already have a library function to handle locking, we might
*  as well check for this situation and throw an exception.
@@ -695,8 +696,8 @@ namespace __gnu_cxx
   class recursive_init_error: public std::exception
   {
   public:
-recursive_init_error() throw() { }
-virtual ~recursive_init_error() throw ();
+recursive_init_error() _GLIBCXX_NOTHROW;
+virtual ~recursive_init_error() _GLIBCXX_NOTHROW;
   };
 }
 #endif // __cplusplus
diff --git a/libstdc++-v3/libsupc++/guard_error.cc 
b/libstdc++-v3/libsupc++/guard_error.cc
index 7595c67872a..d7625af4d54 100644
--- a/libstdc++-v3/libsupc++/guard_error.cc
+++ b/libstdc++-v3/libsupc++/guard_error.cc
@@ -26,6 +26,6 @@
 
 namespace __gnu_cxx
 {
-  recursive_init_error::~recursive_init_error() throw() { }
+  recursive_init_error::recursive_init_error() noexcept { }
+  recursive_init_error::~recursive_init_error() noexcept { }
 }
-
diff --git a/libstdc++-v3/testsuite/18_support/51333.cc 
b/libstdc++-v3/testsuite/18_support/51333.cc
new file mode 100644
index 000..0fb7c338f8b
--- /dev/null
+++ b/libstdc++-v3/testsuite/18_support/51333.cc
@@ -0,0 +1,22 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in 

Re: Fix inchash handling of wide_ints (PR91242)

2019-07-29 Thread Richard Biener
On Mon, Jul 29, 2019 at 3:06 PM Richard Sandiford
 wrote:
>
> Richard Sandiford  writes:
> > Richard Biener  writes:
> >> On Mon, Jul 29, 2019 at 11:11 AM Richard Sandiford
> >>  wrote:
> >>>
> >>> inchash::hash::add_wide_int operated directly on the raw encoding
> >>> of the wide_int, including any redundant upper bits.  The problem
> >>> with that is that the upper bits are only defined for some wide-int
> >>> storage types (including wide_int itself).  wi::to_wide(tree) instead
> >>> returns a value that is extended according to the signedness of the
> >>> type (so that wi::to_widest can use the same encoding) while rtxes
> >>> have the awkward special case of BI, which can be zero-extended
> >>> rather than sign-extended.
> >>>
> >>> In the PR, we computed a hash for a "normal" sign-extended wide_int
> >>> while the existing entries hashed wi::to_wide(tree).  This gives
> >>> different results for unsigned types that have the top bit set.
> >>>
> >>> The patch fixes that by hashing the canonical sign-extended form even
> >>> if the raw encoding happens to be different.
> >>>
> >>> Tested on aarch64-linux-gnu (with and without SVE), armeb-eabi
> >>> and x86_64-linux-gnu.  OK to install?
> >>
> >> But only the most significant elt needs this treatment, no?  So
> >> we can save some compile-time in the hasing by not doing
> >> it for all elts.
> >
> > Yeah, we only need it for the most significant elt.  But the vast
> > majority of constants need 64 bits or fewer of encoding anyway
> > (even if the precision is greater than 64 bits), so that's usually the
> > only one we need to process.  I'm not convinced handling multiple elts
> > specially would pay off.
>
> To go into a bit more detail, the code for:
>
> void
> f1 (inchash::hash , const wide_int )
> {
>   h.add_wide_int (x);
> }
>
> doesn't change with the patch, because is_sign_extended is true for
> wide_int.  The code on x86_64 from a bootstrap compiler is:
>
> 
> .cfi_startproc
> // Hash the length.
> movl    24(%rsi), %edx
> movl    (%rdi), %ecx
> movl    $-1640531527, %r8d
> subl    %edx, %r8d
> subl    %ecx, %edx
> movl    %r8d, %eax
> movl    %ecx, %r8d
> subl    %ecx, %eax
> shrl    $13, %r8d
> xorl    %eax, %r8d
> movl    %edx, %eax
> movl    %r8d, %edx
> subl    %r8d, %eax
> subl    %r8d, %ecx
> sall    $8, %edx
> xorl    %eax, %edx
> movl    %ecx, %eax
> movl    %edx, %ecx
> subl    %edx, %eax
> subl    %edx, %r8d
> shrl    $13, %ecx
> xorl    %eax, %ecx
> movl    %r8d, %eax
> movl    %ecx, %r8d
> subl    %ecx, %eax
> subl    %ecx, %edx
> shrl    $12, %r8d
> xorl    %eax, %r8d
> movl    %edx, %eax
> movl    %r8d, %edx
> subl    %r8d, %eax
> subl    %r8d, %ecx
> sall    $16, %edx
> xorl    %eax, %edx
> movl    %ecx, %eax
> movl    %edx, %ecx
> subl    %edx, %eax
> subl    %edx, %r8d
> shrl    $5, %ecx
> xorl    %eax, %ecx
> movl    %ecx, %eax
> subl    %ecx, %r8d
> subl    %ecx, %edx
> shrl    $3, %eax
> xorl    %eax, %r8d
> movl    %edx, %eax
> movl    %r8d, %edx
> subl    %r8d, %eax
> sall    $10, %edx
> xorl    %eax, %edx
> movl    %ecx, %eax
> subl    %r8d, %eax
> xorl    %r8d, %r8d
> subl    %edx, %eax
> shrl    $15, %edx
> xorl    %edx, %eax
> movl    %eax, (%rdi)
> // Hash the elements.
> movl    24(%rsi), %edx
> testl   %edx, %edx
> je  .L444
> .p2align 4,,10
> .p2align 3
> .L446:
> movl    %r8d, %edx
> movq    (%rsi,%rdx,8), %r9
> movq    %r9, %rdx
> subl    %eax, %r9d
> sarq    $32, %rdx
> movl    %r9d, %ecx
> movl    %eax, %r9d
> subl    %edx, %ecx
> shrl    $13, %r9d
> subl    %eax, %edx
> xorl    %ecx, %r9d
> movl    %edx, %ecx
> movl    %r9d, %edx
> subl    %r9d, %ecx
> subl    %r9d, %eax
> sall    $8, %edx
> xorl    %ecx, %edx
> movl    %edx, %ecx
> subl    %edx, %eax
> subl    %edx, %r9d
> shrl    $13, %ecx
> xorl    %ecx, %eax
> movl    %r9d, %ecx
> movl    %eax, %r9d
> subl    %eax, %ecx
> subl    %eax, %edx
> shrl    $12, %r9d
> xorl    %ecx, %r9d
> movl    %edx, %ecx
> movl    %r9d, %edx
> subl    %r9d, %ecx
> subl    %r9d, %eax
> sall    $16, %edx
> xorl    %ecx, %edx
> movl    %edx, %ecx
> subl    %edx, %eax
> subl    %edx, %r9d
> shrl 

Re: [PATCH] Do not emit __gnu_lto_v1 symbol.

2019-07-29 Thread Georg-Johann Lay

Hi Martin.

In SVN r273662 you changed a comment in avr.c from __gnu_lto_v1 to 
__gnu_lto_slim without changing the relevant code:


  /* __gnu_lto_slim is just a marker for the linker injected by toplev.c.
 There is no need to trigger __do_clear_bss code for them.  */

  if (!STR_PREFIX_P (name, "__gnu_lto"))
avr_need_clear_bss_p = true;

This means that just defining symbol __gnu_lto_slim will trigger code 
from libgcc (code to clear .bss, which is supposed to be conditional 
depending on whether .bss actually contains anything).


Would you also adjust that test?

Thanks,

Johann



[PATCH] Provide proper error message for -flto=abcd.

2019-07-29 Thread Martin Liška
Hi.

The patch provides a proper error message for situations where the
user gives a wrong argument to the -flto option.
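
For reference, here is a minimal standalone sketch (hypothetical helper
name, not the option-handling code itself) of the acceptance rule the
hunk below implements:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Accept "none", "jobserver", or anything atoi maps to a non-zero
   number, mirroring the check added to common_handle_option.  */
static int
flto_arg_ok (const char *arg)
{
  return strcmp (arg, "none") == 0
	 || strcmp (arg, "jobserver") == 0
	 || atoi (arg) != 0;
}

int
main (void)
{
  printf ("%d %d %d\n",
	  flto_arg_ok ("4"),          /* 1 */
	  flto_arg_ok ("jobserver"),  /* 1 */
	  flto_arg_ok ("sparta"));    /* 0 -> gets the new error */
  return 0;
}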

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin

gcc/ChangeLog:

2019-07-29  Martin Liska  

* opts.c (common_handle_option): Error for an invalid argument
to -flto=.

gcc/testsuite/ChangeLog:

2019-07-29  Martin Liska  

* gcc.dg/spellcheck-options-21.c: New test.
---
 gcc/opts.c   | 8 
 gcc/testsuite/gcc.dg/spellcheck-options-21.c | 3 +++
 2 files changed, 11 insertions(+)
 create mode 100644 gcc/testsuite/gcc.dg/spellcheck-options-21.c


diff --git a/gcc/opts.c b/gcc/opts.c
index 296f0f61802..3639edf4cd7 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -2821,6 +2821,14 @@ common_handle_option (struct gcc_options *opts,
   opts->x_flag_lto = value ? "" : NULL;
   break;
 
+case OPT_flto_:
+  if (strcmp (arg, "none") != 0
+	  && strcmp (arg, "jobserver") != 0
+	  && atoi (arg) == 0)
+	error_at (loc,
+		  "unrecognized argument to %<-flto=%> option: %qs", arg);
+  break;
+
 case OPT_w:
   dc->dc_inhibit_warnings = true;
   break;
diff --git a/gcc/testsuite/gcc.dg/spellcheck-options-21.c b/gcc/testsuite/gcc.dg/spellcheck-options-21.c
new file mode 100644
index 000..3e0e8a8ebaf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/spellcheck-options-21.c
@@ -0,0 +1,3 @@
+/* { dg-do compile } */
+/* { dg-options "-flto=sparta" } */
+/* { dg-error "unrecognized argument to '-flto=' option: 'sparta'" "" { target *-*-* } 0 } */



Re: [PATCH] Deduce automatically number of cores for -flto option.

2019-07-29 Thread Martin Liška
Hi.

I'm sending v2 of the patch, which can now auto-detect a make
jobserver and use it.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
From df747a46241dcdb8ad055071f9e0605c9f469268 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Tue, 23 Jul 2019 10:14:31 +0200
Subject: [PATCH 1/2] Deduce automatically number of cores for -flto option.

gcc/ChangeLog:

2019-07-23  Martin Liska  

	* doc/invoke.texi: Document new behavior.
	* lto-wrapper.c (cpuset_popcount): New function
	is a copy of libgomp/config/linux/proc.c.
	(init_num_threads): Likewise.
	(run_gcc): Automatically detect core count for -flto.
	(jobserver_active_p): New function.
---
 gcc/doc/invoke.texi |   3 +-
 gcc/lto-wrapper.c   | 167 ++--
 2 files changed, 162 insertions(+), 8 deletions(-)

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 77a2d561e38..f5bfea3f003 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -10396,7 +10396,8 @@ If you specify the optional @var{n}, the optimization and code
 generation done at link time is executed in parallel using @var{n}
 parallel jobs by utilizing an installed @command{make} program.  The
 environment variable @env{MAKE} may be used to override the program
-used.  The default value for @var{n} is 1.
+used.  The default value for @var{n} is automatically detected based
+on the number of cores.
 
 You can also specify @option{-flto=jobserver} to use GNU make's
 job server mode to determine the number of parallel jobs. This
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index 946897726d0..353187c6043 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -1110,6 +1110,136 @@ cmp_priority (const void *a, const void *b)
   return *((const int *)b)-*((const int *)a);
 }
 
+/* Number of CPUs that can be used for parallel LTRANS phase.  */
+
+static unsigned long nthreads_var = 0;
+
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+unsigned long cpuset_size;
+static unsigned long get_cpuset_size;
+cpu_set_t *cpusetp;
+
+static unsigned long
+cpuset_popcount (unsigned long cpusetsize, cpu_set_t *cpusetp)
+{
+#ifdef CPU_COUNT_S
+  /* glibc 2.7 and above provide a macro for this.  */
+  return CPU_COUNT_S (cpusetsize, cpusetp);
+#else
+#ifdef CPU_COUNT
+  if (cpusetsize == sizeof (cpu_set_t))
+/* glibc 2.6 and above provide a macro for this.  */
+return CPU_COUNT (cpusetp);
+#endif
+  size_t i;
+  unsigned long ret = 0;
+  STATIC_ASSERT (sizeof (cpusetp->__bits[0]) == sizeof (unsigned long int));
+  for (i = 0; i < cpusetsize / sizeof (cpusetp->__bits[0]); i++)
+{
+  unsigned long int mask = cpusetp->__bits[i];
+  if (mask == 0)
+	continue;
+  ret += __builtin_popcountl (mask);
+}
+  return ret;
+#endif
+}
+#endif
+
+/* At startup, determine the default number of threads.  It would seem
+   this should be related to the number of cpus online.  */
+
+static void
+init_num_threads (void)
+{
+#ifdef HAVE_PTHREAD_AFFINITY_NP
+#if defined (_SC_NPROCESSORS_CONF) && defined (CPU_ALLOC_SIZE)
+  cpuset_size = sysconf (_SC_NPROCESSORS_CONF);
+  cpuset_size = CPU_ALLOC_SIZE (cpuset_size);
+#else
+  cpuset_size = sizeof (cpu_set_t);
+#endif
+
+  cpusetp = (cpu_set_t *) xmalloc (cpuset_size);
+  do
+{
+  int ret = pthread_getaffinity_np (pthread_self (), cpuset_size,
+	cpusetp);
+  if (ret == 0)
+	{
+	  /* Count only the CPUs this process can use.  */
+	  nthreads_var = cpuset_popcount (cpuset_size, cpusetp);
+	  if (nthreads_var == 0)
+	break;
+	  get_cpuset_size = cpuset_size;
+#ifdef CPU_ALLOC_SIZE
+	  unsigned long i;
+	  for (i = cpuset_size * 8; i; i--)
+	if (CPU_ISSET_S (i - 1, cpuset_size, cpusetp))
+	  break;
+	  cpuset_size = CPU_ALLOC_SIZE (i);
+#endif
+	  return;
+	}
+  if (ret != EINVAL)
+	break;
+#ifdef CPU_ALLOC_SIZE
+  if (cpuset_size < sizeof (cpu_set_t))
+	cpuset_size = sizeof (cpu_set_t);
+  else
+	cpuset_size = cpuset_size * 2;
+  if (cpuset_size < 8 * sizeof (cpu_set_t))
+	cpusetp
+	  = (cpu_set_t *) realloc (cpusetp, cpuset_size);
+  else
+	{
+	  /* Avoid fatal if too large memory allocation would be
+	 requested, e.g. kernel returning EINVAL all the time.  */
+	  void *p = realloc (cpusetp, cpuset_size);
+	  if (p == NULL)
+	break;
+	  cpusetp = (cpu_set_t *) p;
+	}
+#else
+  break;
+#endif
+}
+  while (1);
+  cpuset_size = 0;
+  nthreads_var = 1;
+  free (cpusetp);
+  cpusetp = NULL;
+#endif
+#ifdef _SC_NPROCESSORS_ONLN
+  nthreads_var = sysconf (_SC_NPROCESSORS_ONLN);
+#endif
+}
+
+/* FIXME: once using -std=c11, we can use std::thread::hardware_concurrency.  */
+
+/* Return true when a jobserver is running and can accept a job.  */
+
+static bool
+jobserver_active_p (void)
+{
+  const char *makeflags = getenv ("MAKEFLAGS");
+  if (makeflags == NULL)
+return false;
+
+  const char *needle = "--jobserver-auth=";
+  const char *n = strstr (makeflags, needle);
+  if (n == NULL)
+return 

Re: Fix inchash handling of wide_ints (PR91242)

2019-07-29 Thread Richard Sandiford
Richard Sandiford  writes:
> Richard Biener  writes:
>> On Mon, Jul 29, 2019 at 11:11 AM Richard Sandiford
>>  wrote:
>>>
>>> inchash::hash::add_wide_int operated directly on the raw encoding
>>> of the wide_int, including any redundant upper bits.  The problem
>>> with that is that the upper bits are only defined for some wide-int
>>> storage types (including wide_int itself).  wi::to_wide(tree) instead
>>> returns a value that is extended according to the signedness of the
>>> type (so that wi::to_widest can use the same encoding) while rtxes
>>> have the awkward special case of BI, which can be zero-extended
>>> rather than sign-extended.
>>>
>>> In the PR, we computed a hash for a "normal" sign-extended wide_int
>>> while the existing entries hashed wi::to_wide(tree).  This gives
>>> different results for unsigned types that have the top bit set.
>>>
>>> The patch fixes that by hashing the canonical sign-extended form even
>>> if the raw encoding happens to be different.
>>>
>>> Tested on aarch64-linux-gnu (with and without SVE), armeb-eabi
>>> and x86_64-linux-gnu.  OK to install?
>>
>> But only the most significant elt needs this treatment, no?  So
>> we can save some compile-time in the hasing by not doing
>> it for all elts.
>
> Yeah, we only need it for the most significant elt.  But the vast
> majority of constants need 64 bits or fewer of encoding anyway
> (even if the precision is greater than 64 bits), so that's usually the
> only one we need to process.  I'm not convinced handling multiple elts
> specially would pay off.

To go into a bit more detail, the code for:

void
f1 (inchash::hash &h, const wide_int &x)
{
  h.add_wide_int (x);
}

doesn't change with the patch, because is_sign_extended is true for
wide_int.  The code on x86_64 from a bootstrap compiler is:


.cfi_startproc
// Hash the length.
movl    24(%rsi), %edx
movl    (%rdi), %ecx
movl    $-1640531527, %r8d
subl    %edx, %r8d
subl    %ecx, %edx
movl    %r8d, %eax
movl    %ecx, %r8d
subl    %ecx, %eax
shrl    $13, %r8d
xorl    %eax, %r8d
movl    %edx, %eax
movl    %r8d, %edx
subl    %r8d, %eax
subl    %r8d, %ecx
sall    $8, %edx
xorl    %eax, %edx
movl    %ecx, %eax
movl    %edx, %ecx
subl    %edx, %eax
subl    %edx, %r8d
shrl    $13, %ecx
xorl    %eax, %ecx
movl    %r8d, %eax
movl    %ecx, %r8d
subl    %ecx, %eax
subl    %ecx, %edx
shrl    $12, %r8d
xorl    %eax, %r8d
movl    %edx, %eax
movl    %r8d, %edx
subl    %r8d, %eax
subl    %r8d, %ecx
sall    $16, %edx
xorl    %eax, %edx
movl    %ecx, %eax
movl    %edx, %ecx
subl    %edx, %eax
subl    %edx, %r8d
shrl    $5, %ecx
xorl    %eax, %ecx
movl    %ecx, %eax
subl    %ecx, %r8d
subl    %ecx, %edx
shrl    $3, %eax
xorl    %eax, %r8d
movl    %edx, %eax
movl    %r8d, %edx
subl    %r8d, %eax
sall    $10, %edx
xorl    %eax, %edx
movl    %ecx, %eax
subl    %r8d, %eax
xorl    %r8d, %r8d
subl    %edx, %eax
shrl    $15, %edx
xorl    %edx, %eax
movl    %eax, (%rdi)
// Hash the elements.
movl    24(%rsi), %edx
testl   %edx, %edx
je  .L444
.p2align 4,,10
.p2align 3
.L446:
movl    %r8d, %edx
movq    (%rsi,%rdx,8), %r9
movq    %r9, %rdx
subl    %eax, %r9d
sarq    $32, %rdx
movl    %r9d, %ecx
movl    %eax, %r9d
subl    %edx, %ecx
shrl    $13, %r9d
subl    %eax, %edx
xorl    %ecx, %r9d
movl    %edx, %ecx
movl    %r9d, %edx
subl    %r9d, %ecx
subl    %r9d, %eax
sall    $8, %edx
xorl    %ecx, %edx
movl    %edx, %ecx
subl    %edx, %eax
subl    %edx, %r9d
shrl    $13, %ecx
xorl    %ecx, %eax
movl    %r9d, %ecx
movl    %eax, %r9d
subl    %eax, %ecx
subl    %eax, %edx
shrl    $12, %r9d
xorl    %ecx, %r9d
movl    %edx, %ecx
movl    %r9d, %edx
subl    %r9d, %ecx
subl    %r9d, %eax
sall    $16, %edx
xorl    %ecx, %edx
movl    %edx, %ecx
subl    %edx, %eax
subl    %edx, %r9d
shrl    $5, %ecx
xorl    %eax, %ecx
movl    %ecx, %eax
subl    %ecx, %r9d
subl    %ecx, %edx
shrl    $3, %eax
xorl    %eax, %r9d
movl    %edx, %eax
movl    %r9d, %edx
subl    %r9d, %eax
sall    $10, %edx
xorl    %eax, %edx
movl    %ecx, %eax
addl    $1, 

[PATCH] Improve RPO VN for PR91257

2019-07-29 Thread Richard Biener


The following patch addresses a known issue with managing availability
in RPO VN - this is just the first time a testcase actually shows
measurable cycles here.  It's hammering libc malloc/free for
allocating one-element vectors.  So the following finally goes the
way the comment suggests and hooks the availability chain off
vn_ssa_aux, also converting it to a single-linked list of elements
we allocate from the same obstack.  Having only a single hash-table
is probably also more cache-friendly.
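
As a rough sketch of the idea (hypothetical type and function names, not
the actual vn_ssa_aux/rpo_elim code), availability becomes a per-value
singly-linked chain that is prepended to in RPO order and popped again
when unwinding:

/* LEADER becomes available in basic block LOCATION; newest entry first.  */
struct avail_entry
{
  struct avail_entry *next;
  int location;   /* basic-block index */
  int leader;     /* SSA name version of the leader */
};

struct value_info
{
  struct avail_entry *avail;  /* head of the chain, newest first */
};

static void
push_avail (struct value_info *val, struct avail_entry *e,
	    int bb_index, int leader_version)
{
  e->location = bb_index;
  e->leader = leader_version;
  e->next = val->avail;
  val->avail = e;             /* prepend in RPO order */
}

static void
pop_avail (struct value_info *val)
{
  /* Unwinding drops the most recently pushed entry.  */
  val->avail = val->avail->next;
}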

This cuts down VN time by ~20% on the testcase (but overall
compile-time much less).

The remaining "slowness" is GC, out-of-SSA and the SSA propagator
passes (via the SSA propagator engine).  All known and hard to fix.

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

2019-07-29  Richard Biener  

PR tree-optimization/91257
* tree-ssa-sccvn.h (struct vn_avail): New.
(struct vn_ssa_aux): Add avail member.
* tree-ssa-sccvn.c (class rpo_elim): Remove m_rpo_avail
member, add m_avail_freelist one.
(rpo_elim::~rpo_elim): Remove.
(rpo_elim::eliminate_avail): Adjust to new avail tracking
data structure.
(rpo_elim::eliminate_push_avail): Likewise.
(do_unwind): Likewise.
(do_rpo_vn): Likewise.

Index: gcc/tree-ssa-sccvn.h
===
--- gcc/tree-ssa-sccvn.h(revision 273792)
+++ gcc/tree-ssa-sccvn.h(working copy)
@@ -193,6 +193,25 @@ vn_constant_eq_with_type (tree c1, tree
  && types_compatible_p (TREE_TYPE (c1), TREE_TYPE (c2)));
 }
 
+/* Instead of having a local availability lattice for each basic-block
+   and availability at X defined as union of the local availabilities
+   at X and its dominators we're turning this upside down and track
+   availability per value given values are usually made available at very
+   few points.
+   So we have a chain of LOCATION, LEADER entries where LOCATION is
+   specifying the basic-block LEADER is made available for VALUE.
+   We prepend to this chain in RPO order thus for iteration we can simply
+   remove the last entries.
+   LOCATION is the basic-block index and LEADER is its SSA name version.  */
+struct vn_avail
+{
+  vn_avail *next;
+  /* The basic-block LEADER is made available.  */
+  int location;
+  /* The LEADER for the value we are chained on.  */
+  int leader;
+};
+
 typedef struct vn_ssa_aux
 {
   /* SSA name this vn_ssa_aux is associated with in the lattice.  */
@@ -202,6 +221,10 @@ typedef struct vn_ssa_aux
   /* Statements to insert if needs_insertion is true.  */
   gimple_seq expr;
 
+  /* AVAIL entries, last in RPO order is first.  This is only tracked
+ for SSA names also serving as values (NAME == VALNUM).  */
+  vn_avail *avail;
+
   /* Unique identifier that all expressions with the same value have. */
   unsigned int value_id;
 
Index: gcc/tree-ssa-sccvn.c
===
--- gcc/tree-ssa-sccvn.c(revision 273792)
+++ gcc/tree-ssa-sccvn.c(working copy)
@@ -2126,36 +2126,17 @@ class rpo_elim : public eliminate_dom_wa
 {
 public:
   rpo_elim(basic_block entry_)
-: eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_) {}
-  ~rpo_elim();
+: eliminate_dom_walker (CDI_DOMINATORS, NULL), entry (entry_),
+  m_avail_freelist (NULL) {}
 
   virtual tree eliminate_avail (basic_block, tree op);
 
   virtual void eliminate_push_avail (basic_block, tree);
 
   basic_block entry;
-  /* Instead of having a local availability lattice for each
- basic-block and availability at X defined as union of
- the local availabilities at X and its dominators we're
- turning this upside down and track availability per
- value given values are usually made available at very
- few points (at least one).
- So we have a value -> vec map where
- LOCATION is specifying the basic-block LEADER is made
- available for VALUE.  We push to this vector in RPO
- order thus for iteration we can simply pop the last
- entries.
- LOCATION is the basic-block index and LEADER is its
- SSA name version.  */
-  /* ???  We'd like to use auto_vec here with embedded storage
- but that doesn't play well until we can provide move
- constructors and use std::move on hash-table expansion.
- So for now this is a bit more expensive than necessary.
- We eventually want to switch to a chaining scheme like
- for hashtable entries for unwinding which would make
- making the vector part of the vn_ssa_aux structure possible.  */
-  typedef hash_map<tree, vec<std::pair<int, int> > > rpo_avail_t;
-  rpo_avail_t m_rpo_avail;
+  /* Freelist of avail entries which are allocated from the vn_ssa_aux
+ obstack.  */
+  vn_avail *m_avail_freelist;
 };
 
 /* Global RPO state for access from hooks.  */
@@ -6197,14 +6178,6 @@ vn_lookup_simplify_result (gimple_match_
   return res;
 }
 
-rpo_elim::~rpo_elim ()
-{

[PATCH] Improve PR91257

2019-07-29 Thread Richard Biener


The following significantly brings down compile-time spent in VRP
for the testcase.

Bootstrapped and tested on x86_64-unknown-linux-gnu, applied to trunk.

Richard.

2019-07-29  Richard Biener  

PR tree-optimization/91257
* tree-vrp.c (operand_less_p): Avoid dispatching to fold for
most cases, instead call compare_values which handles the
symbolic ranges we handle specially.
(compare_values_warnv): Do not call operand_less_p but open-code
the effective fold calls.  Avoid converting so much.

Index: gcc/tree-vrp.c
===
--- gcc/tree-vrp.c  (revision 273792)
+++ gcc/tree-vrp.c  (working copy)
@@ -909,22 +909,17 @@ operand_less_p (tree val, tree val2)
   /* LT is folded faster than GE and others.  Inline the common case.  */
   if (TREE_CODE (val) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
 return tree_int_cst_lt (val, val2);
+  else if (TREE_CODE (val) == SSA_NAME && TREE_CODE (val2) == SSA_NAME)
+return val == val2 ? 0 : -2;
   else
 {
-  tree tcmp;
-
-  fold_defer_overflow_warnings ();
-
-  tcmp = fold_binary_to_constant (LT_EXPR, boolean_type_node, val, val2);
-
-  fold_undefer_and_ignore_overflow_warnings ();
-
-  if (!tcmp
- || TREE_CODE (tcmp) != INTEGER_CST)
-   return -2;
-
-  if (!integer_zerop (tcmp))
+  int cmp = compare_values (val, val2);
+  if (cmp == -1)
return 1;
+  else if (cmp == 0 || cmp == 1)
+   return 0;
+  else
+   return -2;
 }
 
   return 0;
@@ -958,8 +953,8 @@ compare_values_warnv (tree val1, tree va
 
   /* Convert the two values into the same type.  This is needed because
  sizetype causes sign extension even for unsigned types.  */
-  val2 = fold_convert (TREE_TYPE (val1), val2);
-  STRIP_USELESS_TYPE_CONVERSION (val2);
+  if (!useless_type_conversion_p (TREE_TYPE (val1), TREE_TYPE (val2)))
+val2 = fold_convert (TREE_TYPE (val1), val2);
 
   const bool overflow_undefined
 = INTEGRAL_TYPE_P (TREE_TYPE (val1))
@@ -1067,32 +1062,43 @@ compare_values_warnv (tree val1, tree va
 }
   else
 {
-  tree t;
+  if (TREE_CODE (val1) == INTEGER_CST && TREE_CODE (val2) == INTEGER_CST)
+   {
+ /* We cannot compare overflowed values.  */
+ if (TREE_OVERFLOW (val1) || TREE_OVERFLOW (val2))
+   return -2;
+
+ return tree_int_cst_compare (val1, val2);
+   }
 
   /* First see if VAL1 and VAL2 are not the same.  */
-  if (val1 == val2 || operand_equal_p (val1, val2, 0))
+  if (operand_equal_p (val1, val2, 0))
return 0;
 
+  fold_defer_overflow_warnings ();
+
   /* If VAL1 is a lower address than VAL2, return -1.  */
-  if (operand_less_p (val1, val2) == 1)
-   return -1;
+  tree t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val1, val2);
+  if (t && integer_onep (t))
+   {
+ fold_undefer_and_ignore_overflow_warnings ();
+ return -1;
+   }
 
   /* If VAL1 is a higher address than VAL2, return +1.  */
-  if (operand_less_p (val2, val1) == 1)
-   return 1;
-
-  /* If VAL1 is different than VAL2, return +2.
-For integer constants we either have already returned -1 or 1
-or they are equivalent.  We still might succeed in proving
-something about non-trivial operands.  */
-  if (TREE_CODE (val1) != INTEGER_CST
- || TREE_CODE (val2) != INTEGER_CST)
+  t = fold_binary_to_constant (LT_EXPR, boolean_type_node, val2, val1);
+  if (t && integer_onep (t))
{
-  t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
- if (t && integer_onep (t))
-   return 2;
+ fold_undefer_and_ignore_overflow_warnings ();
+ return 1;
}
 
+  /* If VAL1 is different than VAL2, return +2.  */
+  t = fold_binary_to_constant (NE_EXPR, boolean_type_node, val1, val2);
+  fold_undefer_and_ignore_overflow_warnings ();
+  if (t && integer_onep (t))
+   return 2;
+
   return -2;
 }
 }


Re: Fix inchash handling of wide_ints (PR91242)

2019-07-29 Thread Richard Sandiford
Richard Biener  writes:
> On Mon, Jul 29, 2019 at 11:11 AM Richard Sandiford
>  wrote:
>>
>> inchash::hash::add_wide_int operated directly on the raw encoding
>> of the wide_int, including any redundant upper bits.  The problem
>> with that is that the upper bits are only defined for some wide-int
>> storage types (including wide_int itself).  wi::to_wide(tree) instead
>> returns a value that is extended according to the signedness of the
>> type (so that wi::to_widest can use the same encoding) while rtxes
>> have the awkward special case of BI, which can be zero-extended
>> rather than sign-extended.
>>
>> In the PR, we computed a hash for a "normal" sign-extended wide_int
>> while the existing entries hashed wi::to_wide(tree).  This gives
>> different results for unsigned types that have the top bit set.
>>
>> The patch fixes that by hashing the canonical sign-extended form even
>> if the raw encoding happens to be different.
>>
>> Tested on aarch64-linux-gnu (with and without SVE), armeb-eabi
>> and x86_64-linux-gnu.  OK to install?
>
> But only the most significant elt needs this treatment, no?  So
> we can save some compile-time in the hashing by not doing
> it for all elts.

Yeah, we only need it for the most significant elt.  But the vast
majority of constants need 64 bits or fewer of encoding anyway
(even if the precision is greater than 64 bits), so that's usually the
only one we need to process.  I'm not convinced handling multiple elts
specially would pay off.

Thanks,
Richard

>
>> Richard
>>
>>
>> 2019-07-29  Richard Sandiford  
>>
>> gcc/
>> * wide-int.h (generic_wide_int::sext_elt): New function.
>> * inchash.h (hash::add_wide_int): Use it instead of elt.
>>
>> Index: gcc/wide-int.h
>> ===
>> --- gcc/wide-int.h  2019-07-10 19:41:26.391898059 +0100
>> +++ gcc/wide-int.h  2019-07-29 10:08:12.048610030 +0100
>> @@ -730,6 +730,7 @@ class GTY(()) generic_wide_int : public
>>/* Public accessors for the interior of a wide int.  */
>>HOST_WIDE_INT sign_mask () const;
>>HOST_WIDE_INT elt (unsigned int) const;
>> +  HOST_WIDE_INT sext_elt (unsigned int) const;
>>unsigned HOST_WIDE_INT ulow () const;
>>unsigned HOST_WIDE_INT uhigh () const;
>>HOST_WIDE_INT slow () const;
>> @@ -909,6 +910,23 @@ generic_wide_int ::elt (unsigne
>>  return this->get_val ()[i];
>>  }
>>
>> +/* Like elt, but sign-extend beyond the upper bit, instead of returning
>> +   the raw encoding.  */
> >> +template <typename storage>
>> +inline HOST_WIDE_INT
> >> +generic_wide_int <storage>::sext_elt (unsigned int i) const
>> +{
>> +  HOST_WIDE_INT elt_i = elt (i);
>> +  if (!is_sign_extended)
>> +{
>> +  unsigned int precision = this->get_precision ();
>> +  unsigned int lsb = i * HOST_BITS_PER_WIDE_INT;
>> +  if (precision - lsb < HOST_BITS_PER_WIDE_INT)
>> +   elt_i = sext_hwi (elt_i, precision - lsb);
>> +}
>> +  return elt_i;
>> +}
>> +
> >>  template <typename storage>
> >>  template <typename T>
> >>  inline generic_wide_int <storage> &
>> Index: gcc/inchash.h
>> ===
>> --- gcc/inchash.h   2019-03-08 18:15:36.704740334 +
>> +++ gcc/inchash.h   2019-07-29 10:08:12.048610030 +0100
>> @@ -85,7 +85,7 @@ hashval_t iterative_hash_hashval_t (hash
>>{
>>  add_int (x.get_len ());
>>  for (unsigned i = 0; i < x.get_len (); i++)
>> -  add_hwi (x.elt (i));
>> +  add_hwi (x.sext_elt (i));
>>}
>>
>>/* Hash in pointer PTR.  */


Re: [PATCH] Remove also 2nd argument for unused delete operator (PR tree-optimization/91270).

2019-07-29 Thread Martin Liška
On 7/29/19 11:59 AM, Richard Biener wrote:
> On Sun, Jul 28, 2019 at 4:57 PM Martin Liška  wrote:
>>
>> Hi.
>>
>> And there's one more patch that deals with delete operator
>> which has a second argument (size). That definition GIMPLE
>> statement of the size must be also properly marked.
>>
>> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>>
>> Ready to be installed?
> 
> This isn't a proper fix.  You can't mark a random operand as necessary
> during elimination - this only works in very constraint cases.  Here
> it will break down if the stmt you just marked depends on another stmt
> only used by the stmt you just marked (and thus also not necessary).

Ah, I see.

> 
> The fix is to propagate_necessity where you either have to excempt
> the two-operator delete from handling

We also want to process these delete operators.

> or to mark its second operand
> as necessary despite eventually deleting the call.

Really marking that as necessary? Why?

> You can avoid
> this in case the allocation function used the exact same size argument.

That's not the case as the operator new happens in a loop:

   :
  # iftmp.0_15 = PHI 
  _23 = operator new [] (iftmp.0_15);
  _24 = _23;
  MEM[(sizetype *)_24] = _19;
  _26 = _24 + 8;
  _27 = _26;
  _2 = _19 + 18446744073709551615;
  _28 = (long int) _2;

   :
  # _12 = PHI <_27(5), _29(7)>
  # _13 = PHI <_28(5), _30(7)>
  if (_13 < 0)
goto ; [INV]
  else
goto ; [INV]

Btw., is the following hunk, moved to propagate_necessity, still wrong:

diff --git a/gcc/tree-ssa-dce.c b/gcc/tree-ssa-dce.c
index cf507fa0453..1c890889932 100644
--- a/gcc/tree-ssa-dce.c
+++ b/gcc/tree-ssa-dce.c
@@ -803,7 +803,23 @@ propagate_necessity (bool aggressive)
   || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_MALLOC
   || DECL_FUNCTION_CODE (def_callee) == BUILT_IN_CALLOC))
  || DECL_IS_REPLACEABLE_OPERATOR_NEW_P (def_callee)))
-   continue;
+   {
+ /* Some delete operators have 2 arguments, where
+the second argument is size of the deallocated memory.  */
+ if (gimple_call_num_args (stmt) == 2)
+   {
+ tree ptr = gimple_call_arg (stmt, 1);
+ if (TREE_CODE (ptr) == SSA_NAME)
+   {
+ gimple *def_stmt = SSA_NAME_DEF_STMT (ptr);
+ if (!gimple_nop_p (def_stmt)
+ && !gimple_plf (def_stmt, STMT_NECESSARY))
+   gimple_set_plf (stmt, STMT_NECESSARY, false);
+   }
+   }
+
+ continue;
+   }
}
 
  FOR_EACH_SSA_TREE_OPERAND (use, stmt, iter, SSA_OP_USE)

Thanks,
Martin

> 
> Richard.
> 
>> Thanks,
>> Martin



Re: Add ARRAY_REF based access path disambiguation

2019-07-29 Thread Jan Hubicka
Hello,
I missed your email initially, so sorry for the late reaction.
> > +/* REF1 and REF2 are ARRAY_REFs which either start at the same address or
> > +   are completely disjoint.
> 
> So the ARRAY_REF array bases are at the same address, not the ARRAY_REF
> itself?

Yes, updated the comment.

> 
> > +   Return 1 if the refs are non-overlapping.
> > +   Return 0 if they are possibly overlapping but if so the overlap again
> > +   starts on the same address.
> > +   Return -1 otherwise.  */
> > +
> > +int
> > +nonoverlapping_array_refs_p (tree ref1, tree ref2)
> > +{
> > +  tree low_bound1 = array_ref_low_bound (ref1);
> > +  tree low_bound2 = array_ref_low_bound (ref2);
> > +  tree index1 = TREE_OPERAND (ref1, 1);
> > +  tree index2 = TREE_OPERAND (ref2, 1);
> > +
> > +  /* Handle zero offsets first: we do not need to match type size in this
> > + case.  */
> > +  if (operand_equal_p (index1, low_bound1, 0)
> > +  && operand_equal_p (index2, low_bound2, 0))
> > +return 0;
> > +
> > +  /* If type sizes are different, give up.  */
> > +  if (!operand_equal_p (array_ref_element_size (ref1),
> > +   array_ref_element_size (ref2), 0))
> > +return -1;
> 
> So both array_ref_low_bound and array_ref_element_size are quite 
> expensive.  I wonder if you should resort to operand_equal_p
> of TREE_OPERAND (ref1, 2) or the raw TYPE_MIN_VALUE of the
> domain here since you are not interested in the actual size
> or the actual minimum value.

I added the operand_equal_p checks. I think it will be OK for
-fstrict-aliasing. This is because we tend to only walk paths where
types are the same (others are ruled out by the alias checks).
For -fno-strict-aliasing we may want to handle at least the case where
everything is constant, but that can be done incrementally.
> > + /* We generally assume that both access paths starts by same sequence
> > +of refs.  However if number of array refs is not in sync, try
> > +to recover and pop elts until number match.  This helps the case
> > +where one access path starts by array and other by element.  */
> > + for (narray_refs1 = 0; narray_refs1 < component_refs1.length ();
> > +  narray_refs1++)
> > +   if (TREE_CODE (component_refs1 [component_refs1.length()
> > +   - 1 - narray_refs1]) != ARRAY_REF)
> > + break;
> > +
> > + for (narray_refs2 = 0; narray_refs2 < component_refs2.length ();
> > +  narray_refs2++)
> > +   if (TREE_CODE (component_refs2 [component_refs2.length()
> > +   - 1 - narray_refs2]) != ARRAY_REF)
> > + break;
> 
> so here we now have narray_refs1,2, the number of ARRAY_REFs at the
> end of the component_refs.  I wonder if this part can be easily

I think this is not completely easy to do since before we hit the
outermost component ref in the walk we do not know which block of array
refs is supposed to match which.
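
To make that concrete, here is a small hypothetical C example (not from
the patch or its testcases) where one access path spells the outer array
explicitly while the other starts at an element, so the trailing
ARRAY_REF counts differ and have to be re-synchronized before the
pairwise comparison:

struct inner { int f[4]; };
struct outer { struct inner a[8]; };

int
test (struct outer *o, struct inner *elt, int i, int j)
{
  o->a[i].f[j] = 1;   /* trailing refs: ARRAY_REF f[j] under ARRAY_REF a[i] */
  return elt->f[j];   /* trailing refs: ARRAY_REF f[j] only */
}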

I am also thinking of reorganizing all the access path functions to work
on vectors summarizing the refs (so we do not need them twice for IPA
mod-ref), and then we do not want to insert such implementation details.

> done during component_refN construction - simply ++count
> if pushing an ARRAY_REF, count = 0 if not?  What about ARRAY_RANGE_REF?
> Will those invalidate any of the stuff and thus we need to handle
> them conservative in some way?

For now I cowardly ignore ARRAY_RANGE_REF.  We will get an unmatched ref
there and only synchronize on the next component ref, if present.
> 
> > + for (; narray_refs1 > narray_refs2; narray_refs1--)
> > +   {
> > + ref1 = component_refs1.pop ();
> > + if (!operand_equal_p (TREE_OPERAND (ref1, 1),
> > +   array_ref_low_bound (ref1), 0))
> > +   seen_unmatched_ref_p = true;
> > +   }
> > + for (; narray_refs2 > narray_refs1; narray_refs2--)
> > +   {
> > + ref2 = component_refs2.pop ();
> > + if (!operand_equal_p (TREE_OPERAND (ref2, 1),
> > +   array_ref_low_bound (ref2), 0))
> > +   seen_unmatched_ref_p = true;
> > +   }
> 
> here we're trying to make them equal by popping.  seen_unmatched_ref_p
> is magic, you have to add comments.

Comment added.
> 
> > + /* Try to disambiguate matched arrays.  */
> > + for (unsigned int i = 0; i < narray_refs1; i++)
> > +   {
> > + int cmp = nonoverlapping_array_refs_p (component_refs1.pop (),
> > +component_refs2.pop ());
> > + if (cmp == 1 && !partial_overlap)
> > +   {
> > + ++alias_stats
> > +   .nonoverlapping_refs_since_match_p_no_alias;
> > + return 1;
> > +   }
> > + partial_overlap = false;
> > + if (cmp == -1)
> > +   seen_unmatched_ref_p = true;
> > +   }
> > +   }
> > +
> > +  /* Next look for component_refs.  */
> > +
> 
> So for 

[PATCH] Fix PR91267

2019-07-29 Thread Richard Biener


Bootstrapped and tested on x86_64-unknown-linux-gnu, applied.

Richard.

2019-07-29  Richard Biener  

PR tree-optimization/91267
* vr-values.c (vr_values::update_value_range): Add early return
for effectively VARYING lattice entry.

* gcc.dg/torture/pr91267.c: New testcase.

Index: gcc/vr-values.c
===
--- gcc/vr-values.c (revision 273792)
+++ gcc/vr-values.c (working copy)
@@ -202,8 +202,12 @@ vr_values::update_value_range (const_tre
new_vr->intersect ();
 }
 
-  /* Update the value range, if necessary.  */
+  /* Update the value range, if necessary.  If we cannot allocate a lattice
+ entry for VAR keep it at VARYING.  This happens when DOM feeds us stmts
+ with SSA names allocated after setting up the lattice.  */
   old_vr = get_lattice_entry (var);
+  if (!old_vr)
+return false;
   is_new = !old_vr->equal_p (*new_vr, /*ignore_equivs=*/false);
 
   if (is_new)
Index: gcc/testsuite/gcc.dg/torture/pr91267.c
===
--- gcc/testsuite/gcc.dg/torture/pr91267.c  (nonexistent)
+++ gcc/testsuite/gcc.dg/torture/pr91267.c  (working copy)
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+
+void bar (void);
+void baz (int);
+char *qux (void);
+int a, b;
+
+void
+foo (int f, char *d)
+{
+  char *e;
+  while (d)
+{
+  if (f)
+   if (e)
+ bar ();
+  baz (e - (d + a));
+  b = e - d;
+  d = qux ();
+}
+}


Re: Fix inchash handling of wide_ints (PR91242)

2019-07-29 Thread Richard Biener
On Mon, Jul 29, 2019 at 11:11 AM Richard Sandiford
 wrote:
>
> inchash::hash::add_wide_int operated directly on the raw encoding
> of the wide_int, including any redundant upper bits.  The problem
> with that is that the upper bits are only defined for some wide-int
> storage types (including wide_int itself).  wi::to_wide(tree) instead
> returns a value that is extended according to the signedness of the
> type (so that wi::to_widest can use the same encoding) while rtxes
> have the awkward special case of BI, which can be zero-extended
> rather than sign-extended.
>
> In the PR, we computed a hash for a "normal" sign-extended wide_int
> while the existing entries hashed wi::to_wide(tree).  This gives
> different results for unsigned types that have the top bit set.
>
> The patch fixes that by hashing the canonical sign-extended form even
> if the raw encoding happens to be different.
>
> Tested on aarch64-linux-gnu (with and without SVE), armeb-eabi
> and x86_64-linux-gnu.  OK to install?

But only the most significant elt needs this treatment, no?  So
we can save some compile-time in the hashing by not doing
it for all elts.

> Richard
>
>
> 2019-07-29  Richard Sandiford  
>
> gcc/
> * wide-int.h (generic_wide_int::sext_elt): New function.
> * inchash.h (hash::add_wide_int): Use it instead of elt.
>
> Index: gcc/wide-int.h
> ===
> --- gcc/wide-int.h  2019-07-10 19:41:26.391898059 +0100
> +++ gcc/wide-int.h  2019-07-29 10:08:12.048610030 +0100
> @@ -730,6 +730,7 @@ class GTY(()) generic_wide_int : public
>/* Public accessors for the interior of a wide int.  */
>HOST_WIDE_INT sign_mask () const;
>HOST_WIDE_INT elt (unsigned int) const;
> +  HOST_WIDE_INT sext_elt (unsigned int) const;
>unsigned HOST_WIDE_INT ulow () const;
>unsigned HOST_WIDE_INT uhigh () const;
>HOST_WIDE_INT slow () const;
> @@ -909,6 +910,23 @@ generic_wide_int ::elt (unsigne
>  return this->get_val ()[i];
>  }
>
> +/* Like elt, but sign-extend beyond the upper bit, instead of returning
> +   the raw encoding.  */
> +template <typename storage>
> +inline HOST_WIDE_INT
> +generic_wide_int <storage>::sext_elt (unsigned int i) const
> +{
> +  HOST_WIDE_INT elt_i = elt (i);
> +  if (!is_sign_extended)
> +{
> +  unsigned int precision = this->get_precision ();
> +  unsigned int lsb = i * HOST_BITS_PER_WIDE_INT;
> +  if (precision - lsb < HOST_BITS_PER_WIDE_INT)
> +   elt_i = sext_hwi (elt_i, precision - lsb);
> +}
> +  return elt_i;
> +}
> +
>  template <typename storage>
>  template <typename T>
>  inline generic_wide_int <storage> &
> Index: gcc/inchash.h
> ===
> --- gcc/inchash.h   2019-03-08 18:15:36.704740334 +
> +++ gcc/inchash.h   2019-07-29 10:08:12.048610030 +0100
> @@ -85,7 +85,7 @@ hashval_t iterative_hash_hashval_t (hash
>{
>  add_int (x.get_len ());
>  for (unsigned i = 0; i < x.get_len (); i++)
> -  add_hwi (x.elt (i));
> +  add_hwi (x.sext_elt (i));
>}
>
>/* Hash in pointer PTR.  */


Re: [PATCH] Remove also 2nd argument for unused delete operator (PR tree-optimization/91270).

2019-07-29 Thread Richard Biener
On Sun, Jul 28, 2019 at 4:57 PM Martin Liška  wrote:
>
> Hi.
>
> And there's one more patch that deals with delete operator
> which has a second argument (size). That definition GIMPLE
> statement of the size must be also properly marked.
>
> Patch can bootstrap on x86_64-linux-gnu and survives regression tests.
>
> Ready to be installed?

This isn't a proper fix.  You can't mark a random operand as necessary
during elimination - this only works in very constraint cases.  Here
it will break down if the stmt you just marked depends on another stmt
only used by the stmt you just marked (and thus also not necessary).

The fix is to propagate_necessity where you either have to excempt
the two-operator delete from handling or to mark its second operand
as necessary despite eventually deleting the call.  You can avoid
this in case the allocation function used the exact same size argument.
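
For context, a minimal hypothetical C++ source of the shape being
discussed, where the lowered deallocation call carries a second (size)
operand computed separately from the allocation:

// With sized deallocation (C++14, or -fsized-deallocation) the delete[]
// below may lower to operator delete[] (void *, size_t); when an array
// cookie is involved, that size operand is recomputed from the cookie
// rather than reusing the operator new[] argument directly.
struct S
{
  ~S () {}
  int v;
};

void
f (unsigned long n)
{
  S *p = new S[n];
  delete[] p;
}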

Richard.

> Thanks,
> Martin


Re: [PATCH 1/2] Come up with function_decl_type and use it in tree_function_decl.

2019-07-29 Thread Richard Biener
On Thu, Jul 25, 2019 at 5:27 PM Martin Liška  wrote:
>
> On 7/25/19 3:30 PM, Martin Liška wrote:
> > On 7/25/19 2:18 PM, Marc Glisse wrote:
> >> On Thu, 25 Jul 2019, Martin Liška wrote:
> >>
>  DCE has special code to avoid removing the LHS of malloc when it is 
>  unused. Is there something different about operator new that makes it 
>  not need the same handling?
> >>
> >> If you take gcc.dg/torture/pr51692.c and replace __builtin_calloc (1, 
> >> sizeof (double)) with new double(), we get an ICE with -O or higher...
> >>
> >
> > I can see, I'm working on that.
> >
> > Martin
> >
>
> There's a patch candidate I've been testing right now.
>
> Ready to be installed after tests?

OK.

Richard.

> Thanks,
> Martin


Fix inchash handling of wide_ints (PR91242)

2019-07-29 Thread Richard Sandiford
inchash::hash::add_wide_int operated directly on the raw encoding
of the wide_int, including any redundant upper bits.  The problem
with that is that the upper bits are only defined for some wide-int
storage types (including wide_int itself).  wi::to_wide(tree) instead
returns a value that is extended according to the signedness of the
type (so that wi::to_widest can use the same encoding) while rtxes
have the awkward special case of BI, which can be zero-extended
rather than sign-extended.

In the PR, we computed a hash for a "normal" sign-extended wide_int
while the existing entries hashed wi::to_wide(tree).  This gives
different results for unsigned types that have the top bit set.

The patch fixes that by hashing the canonical sign-extended form even
if the raw encoding happens to be different.
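
To make the failure mode concrete, here is a small self-contained sketch
(approximating sext_hwi; not GCC code) of how two raw encodings of the
same 32-bit unsigned constant can differ while the canonical
sign-extended form agrees:

#include <stdint.h>
#include <stdio.h>

/* Rough stand-in for sext_hwi: sign-extend the low PREC bits of X
   (relies on arithmetic right shift, as GCC effectively does).  */
static int64_t
sext_hwi_approx (int64_t x, unsigned int prec)
{
  return (int64_t) ((uint64_t) x << (64 - prec)) >> (64 - prec);
}

int
main (void)
{
  /* The value 0x80000000 with precision 32: one storage scheme keeps the
     element zero-extended, another keeps it sign-extended.  */
  int64_t zext_form = 0x80000000LL;    /* zero-extended raw element */
  int64_t sext_form = -0x80000000LL;   /* sign-extended raw element */

  printf ("%d\n", zext_form == sext_form);              /* 0: raw elts differ */
  printf ("%d\n", sext_hwi_approx (zext_form, 32)
		  == sext_hwi_approx (sext_form, 32));  /* 1: canonical form agrees */
  return 0;
}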

Tested on aarch64-linux-gnu (with and without SVE), armeb-eabi
and x86_64-linux-gnu.  OK to install?

Richard


2019-07-29  Richard Sandiford  

gcc/
* wide-int.h (generic_wide_int::sext_elt): New function.
* inchash.h (hash::add_wide_int): Use it instead of elt.

Index: gcc/wide-int.h
===
--- gcc/wide-int.h  2019-07-10 19:41:26.391898059 +0100
+++ gcc/wide-int.h  2019-07-29 10:08:12.048610030 +0100
@@ -730,6 +730,7 @@ class GTY(()) generic_wide_int : public
   /* Public accessors for the interior of a wide int.  */
   HOST_WIDE_INT sign_mask () const;
   HOST_WIDE_INT elt (unsigned int) const;
+  HOST_WIDE_INT sext_elt (unsigned int) const;
   unsigned HOST_WIDE_INT ulow () const;
   unsigned HOST_WIDE_INT uhigh () const;
   HOST_WIDE_INT slow () const;
@@ -909,6 +910,23 @@ generic_wide_int ::elt (unsigne
 return this->get_val ()[i];
 }
 
+/* Like elt, but sign-extend beyond the upper bit, instead of returning
+   the raw encoding.  */
+template <typename storage>
+inline HOST_WIDE_INT
+generic_wide_int <storage>::sext_elt (unsigned int i) const
+{
+  HOST_WIDE_INT elt_i = elt (i);
+  if (!is_sign_extended)
+{
+  unsigned int precision = this->get_precision ();
+  unsigned int lsb = i * HOST_BITS_PER_WIDE_INT;
+  if (precision - lsb < HOST_BITS_PER_WIDE_INT)
+   elt_i = sext_hwi (elt_i, precision - lsb);
+}
+  return elt_i;
+}
+
 template <typename storage>
 template <typename T>
 inline generic_wide_int <storage> &
Index: gcc/inchash.h
===
--- gcc/inchash.h   2019-03-08 18:15:36.704740334 +
+++ gcc/inchash.h   2019-07-29 10:08:12.048610030 +0100
@@ -85,7 +85,7 @@ hashval_t iterative_hash_hashval_t (hash
   {
 add_int (x.get_len ());
 for (unsigned i = 0; i < x.get_len (); i++)
-  add_hwi (x.elt (i));
+  add_hwi (x.sext_elt (i));
   }
 
   /* Hash in pointer PTR.  */


Re: Prevent tree-ssa-dce.c from deleting stores at -Og

2019-07-29 Thread Richard Sandiford
Jeff Law  writes:
> On 7/7/19 3:45 AM, Richard Sandiford wrote:
>> DCE tries to delete dead stores to local data and also tries to insert
>> debug binds for simple cases:
>> 
>>   /* If this is a store into a variable that is being optimized away,
>>  add a debug bind stmt if possible.  */
>>   if (MAY_HAVE_DEBUG_BIND_STMTS
>>   && gimple_assign_single_p (stmt)
>>   && is_gimple_val (gimple_assign_rhs1 (stmt)))
>> {
>>   tree lhs = gimple_assign_lhs (stmt);
>>   if ((VAR_P (lhs) || TREE_CODE (lhs) == PARM_DECL)
>>&& !DECL_IGNORED_P (lhs)
>>&& is_gimple_reg_type (TREE_TYPE (lhs))
>>&& !is_global_var (lhs)
>>&& !DECL_HAS_VALUE_EXPR_P (lhs))
>>  {
>>tree rhs = gimple_assign_rhs1 (stmt);
>>gdebug *note
>>  = gimple_build_debug_bind (lhs, unshare_expr (rhs), stmt);
>>gsi_insert_after (i, note, GSI_SAME_STMT);
>>  }
>> }
>> 
>> But this doesn't help for things like "print *ptr" when ptr points
>> to the local variable (tests Og-dce-1.c and Og-dce-2.c).  It also tends
>> to make the *live* -- and thus useful -- values optimised out, because
>> we can't yet switch back to tracking the memory location as it evolves
>> over time (test Og-dce-3.c).
>> 
>> So for -Og I think it'd be better not to delete any stmts with
>> vdefs for now.  This also means that we can avoid the potentially
>> expensive vop walks (which already have a cut-off, but still).
>> 
>> The patch also fixes the Og failures in gcc.dg/guality/pr54970.c
>> (PR 86638).
>> 
>> Tested on aarch64-linux-gnu and x86_64-linux-gnu.  OK to install?
>> 
>> Richard
>> 
>> 
>> 2019-07-07  Richard Sandiford  
>> 
>> gcc/
>>  PR debug/86638
>>  * tree-ssa-dce.c (keep_all_vdefs_p): New function.
>>  (mark_stmt_if_obviously_necessary): Mark all stmts with vdefs as
>>  necessary if keep_all_vdefs_p is true.
>>  (mark_aliased_reaching_defs_necessary): Add a gcc_checking_assert
>>  that keep_all_vdefs_p is false.
>>  (mark_all_reaching_defs_necessary): Likewise.
>>  (propagate_necessity): Skip the vuse scan if keep_all_vdefs_p is true.
>> 
>> gcc/testsuite/
>>  * c-c++-common/guality/Og-dce-1.c: New test.
>>  * c-c++-common/guality/Og-dce-2.c: Likewise.
>>  * c-c++-common/guality/Og-dce-3.c: Likewise.
> OK
> jeff

Thanks.  I realised later that Og-dce-3.c didn't test what I claimed it
was testing, so I committed the following version instead.  (I re-checked
that all the tests failed without the patch.)
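
For illustration, a hypothetical example (not one of the new guality
tests) of the class of code this keeps debuggable at -Og:

/* The final store to "local" is dead as far as the optimizers are
   concerned, but someone stopping at the return statement reasonably
   expects "print *p" to show 42; keeping statements with vdefs at -Og
   preserves that.  */
int
f (void)
{
  int local = 0;
  int *p = &local;
  local = 42;      /* dead store: nothing ever reads local */
  return p != 0;   /* breakpoint here; inspect *p in the debugger */
}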

Richard


2019-07-29  Richard Sandiford  

gcc/
PR debug/86638
* tree-ssa-dce.c (keep_all_vdefs_p): New function.
(mark_stmt_if_obviously_necessary): Mark all stmts with vdefs as
necessary if keep_all_vdefs_p is true.
(mark_aliased_reaching_defs_necessary): Add a gcc_checking_assert
that keep_all_vdefs_p is false.
(mark_all_reaching_defs_necessary): Likewise.
(propagate_necessity): Skip the vuse scan if keep_all_vdefs_p is true.

gcc/testsuite/
* c-c++-common/guality/Og-dce-1.c: New test.
* c-c++-common/guality/Og-dce-2.c: Likewise.
* c-c++-common/guality/Og-dce-3.c: Likewise.

Index: gcc/tree-ssa-dce.c
===
--- gcc/tree-ssa-dce.c  2019-07-29 09:39:50.030163053 +0100
+++ gcc/tree-ssa-dce.c  2019-07-29 09:47:56.498267828 +0100
@@ -115,6 +115,14 @@ #define STMT_NECESSARY GF_PLF_1
 static int *bb_postorder;
 
 
+/* True if we should treat any stmt with a vdef as necessary.  */
+
+static inline bool
+keep_all_vdefs_p ()
+{
+  return optimize_debug;
+}
+
 /* If STMT is not already marked necessary, mark it, and add it to the
worklist if ADD_TO_WORKLIST is true.  */
 
@@ -317,6 +325,12 @@ mark_stmt_if_obviously_necessary (gimple
   return;
 }
 
+  if (gimple_vdef (stmt) && keep_all_vdefs_p ())
+{
+  mark_stmt_necessary (stmt, true);
+  return;
+}
+
   return;
 }
 
@@ -532,6 +546,9 @@ mark_aliased_reaching_defs_necessary_1 (
 static void
 mark_aliased_reaching_defs_necessary (gimple *stmt, tree ref)
 {
+  /* Should have been caught before calling this function.  */
+  gcc_checking_assert (!keep_all_vdefs_p ());
+
   unsigned int chain;
   ao_ref refd;
   gcc_assert (!chain_ovfl);
@@ -610,6 +627,8 @@ mark_all_reaching_defs_necessary_1 (ao_r
 static void
 mark_all_reaching_defs_necessary (gimple *stmt)
 {
+  /* Should have been caught before calling this function.  */
+  gcc_checking_assert (!keep_all_vdefs_p ());
   walk_aliased_vdefs (NULL, gimple_vuse (stmt),
		      mark_all_reaching_defs_necessary_1, NULL, &visited);
 }
@@ -813,6 +832,10 @@ propagate_necessity (bool aggressive)
  if (!use)
continue;
 
+ /* No need to search for vdefs if we intrinsicly keep them all.  */
+ if (keep_all_vdefs_p ())
+   continue;
+
  /* If we dropped to simple mode make all immediately
 reachable 

Avoid ICE compiling cactusBSSN

2019-07-29 Thread Jan Hubicka
Hi,
this patch fixes an ICE when we output an ODR violation warning compiling
cactusBSSN.  As indicated in the PR, I think the warning is wrong and we
need to teach the C++ FE not to consider the type in question as being in
an anonymous namespace.

Regtested x86_64-linux.  Committed as obvious.

PR lto/91222
* ipa-devirt.c (warn_types_mismatch): Compare identifiers rather
than IDENTIFIER_POINTER.
Index: ipa-devirt.c
===
--- ipa-devirt.c(revision 273865)
+++ ipa-devirt.c(working copy)
@@ -1003,7 +1003,7 @@ warn_types_mismatch (tree t1, tree t2, l
n2 = DECL_NAME (n2);
   /* Most of the time, the type names will match, do not be unnecesarily
  verbose.  */
-  if (IDENTIFIER_POINTER (n1) != IDENTIFIER_POINTER (n2))
+  if (n1 != n2)
 inform (loc_t1,
"type %qT defined in anonymous namespace cannot match "
"type %qT across the translation unit boundary",