[sched] Dump dependency graph to a dot file

2015-11-05 Thread Nikolai Bozhenov

Hi!

The attached patch adds a procedure to dump the scheduler's dependency
graph into a dot file. The patch has been bootstrapped and regtested
on x86_64. Please commit if it is OK for trunk.

Thanks,
Nikolai
2015-11-04  Nikolai Bozhenov  

	* sched-int.h (dump_rgn_dependencies_dot): Declare.
	* sched-rgn.c (dump_rgn_dependencies_dot): New function.
	* print-rtl.h (print_insn): Add prototype.

diff --git a/gcc/print-rtl.h b/gcc/print-rtl.h
index eb079af..f601d33 100644
--- a/gcc/print-rtl.h
+++ b/gcc/print-rtl.h
@@ -25,12 +25,14 @@ extern void print_rtl (FILE *, const_rtx);
 #endif
 
 extern void dump_value_slim (FILE *, const_rtx, int);
 extern void dump_insn_slim (FILE *, const rtx_insn *);
 extern void dump_rtl_slim (FILE *, const rtx_insn *, const rtx_insn *,
 			   int, int);
 extern void print_value (pretty_printer *, const_rtx, int);
 extern void print_pattern (pretty_printer *, const_rtx, int);
+extern void print_insn (pretty_printer *pp, const rtx_insn *x, int verbose);
+
 extern void rtl_dump_bb_for_graph (pretty_printer *, basic_block);
 extern const char *str_pattern_slim (const_rtx);
 
 #endif  // GCC_PRINT_RTL_H
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 86f5821..4600347 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -1492,16 +1492,19 @@ extern void sched_rgn_local_finish (void);
 extern void sched_rgn_local_free (void);
 extern void extend_regions (void);
 extern void rgn_make_new_region_out_of_new_block (basic_block);
 
 extern void compute_priorities (void);
 extern void increase_insn_priority (rtx_insn *, int);
 extern void debug_rgn_dependencies (int);
 extern void debug_dependencies (rtx_insn *, rtx_insn *);
+extern void dump_rgn_dependencies_dot (FILE *);
+extern void dump_rgn_dependencies_dot (const char *);
+
 extern void free_rgn_deps (void);
 extern int contributes_to_priority (rtx_insn *, rtx_insn *);
 extern void extend_rgns (int *, int *, sbitmap, int *);
 extern void deps_join (struct deps_desc *, struct deps_desc *);
 
 extern void rgn_setup_common_sched_info (void);
 extern void rgn_setup_sched_infos (void);
 
diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c
index eafb3fd..d744d83 100644
--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -58,16 +58,18 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-attr.h"
 #include "except.h"
 #include "params.h"
 #include "cfganal.h"
 #include "sched-int.h"
 #include "sel-sched.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "pretty-print.h"
+#include "print-rtl.h"
 
 #ifdef INSN_SCHEDULING
 
 /* Some accessor macros for h_i_d members only used within this file.  */
 #define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
 #define IS_LOAD_INSN(INSN) (HID (insn)->is_load_insn)
 
 /* nr_inter/spec counts interblock/speculative motion for the function.  */
@@ -2855,16 +2857,118 @@ void debug_dependencies (rtx_insn *head, rtx_insn *tail)
 		   DEP_NONREG (dep) ? "n" : "",
 		   DEP_MULTIPLE (dep) ? "m" : "");
   }
   fprintf (sched_dump, "\n");
 }
 
   fprintf (sched_dump, "\n");
 }
+
+/* Dump dependency graph for the current region to a file using dot syntax. */
+
+void
+dump_rgn_dependencies_dot (FILE *file)
+{
+  rtx_insn *head, *tail, *con, *pro;
+  sd_iterator_def sd_it;
+  dep_t dep;
+  int bb;
+  pretty_printer pp;
+
+  pp.buffer->stream = file;
+  pp_printf (&pp, "digraph SchedDG {\n");
+
+  for (bb = 0; bb < current_nr_blocks; ++bb)
+{
+  // begin subgraph (basic block)
+  pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
+  pp_printf (&pp, "\t" "color=blue;" "\n");
+  pp_printf (&pp, "\t" "style=bold;" "\n");
+  pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
+
+  // setup head and tail (no support for EBBs)
+  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
+  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
+  tail = NEXT_INSN (tail);
+
+  // dump all insns
+  for (con = head; con != tail; con = NEXT_INSN (con))
+	{
+	  if (!INSN_P (con))
+	continue;
+
+	  // pretty print the insn
+	  pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));
+	  pp_write_text_to_stream (&pp);
+	  print_insn (&pp, con, /*verbose=*/false);
+	  pp_write_text_as_dot_label_to_stream (&pp, /*for_record=*/true);
+	  pp_write_text_to_stream (&pp);
+
+	  // dump instruction attributes
+	  pp_printf (&pp, "|{ uid:%d | luid:%d | prio:%d }}\",shape=record]\n",
+		 INSN_UID (con), INSN_LUID (con), INSN_PRIORITY (con));
+
+	  /* dump all deps */
+	  FOR_EACH_DEP (con, SD_LIST_BACK, sd_it, dep)
+	{
+	  int weight = 0;
+	  const char *color;
+	  pro = DEP_PRO (dep);
+
+	  switch (DEP_TYPE (dep))
+		{
+		case REG_DEP_TRUE:
+		  color = "black";
+		  weight = 1;
+		  break;
+		case REG_DEP_OUTPUT:
+		case REG_DEP_ANTI:
+		  color = "orange";
+		  break;
+		case REG_DEP_CONTROL:
+		  color = "blue";
+		  break;
+		default:
+		  gcc_unreachable ();
+		}
+
+	  pp_printf (&pp, "\t%d -> %d [color=%s",
+			 INSN_UID 

[PATCH] PR54224 Warn for unused internal procedures -- update testcase

2015-11-05 Thread Dominique d'Humières
Is there any objection to commit the following (see comments 21 and 22)?

Dominique

Index: gcc/testsuite/ChangeLog
===
--- gcc/testsuite/ChangeLog (revision 229793)
+++ gcc/testsuite/ChangeLog (working copy)
@@ -1,3 +1,9 @@
+2015-11-05  Dominique d'Humieres 
+
+   PR fortran/54224
+   * gfortran.dg/warn_unused_function_2.f90: Add two new 
+   "defined but not used" subroutines.
+
 2015-11-05  Richard Biener  
 
* gcc.dg/tree-ssa/loadpre2.c: Avoid undefined behavior due to
Index: gcc/testsuite/gfortran.dg/warn_unused_function_2.f90
===
--- gcc/testsuite/gfortran.dg/warn_unused_function_2.f90(revision 
229793)
+++ gcc/testsuite/gfortran.dg/warn_unused_function_2.f90(working copy)
@@ -2,6 +2,7 @@
 ! { dg-options "-Wall" }
 !
 ! [4.8 Regression] PR 54997: -Wunused-function gives false warnings
+! PR 54224: missing warnings with -Wunused-function
 !
 ! Contributed by Janus Weil 
 
@@ -14,6 +15,9 @@
 
   subroutine s1! { dg-warning "defined but not used" }
 call s2(s3)
+contains
+  subroutine s4! { dg-warning "defined but not used" }
+  end subroutine
   end subroutine
 
   subroutine s2(dummy) ! { dg-warning "Unused dummy argument" }
@@ -30,5 +34,10 @@
 entry en
 end subroutine
 
+program test
+contains
+  subroutine s5! { dg-warning "defined but not used" }
+  end subroutine
+end
 
 ! { dg-final { cleanup-modules "m" } }



Re: [sched] Dump dependency graph to a dot file

2015-11-05 Thread Kyrill Tkachov

Hi Nikolai,

On 05/11/15 08:29, Nikolai Bozhenov wrote:

Hi!

The attached patch adds a procedure to dump the scheduler's dependency
graph into a dot file. The patch has been bootstrapped and regtested
on x86_64. Please commit if it is OK for trunk.

Thanks,
Nikolai


A couple of style nits.

+  // begin subgraph (basic block)
+  pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
+  pp_printf (&pp, "\t" "color=blue;" "\n");
+  pp_printf (&pp, "\t" "style=bold;" "\n");
+  pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
+
+  // setup head and tail (no support for EBBs)
+  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
+  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
+  tail = NEXT_INSN (tail);
+
+  // dump all insns
+  for (con = head; con != tail; con = NEXT_INSN (con))
+   {
+ if (!INSN_P (con))
+   continue;
+
+ // pretty print the insn
+ pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));

Please use C-style comments i.e. /**/ instead of //.
Also, throughout the comments leave two empty spaces after a full stop
i.e. /* .  */.

You can use the check_GNU_style.sh script in the contrib/ directory on
your patches to highlight similar issues. For example:
$ ./contrib/check_GNU_style.sh ~/patches/dep-graph.patch

Dot, space, space, end of comment.
83:+/* Dump dependency graph for the current region to a file using dot syntax. 
*/
166:+/* Dump dependency graph for the current region to a file using dot 
syntax. */

Sentences should end with a dot.  Dot, space, space, end of the comment.
127:+  /* dump all deps */

Cheers,
Kyrill



[ping] Change behavior of -fsched-verbose option

2015-11-05 Thread Nikolai Bozhenov

Please commit if it is OK for trunk.

Thanks,
Nikolai

On 10/22/2015 06:38 PM, Nikolai Bozhenov wrote:

Hi!

Currently -fsched-verbose option redirects debugging dumps to stderr
if there is no dump_file for the current pass. It would be fine if
there were the only scheduling pass. But for example for AArch64
there are 3 scheduling passes in the default pipeline: sched1,
fusion and sched2. So, when passing options like
-fsched-verbose=7 -fdump-rtl-sched1 to GCC I get a neat dump for
sched1 in the file and a mess of fusion/sched2 logs in the console.

In fact, currently there's no way to tell GCC that I want extremely
verbose logs for sched1 and I want no logs for all other passes.
Especially to the console.

I suggest disabling such redirection in the scheduler and omitting
debugging output for passes without dump_file. I believe a better
way to redirect printing to the stderr is to use options like
-fdump-rtl-sched=stderr. The attached patch implements the
suggestion.

Anyway, the old behavior may be reproduced with options
-fsched-verbose=7 -fdump-rtl-sched1 
-fdump-rtl-{sched_fusion,sched2}=stderr

if it is really necessary.

The patch has been bootstrapped and regtested on x86_64.

Thanks,
Nikolai


2015-10-22  Nikolai Bozhenov  

	* haifa-sched.c (setup_sched_dump): Don't redirect output to stderr.
	* common.opt (-fsched-verbose): Set default value to 1.
	* invoke.texi (-fsched-verbose): Update the option's description.

diff --git a/gcc/common.opt b/gcc/common.opt
index 961a1b6..757ce85 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1967,17 +1967,17 @@ fsched-spec-load
 Common Report Var(flag_schedule_speculative_load) Optimization
 Allow speculative motion of some loads.
 
 fsched-spec-load-dangerous
 Common Report Var(flag_schedule_speculative_load_dangerous) Optimization
 Allow speculative motion of more loads.
 
 fsched-verbose=
-Common RejectNegative Joined UInteger Var(sched_verbose_param)
+Common RejectNegative Joined UInteger Var(sched_verbose_param) Init(1)
 -fsched-verbose=	Set the verbosity level of the scheduler.
 
 fsched2-use-superblocks
 Common Report Var(flag_sched2_use_superblocks) Optimization
 If scheduling post reload, do superblock scheduling.
 
 fsched2-use-traces
 Common Ignore
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 4fc7d88..11b8697 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -7415,22 +7415,17 @@ place unique stamps in coverage data files and the object files that
 produce them.  You can use the @option{-frandom-seed} option to produce
 reproducibly identical object files.
 
 The @var{number} should be different for every file you compile.
 
 @item -fsched-verbose=@var{n}
 @opindex fsched-verbose
 On targets that use instruction scheduling, this option controls the
-amount of debugging output the scheduler prints.  This information is
-written to standard error, unless @option{-fdump-rtl-sched1} or
-@option{-fdump-rtl-sched2} is specified, in which case it is output
-to the usual dump listing file, @file{.sched1} or @file{.sched2}
-respectively.  However for @var{n} greater than nine, the output is
-always printed to standard error.
+amount of debugging output the scheduler prints to the dump files.
 
 For @var{n} greater than zero, @option{-fsched-verbose} outputs the
 same information as @option{-fdump-rtl-sched1} and @option{-fdump-rtl-sched2}.
 For @var{n} greater than one, it also output basic block probabilities,
 detailed ready list information and unit/insn info.  For @var{n} greater
 than two, it includes RTL at abort point, control-flow and regions info.
 And for @var{n} over four, @option{-fsched-verbose} also includes
 dependence info.
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index caadc11..835648b 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -198,27 +198,24 @@ static int modulo_iter0_max_uid;
 static int modulo_backtracks_left;
 
 /* The stage in which the last insn from the original loop was
scheduled.  */
 static int modulo_last_stage;
 
 /* sched-verbose controls the amount of debugging output the
scheduler prints.  It is controlled by -fsched-verbose=N:
-   N>0 and no -DSR : the output is directed to stderr.
-   N>=10 will direct the printouts to stderr (regardless of -dSR).
-   N=1: same as -dSR.
+   N=0: no debugging output.
+   N=1: default value.
N=2: bb's probabilities, detailed ready list info, unit/insn info.
N=3: rtl at abort point, control-flow, regions info.
N=5: dependences info.  */
-
 int sched_verbose = 0;
 
-/* Debugging file.  All printouts are sent to dump, which is always set,
-   either to stderr, or to the dump listing file (-dRS).  */
+/* Debugging file.  All printouts are sent to dump. */
 FILE *sched_dump = 0;
 
 /* This is a placeholder for the scheduler parameters common
to all schedulers.  */
 struct common_sched_info_def *common_sched_info;
 
 #define INSN_TICK(INSN)	(HID (INSN)->tick)
 #define INSN_EXACT_TICK(INSN) 

Re: [PATCH 1/4][AArch64] Add scheduling and cost models for Exynos M1

2015-11-05 Thread James Greenhalgh
On Wed, Nov 04, 2015 at 05:21:03PM -0600, Evandro Menezes wrote:
> Please, ignore the previous patch.  This is the intended patch.
> 
> Sorry.
> 
> -- 
> Evandro Menezes
> 
> On 11/04/2015 05:18 PM, Evandro Menezes wrote:
> >This patch adds extra tuning information about AArch64 targets:
> >
> > * Maximum number of case values before resorting to a jump table
> >   The default values assumed independently of the specific backends
> >   may be rather low for modern processors, which sport quite efficient
> >   direct branch prediction, whereas indirect branch prediction is
> >   still typically not so efficient.  This value may be specifically
> >   set for a processor or left at zero to use the default values.
> > * L1 cache line size
> >   The auto-prefetcher uses this information when emitting software
> >   prefetch insns.
> >
> >Please, commit if it's alright.
> >
> >Thank you,
> >
> 

Thanks for the patch,

> diff --git a/gcc/config/aarch64/aarch64-protos.h 
> b/gcc/config/aarch64/aarch64-protos.h
> index 81792bc..ecf4685 100644
> --- a/gcc/config/aarch64/aarch64-protos.h
> +++ b/gcc/config/aarch64/aarch64-protos.h
> @@ -195,6 +195,9 @@ struct tune_params
>int vec_reassoc_width;
>int min_div_recip_mul_sf;
>int min_div_recip_mul_df;
> +  int max_case_values; /* Case values threshold; or 0 for the default.  */

If we're using an int, how about -1 as the sentinel value? (Maybe someone
really likes jump tables!). Otherwise, make this an unsigned int?

> +
> +  int cache_line_size; /* Cache line size; or 0 for the default.  */

unsigned int?

The patch is otherwise OK, though it needs a ChangeLog.

Thanks,
James



[PATCH][ARM][cleanup] Remove uses of CONST_DOUBLE_HIGH/LOW

2015-11-05 Thread Kyrill Tkachov

Hi all,

This cleanup patch removes handling of CONST_DOUBLE rtxes that carry large 
integers.
These should never be passed down from the midend and the arm backend doesn't 
create them.
The code has been there since 2007 but the arm backend was moved to 
TARGET_SUPPORTS_WIDE_INT
in 2014, so this path should never be taken.

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Kyrill

commit 8a1c27b21e6662c3203b95c457c0df5c0239805e
Author: Kyrylo Tkachov 
Date:   Thu Oct 29 11:20:43 2015 +

[ARM][cleanup] Remove uses of CONST_DOUBLE_HIGH/LOW

diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 5c97786..36958af 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -12386,32 +12386,15 @@ neon_valid_immediate (rtx op, machine_mode mode, int inverse,
 {
   rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
   unsigned HOST_WIDE_INT elpart;
-  unsigned int part, parts;
 
-  if (CONST_INT_P (el))
-{
-  elpart = INTVAL (el);
-  parts = 1;
-}
-  else if (CONST_DOUBLE_P (el))
-{
-  elpart = CONST_DOUBLE_LOW (el);
-  parts = 2;
-}
-  else
-gcc_unreachable ();
+  gcc_assert (CONST_INT_P (el));
+  elpart = INTVAL (el);
 
-  for (part = 0; part < parts; part++)
-{
-  unsigned int byte;
-  for (byte = 0; byte < innersize; byte++)
-{
-  bytes[idx++] = (elpart & 0xff) ^ invmask;
-  elpart >>= BITS_PER_UNIT;
-}
-  if (CONST_DOUBLE_P (el))
-elpart = CONST_DOUBLE_HIGH (el);
-}
+  for (unsigned int byte = 0; byte < innersize; byte++)
+	{
+	  bytes[idx++] = (elpart & 0xff) ^ invmask;
+	  elpart >>= BITS_PER_UNIT;
+	}
 }
 
   /* Sanity check.  */


Re: [PATCH v3 1/2] [PR debug/67192] Fix C loops' back-jump location

2015-11-05 Thread Bernd Schmidt

On 11/04/2015 05:17 PM, Andreas Arnez wrote:


gcc/c/ChangeLog:

PR debug/67192
* c-parser.c (c_parser_while_statement): Finish the loop before
parsing ahead for misleading indentation.
(c_parser_for_statement): Likewise.


This is OK. Does C++ have similar issues?


Bernd



Re: [ping] Change behavior of -fsched-verbose option

2015-11-05 Thread Bernd Schmidt

On 11/05/2015 09:11 AM, Nikolai Bozhenov wrote:

Please commit if it is OK for trunk.


Hmm, do you have a copyright assignment on file?


Bernd



Re: [Patch AArch64] Switch constant pools to separate rodata sections.

2015-11-05 Thread James Greenhalgh
On Wed, Nov 04, 2015 at 02:26:31PM +, Ramana Radhakrishnan wrote:
> 
> 
> On 03/11/15 17:09, James Greenhalgh wrote:
> > On Tue, Nov 03, 2015 at 04:36:45PM +, Ramana Radhakrishnan wrote:
> >> Hi,
> >>
> >>Now that PR63304 is fixed and we have an option to address
> >> any part of the memory using adrp / add or adrp / ldr instructions
> >> it makes sense to switch out literal pools into their own
> >> mergeable sections by default.
> >>
> >> This would mean that by default we could now start getting
> >> the benefits of constant sharing across the board, potentially
> >> improving code size. The other advantage of doing so, for the
> >> security conscious is that this prevents intermingling of literal
> >> pools with code.
> >>
> >> Wilco's kindly done some performance measurements and suggests that
> >> there is not really a performance regression in doing this.
> >> I've looked at the code size for SPEC2k6 today at -Ofast and
> >> in general there is a good code size improvement as expected
> >> by sharing said constants.
> >>
> >> Tested on aarch64-none-elf with no regressions and bootstrapped 
> >> and regression tested in my tree for a number of days now.
> >>
> >> Ok to commit ?
> > 
> > OK with the comment nits below fixed up.
> > 
> >>* config/aarch64/aarch64.c (aarch64_select_rtx_section): Switch
> >> to default section handling for non PC relative literal loads.
> > 
> >> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> >> index 5c8604f..9d709e5 100644
> >> --- a/gcc/config/aarch64/aarch64.c
> >> +++ b/gcc/config/aarch64/aarch64.c
> >> @@ -5244,13 +5244,22 @@ aarch64_use_blocks_for_constant_p (machine_mode 
> >> mode ATTRIBUTE_UNUSED,
> >>return false;
> >>  }
> >>  
> >> +/* Force all constant pool entries into the current function section.
> > 
> > Is this comment accurate now? I think it only applied to -mcmodel=large
> > but maybe I'm misunderstanding?
> > 
> >> +   In the large model we cannot reliably address all the address space
> >> +   thus for now, inline this with the text.  */
> >>  static section *
> >> +aarch64_select_rtx_section (machine_mode mode,
> >> +  rtx x,
> >> +  unsigned HOST_WIDE_INT align)
> >> +{
> >> +  /* Force all constant pool entries into the current function section.
> >> + In the large model we cannot reliably address all the address space
> >> + thus for now, inline this with the text.  */
> >> +  if (!aarch64_nopcrelative_literal_loads
> >> +  || aarch64_cmodel == AARCH64_CMODEL_LARGE)
> >> +return function_section (current_function_decl);
> > 
> > This is just a copy paste of the text above (and probably the better place
> > for it).
> > 
> > I think we'd want a more general comment at the top of the function,
> > then this can stay.
> > 
> 
> True and I've just been reading more of the backend - We could now start
> using blocks for constant pools as well. So let's do that.
> 
> How does something like this look ?

One tiny nit. Otherwise, this is OK.


> Tested on aarch64-none-elf - no regressions.
> 
> 2015-11-04  Ramana Radhakrishnan  
> 
> * config/aarch64/aarch64.c
> (aarch64_can_use_per_function_literal_pools_p): New.
> (aarch64_use_blocks_for_constant_p): Adjust declaration
> and use aarch64_can_use_function_literal_pools_p.
> (aarch64_select_rtx_section): Update.

> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
> index 5c8604f..4f5e069 100644
> --- a/gcc/config/aarch64/aarch64.c
> +++ b/gcc/config/aarch64/aarch64.c
> @@ -5235,24 +5235,37 @@ aarch64_uxt_size (int shift, HOST_WIDE_INT mask)
>return 0;
>  }
>  
> +/* Constant pools are per function only when PC relative
> +   literal loads are true or we are in the large memory
> +   model.  */

Newline here please.

Thanks,
James

> +static inline bool
> +aarch64_can_use_per_function_literal_pools_p (void)
> +{
> +  return (!aarch64_nopcrelative_literal_loads
> +   || aarch64_cmodel == AARCH64_CMODEL_LARGE);
> +}
> +



[Fortran, patch, pr68218, v1] ALLOCATE with size given by a module function

2015-11-05 Thread Andre Vehreschild
Hi all,

attached is a rather trivial patch to prevent multiple evaluations of a
function in:

  allocate( array(func()) )

The patch tests whether the upper bound of the array is a function
and calls gfc_evaluate_now().

Bootstrapped and regtested for x86_64-linux-gnu/f21.

Ok for trunk?

Regards,
Andre
-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


pr68218_1.clog
Description: Binary data
diff --git a/gcc/fortran/trans-array.c b/gcc/fortran/trans-array.c
index 6bbf8cc..e28a5ce 100644
--- a/gcc/fortran/trans-array.c
+++ b/gcc/fortran/trans-array.c
@@ -5150,6 +5150,8 @@ gfc_array_init_size (tree descriptor, int rank, int corank, tree * poffset,
 	  gcc_assert (ubound);
	  gfc_conv_expr_type (&se, ubound, gfc_array_index_type);
	  gfc_add_block_to_block (pblock, &se.pre);
+	  if (ubound->expr_type == EXPR_FUNCTION)
+	se.expr = gfc_evaluate_now (se.expr, pblock);
 	}
   gfc_conv_descriptor_ubound_set (descriptor_block, descriptor,
   gfc_rank_cst[n], se.expr);
diff --git a/gcc/testsuite/gfortran.dg/allocate_with_arrayspec_1.f90 b/gcc/testsuite/gfortran.dg/allocate_with_arrayspec_1.f90
new file mode 100644
index 000..686b612
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/allocate_with_arrayspec_1.f90
@@ -0,0 +1,29 @@
+! { dg-do run }
+! { dg-options "-fdump-tree-original" }
+
+MODULE mo_test
+
+  integer :: n = 0
+CONTAINS
+
+  FUNCTION nquery()
+INTEGER :: nquery
+WRITE (0,*) "hello!"
+n = n + 1
+nquery = n
+  END FUNCTION nquery
+
+END MODULE mo_test
+
+
+! --
+! MAIN PROGRAM
+! --
+PROGRAM example
+   USE mo_test
+   INTEGER, ALLOCATABLE :: query_buf(:)
+   ALLOCATE(query_buf(nquery()))
+   if (n /= 1 .or. size(query_buf) /= n) call abort()
+END PROGRAM example
+
+! { dg-final { scan-tree-dump-times "nquery" 5 "original" } }


[PATCH, ARM] PR target/68059 libgcc should not use __write for printing fatal error

2015-11-05 Thread Szabolcs Nagy

libgcc/config/arm/linux-atomic-64bit.c uses __write to print an error
message if the 64bit xchg method is not available in the kernel.

__write is not part of the public libc abi.  Since this code is only
run on linux the write syscall can be invoked directly.

And __builtin_trap is a simpler way to crash than abort.

The new behaviour on a linux kernel before v3.1:

# ./a.out
A newer kernel is required to run this binary. (__kernel_cmpxchg64 helper)
Illegal instruction

OK for trunk and backporting?

libgcc/ChangeLog:

2015-11-05  Szabolcs Nagy  

PR target/68059
* config/arm/linux-atomic-64bit.c (__write): Remove declaration.
(abort): Likewise.
(linux_write): Define.
diff --git a/libgcc/config/arm/linux-atomic-64bit.c b/libgcc/config/arm/linux-atomic-64bit.c
index cdf713c..aba3334 100644
--- a/libgcc/config/arm/linux-atomic-64bit.c
+++ b/libgcc/config/arm/linux-atomic-64bit.c
@@ -33,9 +33,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
kernels; we check for that in an init section and bail out rather
unceremoneously.  */
 
-extern unsigned int __write (int fd, const void *buf, unsigned int count);
-extern void abort (void);
-
 /* Kernel helper for compare-and-exchange.  */
 typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
 	const long long* newval,
@@ -45,6 +42,19 @@ typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
 /* Kernel helper page version number.  */
 #define __kernel_helper_version (*(unsigned int *)0x0ffc)
 
+static void
+linux_write (int fd, const void *buf, unsigned int count)
+{
+	register long r7 asm ("r7") = 4; /* Linux __NR_write.  */
+	register long r0 asm ("r0") = fd;
+	register long r1 asm ("r1") = (long)buf;
+	register long r2 asm ("r2") = count;
+	asm volatile ("svc 0"
+		:
+		: "r" (r7), "r" (r0), "r" (r1), "r" (r2)
+		: "memory");
+}
+
 /* Check that the kernel has a new enough version at load.  */
 static void __check_for_sync8_kernelhelper (void)
 {
@@ -53,11 +63,9 @@ static void __check_for_sync8_kernelhelper (void)
   const char err[] = "A newer kernel is required to run this binary. "
 "(__kernel_cmpxchg64 helper)\n";
   /* At this point we need a way to crash with some information
-	 for the user - I'm not sure I can rely on much else being
-	 available at this point, so do the same as generic-morestack.c
-	 write () and abort ().  */
-  __write (2 /* stderr.  */, err, sizeof (err));
-  abort ();
+	 for the user.  */
+  linux_write (2 /* stderr.  */, err, sizeof (err));
+  __builtin_trap ();
 }
 };
 


Re: [ping] Fix PR debug/66728

2015-11-05 Thread Richard Biener
On Thu, Nov 5, 2015 at 12:45 AM, Mike Stump  wrote:
>
> On Nov 4, 2015, at 12:50 PM, Richard Sandiford  
> wrote:
>
>> Mike Stump  writes:
>>> Index: dwarf2out.c
>>> ===
>>> --- dwarf2out.c  (revision 229720)
>>> +++ dwarf2out.c  (working copy)
>>> @@ -15593,8 +15593,13 @@
>>>   return true;
>>>
>>> case CONST_WIDE_INT:
>>> -  add_AT_wide (die, DW_AT_const_value,
>>> -   std::make_pair (rtl, GET_MODE (rtl)));
>>> +  {
>>> +wide_int w1 = std::make_pair (rtl, MAX_MODE_INT);
>>> +int prec = MIN (wi::min_precision (w1, UNSIGNED),
>>> +(unsigned int)CONST_WIDE_INT_NUNITS (rtl) * 
>>> HOST_BITS_PER_WIDE_INT);
>>> +wide_int w = wide_int::from (w1, prec, UNSIGNED);
>>> +add_AT_wide (die, DW_AT_const_value, w);
>>> +  }
>>>   return true;
>>>
>>> case CONST_DOUBLE:
>>
>> Setting the precision based on CONST_WIDE_INT_NUNITS means that
>> we might end up with two different precisions for two values of
>> the same variable.  E.g. for a 192-bit type, 1<<64 would be given
>> a precision of 128 (because it needs two HWIs to store) but 1<<128
>> would be given a precision of 192 (because it needs three HWIs to store).
>> We could then abort when comparing them for equality, since equality
>> needs both integers to have the same precision.  E.g. from same_dw_val_p:
>>
>>case dw_val_class_wide_int:
>>  return *v1->v.val_wide == *v2->v.val_wide;
>
> Yeah, seems like we should have a v1.prec == v2.prec && on that.  The bad 
> news, there are four of them that are like this.  The good news, 3 of them 
> are location operands, and I don’t think they can hit for a very long time.  
> I think this is an oversight from the double_int version of the code where we 
> just check the 128 bits for equality.  We can see if Richard wants to weigh 
> in.  I think I’d just pre-approve the change, though, I think a helper to 
> perform mixed equality testing would be the way to go as there are 4 of them, 
> and I pretty sure they should all use the mixed version.  Though, maybe the 
> location list versions are never mixed.  If they aren’t, then there is only 1 
> client, so, I’d just do the precision test inline.  Anyone able to comment on 
> the location list aspect of this?

No idea on location lists but maybe this means we should just use the
maximum supported integer mode for CONST_WIDE_INTs?


Re: [PATCH/RFC/RFA] Machine modes for address printing (all targets)

2015-11-05 Thread Bernd Schmidt

On 11/04/2015 01:54 PM, Julian Brown wrote:

That's not all targets by any means, but may be enough to warrant a
change in the interface. I propose that:

* The output_address function should have a machine_mode argument
   added. Bare addresses (e.g. the 'a' case in final.c) should pass
   "VOIDmode" for this argument.

* Other callers of output_address -- actually all in backends -- can
   pass the machine mode for the memory access in question.

* The TARGET_PRINT_OPERAND_ADDRESS hook shall also have a machine_mode
   argument added. The legacy PRINT_OPERAND_ADDRESS hook can be left
   alone. (The documentation for the operand-printing hooks needs fixing
   too, incidentally.)


I think this approach seems fine.


 static void
-mcore_print_operand_address (FILE * stream, rtx x)
+mcore_print_operand_address (FILE * stream, machine_mode mode ATTRIBUTE_UNUSED,
+rtx x)


So apparently we're settling on writing the unused arg as just 
"machine_mode" without a name. Please change everywhere.



@@ -1754,7 +1754,7 @@ mmix_print_operand_punct_valid_p (unsign
 /* TARGET_PRINT_OPERAND_ADDRESS.  */

 static void
-mmix_print_operand_address (FILE *stream, rtx x)
+mmix_print_operand_address (FILE *stream, machine_mode mode, rtx x)
 {
   if (REG_P (x))
 {


The arg appears to be unused - I'd expect to see a warning here.

Other thank that it looks OK. I'm not going to require that you test 
every target, but it would be good to have the full set built to cc1 
before and after, and please be on the lookout for fallout.



Bernd


[gomp4, committed, 1/9] Move expansion of kernels region back to first omp-expand

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a seperate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



This patch moves expansion of the kernels region back to the first 
omp-expand pass, before ssa.


There's no longer a need for the region-replacing call to expand into 
BUILT_IN_GOACC_KERNELS_INTERNAL and subsequently map onto 
BUILT_IN_GOACC_PARALLEL during the second omp-expand pass.  The 
BUILT_IN_GOACC_KERNELS_INTERNAL call was modeled transparent to alias 
analysis, and that's no longer needed.


Thanks,
- Tom
Move expansion of kernels region back to first omp-expand

2015-11-04  Tom de Vries  

	* omp-low.c (expand_omp_target): Remove do_emit_library_call variable,
	assume true.  Remove do_splitoff variable, assume true.  Remove
	BUILT_IN_GOACC_KERNELS_INTERNAL handling.  Replace kernels region with a
	BUILT_IN_GOACC_PARALLEL call.
---
 gcc/omp-low.c | 57 -
 1 file changed, 4 insertions(+), 53 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 6bac074..ac8c8d0 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12563,8 +12563,6 @@ expand_omp_target (struct omp_region *region)
   gimple *stmt;
   edge e;
   bool offloaded, data_region;
-  bool do_emit_library_call = true;
-  bool do_splitoff = true;
 
   entry_stmt = as_a  (last_stmt (region->entry));
 
@@ -12608,43 +12606,7 @@ expand_omp_target (struct omp_region *region)
   exit_bb = region->exit;
 
   if (gimple_omp_target_kind (entry_stmt) == GF_OMP_TARGET_KIND_OACC_KERNELS)
-{
-  if (!gimple_in_ssa_p (cfun))
-	{
-	  /* We need to do analysis and optimizations on the kernels region
-	 before splitoff.  Since that's hard to do on low gimple, we
-	 postpone the splitoff until we're in SSA.
-	 However, we do the emit of the corresponding function call already,
-	 in order to keep the arguments of the call alive until the
-	 splitoff.
-	 Since at this point the function that is called is empty, we can
-	 model the function as BUILT_IN_GOACC_KERNELS_INTERNAL, which marks
-	 some of it's function arguments as non-escaping, so it acts less
-	 as an optimization barrier.  */
-	  do_splitoff = false;
-	  cfun->curr_properties &= ~PROP_gimple_eomp;
-
-	  mark_loops_in_oacc_kernels_region (region->entry, region->exit);
-	}
-  else
-	{
-	  /* Don't emit the library call.  We've already done that.  */
-	  do_emit_library_call = false;
-	  /* Transform BUILT_IN_GOACC_KERNELS_INTERNAL into
-	 BUILT_IN_GOACC_PARALLELL.  Now that the function
-	 body will be split off, we can no longer regard the
-	 omp_data_array reference as non-escaping.  */
-	  gsi = gsi_last_bb (entry_bb);
-	  gsi_prev ();
-	  gcall *call = as_a  (gsi_stmt (gsi));
-	  gcc_assert (gimple_call_builtin_p
-		  (call, BUILT_IN_GOACC_KERNELS_INTERNAL));
-	  tree fndecl = builtin_decl_explicit (BUILT_IN_GOACC_PARALLEL);
-	  gimple_call_set_fndecl (call, fndecl);
-	  gimple_call_set_fntype (call, TREE_TYPE (fndecl));
-	  gimple_call_reset_alias_info (call);
-	}
-}
+mark_loops_in_oacc_kernels_region (region->entry, region->exit);
 
   basic_block entry_succ_bb = single_succ (entry_bb);
   if (offloaded && !gimple_in_ssa_p (cfun))
@@ -12654,8 +12616,7 @@ expand_omp_target (struct omp_region *region)
 	gsi_remove (, true);
 }
 
-  if (offloaded
-  && do_splitoff)
+  if (offloaded)
 {
   unsigned srcidx, dstidx, num;
 
@@ -12852,13 +12813,6 @@ expand_omp_target (struct omp_region *region)
   pop_cfun ();
 }
 
-  if (!do_emit_library_call)
-{
-  if (gimple_in_ssa_p (cfun))
-	update_ssa (TODO_update_ssa_only_virtuals);
-  return;
-}
-
   /* Emit a library call to launch the offloading region, or do data
  transfers.  */
   tree t1, t2, t3, t4, device, cond, depend, 

[gomp4, committed, 4/9] Revert "Add pass_dominator::sese_mode_p ()"

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


We needed the sese_mode_p functionality in the dominators pass to limit 
optimization scope to kernels regions. Now we no longer encounter 
kernels region in the kernels pass group, but split-off function that 
used to be a kernels region. So we no longer need this functionality.


This patch reverts the sese_mode_p functionality.

Thanks,
- Tom
Revert "Add pass_dominator::sese_mode_p ()"

2015-11-04  Tom de Vries  

	revert:
	2015-10-12  Tom de Vries  

	* tree-ssa-dom.c (pass_dominator::jump_threading_p): Handle sese_mode_p.
	(pass_dominator::sese_mode_p, pass_dominator::get_sese): New protected
	virtual function.
	(pass_dominator::execute): Handle sese_mode_p.
---
 gcc/tree-ssa-dom.c | 78 ++
 1 file changed, 8 insertions(+), 70 deletions(-)

diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index e1cf38b..44253bf 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -44,7 +44,6 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-dom.h"
 #include "gimplify.h"
 #include "tree-cfgcleanup.h"
-#include "cfgcleanup.h"
 #include "omp-low.h"
 
 /* This file implements optimizations on the dominator tree.  */
@@ -535,17 +534,7 @@ class dominator_base : public gimple_opt_pass
   unsigned int execute (function *);
 
   /* Return true if pass should perform jump threading.  */
-  virtual bool jump_threading_p (void) { return !sese_mode_p (); }
-
-  /* Return true if pass should visit a series of seses rather than the whole
- dominator tree.  */
-  virtual bool sese_mode_p (void) { return false; }
-
-  /* In sese mode, return true if there's another sese to visit.  Return the
- sese to visit in SESE_ENTRY and SESE_EXIT.  */
-  virtual bool get_sese (basic_block *sese_entry ATTRIBUTE_UNUSED,
-			 basic_block *sese_exit ATTRIBUTE_UNUSED)
-{ gcc_unreachable (); }
+  virtual bool jump_threading_p (void) { return true; }
 }; // class dominator_base
 
 const pass_data pass_data_dominator =
@@ -602,14 +591,11 @@ dominator_base::execute (function *fun)
  LOOPS_HAVE_PREHEADERS won't be needed here.  */
   loop_optimizer_init (LOOPS_HAVE_PREHEADERS | LOOPS_HAVE_SIMPLE_LATCHES);
 
-  if (!sese_mode_p ())
-/* Initialize the value-handle array.  */
-threadedge_initialize_values ();
+  /* Initialize the value-handle array.  */
+  threadedge_initialize_values ();
 
   if (jump_threading_p ())
 {
-  gcc_assert (!sese_mode_p ());
-
   /* We need accurate information regarding back edges in the CFG
 	 for jump threading; this may include back edges that are not part of
 	 a single loop.  */
@@ -631,29 +617,7 @@ dominator_base::execute (function *fun)
 			 const_and_copies,
 			 avail_exprs_stack,
 			 jump_threading_p ());
-  if (!sese_mode_p ())
-walker.walk (fun->cfg->x_entry_block_ptr);
-  else
-{
-  basic_block sese_entry, sese_exit;
-  while (get_sese (_entry, _exit))
-	{
-	  threadedge_initialize_values ();
-	  avail_exprs_stack->push_marker ();
-	  const_and_copies->push_marker ();
-
-	  walker.walk_until (sese_entry, sese_exit, true);
-
-	  avail_exprs_stack->pop_to_marker ();
-	  const_and_copies->pop_to_marker ();
-	  threadedge_finalize_values ();
-
-	  /* KLUDGE: The dom_walker does not allow unreachable blocks when
-	 starting the walk, and during the dom_opt_dom_walker walk we may
-	 produce unreachable blocks, so we need to clean them up here.  */
-	  delete_unreachable_blocks ();
-	}
-}
+  walker.walk (fun->cfg->x_entry_block_ptr);
 
   {
 gimple_stmt_iterator gsi;
@@ -753,9 +717,8 @@ dominator_base::execute (function *fun)
   delete avail_exprs_stack;
   delete 

[gomp4, committed, 5/9] Handle oacc function in parloops

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch removes handling of kernels regions in tree-parloops.c, and 
adds handling of oacc functions that used to be kernels regions before 
they were split off.


That means we no longer add a parallel pragma. OTOH, we now have to 
clear PROP_gimple_eomp in order to trigger the subsequent omp-expand pass.


Thanks,
- Tom

Handle oacc function in parloops

2015-11-04  Tom de Vries  

	* omp-low.c (set_oacc_fn_attrib): Remove static.
	* omp-low.h (set_oacc_fn_attrib): Declare.
	* tree-parloops.c (create_parallel_loop): Remove region_entry parameter.
	Remove handling of oacc kernels pragma and GOACC_kernels_internal call.
	Remove insertion of oacc parallel pragma.  Set oacc function attributes.
	(gen_parallel_loop): Remove region_entry parameter.
	(get_omp_data_i_param): New function.
	(try_create_reduction_list): Use get_omp_data_i_param instead of
	gimple_stmt_omp_data_i_init_p.
	(ref_conflicts_with_region): Handle GIMPLE_RETURN.
	(oacc_entry_exit_ok_1): Same.  Add missing is_gimple_call test before
	gimple_call_internal_p test.
	(oacc_entry_exit_ok): Remove region_entry parameter.  Use
	get_omp_data_i_param instead of get_omp_data_i.  Set region_bbs to all
	bbs in function.  Use function entry as region entry.
	(parallelize_loops): Allow oacc functions and parallelized function if
	oacc_kernels_p.  Remove	region_entry variable.
	(pass_parallelize_loops_oacc_kernels::execute): Clear PROP_gimple_eomp
	if a loop was parallelized.
---
 gcc/omp-low.c   |   2 +-
 gcc/omp-low.h   |   1 +
 gcc/tree-parloops.c | 119 ++--
 3 files changed, 51 insertions(+), 71 deletions(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index ac8c8d0..58cb959 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12456,7 +12456,7 @@ replace_oacc_fn_attrib (tree fn, tree dims)
function attribute.  Push any that are non-constant onto the ARGS
list, along with an appropriate GOMP_LAUNCH_DIM tag.  */
 
-static void
+void
 set_oacc_fn_attrib (tree fn, tree clauses, vec *args)
 {
   /* Must match GOMP_DIM ordering.  */
diff --git a/gcc/omp-low.h b/gcc/omp-low.h
index 7c9efdc..673b470 100644
--- a/gcc/omp-low.h
+++ b/gcc/omp-low.h
@@ -40,6 +40,7 @@ extern vec get_bbs_in_oacc_kernels_region (basic_block,
 extern void replace_oacc_fn_attrib (tree, tree);
 extern tree build_oacc_routine_dims (tree);
 extern tree get_oacc_fn_attrib (tree);
+extern void set_oacc_fn_attrib (tree, tree, vec *);
 
 extern GTY(()) vec *offload_funcs;
 extern GTY(()) vec *offload_vars;
diff --git a/gcc/tree-parloops.c b/gcc/tree-parloops.c
index f14cf8a..c038dfe 100644
--- a/gcc/tree-parloops.c
+++ b/gcc/tree-parloops.c
@@ -2017,7 +2017,7 @@ transform_to_exit_first_loop (struct loop *loop,
 static void
 create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 		  tree new_data, unsigned n_threads, location_t loc,
-		  basic_block region_entry, bool oacc_kernels_p)
+		  bool oacc_kernels_p)
 {
   gimple_stmt_iterator gsi;
   basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
@@ -2039,10 +2039,6 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
   paral_bb = single_pred (bb);
   gsi = gsi_last_bb (paral_bb);
 }
-  else
-/* Make sure the oacc parallel is inserted on top of the oacc kernels
-   region.  */
-gsi = gsi_last_bb (region_entry);
 
   if (!oacc_kernels_p)
 {
@@ -2056,50 +2052,10 @@ create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
 }
   else
 {
-  /* Create oacc parallel pragma based on oacc kernels pragma and
-	 GOACC_kernels_internal call.  */
-  gomp_target *kernels = as_a  (gsi_stmt (gsi));
-
-  tree 

Re: Fix 61441

2015-11-05 Thread Sujoy Saraswati
Hi,

> It seems this code has moved to fold-const.c, so the patch will need
> updating.

I have updated the patch and sending again.

> Now, is it actually correct for integer_valued_real_single_p (now) to
> treat sNaN as an integer?  Looking at existing uses, they all appear to be
> in match.pd: it's used for removing redundant trunc / floor / ceil / round
> / nearbyint on integer values.  In such cases, it's *incorrect* to count
> sNaN as an integer, because while those functions map qNaN quietly to
> qNaN, they should map sNaN to qNaN and raise "invalid" in the process.  So
> I don't think you should have this change at all.

Ok. I have also updated the comments for the functions stating they
return false for sNaN.

> (I don't see why the handling of rint checks flag_errno_math.  rint should
> quietly return infinite and qNaN arguments just like nearbyint; it never
> needs to set errno and distinguishing rint / nearbyint here is spurious;
> the only difference is regarding the "inexact" exception.)

With these changes, real_isinteger returns false for sNaN. This in
turn means that integer_valued_real_p would return false with sNaN.
This means that for sNaN, rint will not go to the path of checking
flag_errno_math. Anyway, I modified that also as part of this patch to
make rint similar to nearbyint.

> I realise this corresponds to some existing code (in const_binop, at
> least), but I don't think that existing code is a good model.
>
> If -fsignaling-nans, it's still perfectly fine to fold when the argument
> is a quiet NaN.  So what you actually want in such cases where you have a
> known constant argument - as opposed to an unknown argument that might or
> might not be constant - is to check whether that argument is itself a
> signaling NaN.  So you want to add a new REAL_VALUE_ISSIGNALING, or
> something like that.  And then find existing code that has the wrong (NaN
> and sNaNs supported) check and convert it to checking (this value is
> sNaN).
>
> (This code has also moved to another function in fold-const.c, it seems.)
>
> Cf. the match.pd handling of fmin / fmax checking for signaling NaN
> arguments explicitly.
>
> Some other places in this patch have similar issues with checking
> HONOR_SNANS && REAL_VALUE_ISNAN when they should check if the particular
> value is sNaN.

I modified the code to check for sNaN specifically. The modified patch is below.

Bootstrap and regression tests on x86_64-linux-gnu passed with changes
done on trunk.

Is this fine ?

Regards,
Sujoy

2015-11-04  Sujoy Saraswati 

   PR tree-optimization/61441
   * fold-const-call.c (fold_const_pow): Avoid the operation
   if flag_signaling_nans is on and the operand is an sNaN.
   (fold_const_call_sss): Same for BUILT_IN_POWI.
   * fold-const.c (const_binop): Convert sNaN to qNaN when
   flag_signaling_nans is off.
   (const_unop): Avoid the operation, other than NEGATE and
   ABS, if flag_signaling_nans is on and the operand is an sNaN.
   (fold_convert_const_real_from_real): Avoid the operation if
   flag_signaling_nans is on and the operand is an sNaN.
   (integer_valued_real_unary_p): Update comment stating it
   returns false for sNaN values.
   (integer_valued_real_binary_p, integer_valued_real_call_p): Same.
   (integer_valued_real_single_p): Same.
   (integer_valued_real_invalid_p, integer_valued_real_p): Same.
   * gimple-fold.c (gimple_assign_integer_valued_real_p): Same.
   (gimple_call_integer_valued_real_p): Same.
   (gimple_phi_integer_valued_real_p): Same.
   (gimple_stmt_integer_valued_real_p): Same.
   * match.pd (f(x) -> x): Removed flag_errno_math check for RINT.
   * real.c (do_add): Make resulting NaN value to be qNaN.
   (do_multiply, do_divide, do_fix_trunc): Same.
   (real_arithmetic, real_ldexp): Same.
   (real_issignaling_nan): New.
   (real_isinteger): Updated comment stating it returns false for sNaN.
   * real.h (real_issignaling_nan, REAL_VALUE_ISSIGNALING_NAN): New.
   * simplify-rtx.c (simplify_const_unary_operation): Avoid the
   operation if flag_signaling_nans is on and the operand is an sNaN.
   (simplify_const_binary_operation): Check for sNaN instead of NaN.
   * tree-ssa-math-opts.c (gimple_expand_builtin_pow): Avoid the
   operation if flag_signaling_nans is on and the operand is an sNaN.

   PR tree-optimization/61441
   * gcc.dg/pr61441.c: New testcase.

Index: gcc/fold-const-call.c
===
--- gcc/fold-const-call.c   (revision 229805)
+++ gcc/fold-const-call.c   (working copy)
@@ -484,7 +484,11 @@ fold_const_pow (real_value *result, const real_val
  || !real_equal (arg0, )))
 {
   bool inexact = real_powi (result, format, arg0, n1);
-  if (flag_unsafe_math_optimizations || !inexact)
+  /* Avoid the folding if flag_signaling_nans is on.  */
+   

Re: [PATCH v3 1/2] [PR debug/67192] Fix C loops' back-jump location

2015-11-05 Thread Bernd Schmidt

On 11/05/2015 12:33 PM, Andreas Arnez wrote:


Thanks again for reviewing.  Are you going to look at patch #2 as well?


Yeah, still thinking about that one.


Does C++ have similar issues?


Not this particular issue, AFAIK.  But I've just looked at how C++ fares
with the enhanced version of pr67192.c from patch #2.  There I see the
following:

   Breakpoint 2, f4 () at pr67192.cc:54
   (gdb) p cnt
   $1 = 16

I.e., when breaking on "while (1)" the first loop iteration has already
executed.  This is because the C++ parser assigns the backward-goto to
the 'while' token.  It's the same issue you pointed at with version 2 of
my patch.

Shall I open a bug for that?


I'd obviously prefer it if you'd manage to get the two frontends to behave 
identically. The alternative would be to open a bug.



Bernd


Re: [PATCH 5/6]tree-sra.c: Fix completely_scalarize for negative array indices

2015-11-05 Thread Alan Lawrence
On 30/10/15 10:54, Eric Botcazou wrote:
> On 30/10/15 10:44, Richard Biener wrote:
>>
>> I think you want to use wide-ints here and
>>
>> wide_int idx = wi::from (minidx, TYPE_PRECISION (TYPE_DOMAIN
>> (...)), TYPE_SIGN (TYPE_DOMAIN (..)));
>> wide_int maxidx = ...
>>
>> you can then simply iterate minidx with ++ and do the final compare
>> against maxidx
>> with while (++idx <= maxidx).  For the array ref index we want to use
>> TYPE_DOMAIN
>> as type as well, not size_int.  Thus wide_int_to_tree (TYPE_DOMAIN 
>> (...)..idx).
[...]
> But using offset_int should be OK, see for example get_ref_base_and_extent.
>

Here's a patch using offset_int. (Not as easy to construct as wide_int::from,
the sign-extend is what appeared to be done elsewhere that constructs
offset_ints).

Tested by bootstrap+check-{gcc,g++,ada,fortran} with the rest of the patchset
(which causes an array[-1..1] to be completely scalarized, among others), on
x86_64 and ARM.

I don't have a test without all that (such would have to be in Ada, and trigger
SRA of such an array but not involving the constant pool); is it OK without?

gcc/ChangeLog:

* tree-sra.c (completely_scalarize): Properly handle negative array
indices using offset_int.
---
 gcc/tree-sra.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/gcc/tree-sra.c b/gcc/tree-sra.c
index e15df1f..6168a7e 100644
--- a/gcc/tree-sra.c
+++ b/gcc/tree-sra.c
@@ -1010,18 +1010,25 @@ completely_scalarize (tree base, tree decl_type, 
HOST_WIDE_INT offset, tree ref)
if (maxidx)
  {
gcc_assert (TREE_CODE (maxidx) == INTEGER_CST);
-   /* MINIDX and MAXIDX are inclusive.  Try to avoid overflow.  */
-   unsigned HOST_WIDE_INT lenp1 = tree_to_shwi (maxidx)
-   - tree_to_shwi (minidx);
-   unsigned HOST_WIDE_INT idx = 0;
-   do
+   tree domain = TYPE_DOMAIN (decl_type);
+   /* MINIDX and MAXIDX are inclusive, and must be interpreted in
+  DOMAIN (e.g. signed int, whereas min/max may be size_int).  */
+   offset_int idx = wi::to_offset (minidx);
+   offset_int max = wi::to_offset (maxidx);
+   if (!TYPE_UNSIGNED (domain))
  {
-   tree nref = build4 (ARRAY_REF, elemtype, ref, size_int (idx),
+   idx = wi::sext (idx, TYPE_PRECISION (domain));
+   max = wi::sext (max, TYPE_PRECISION (domain));
+ }
+   for (int el_off = offset; wi::les_p (idx, max); ++idx)
+ {
+   tree nref = build4 (ARRAY_REF, elemtype,
+   ref,
+   wide_int_to_tree (domain, idx),
NULL_TREE, NULL_TREE);
-   int el_off = offset + idx * el_size;
scalarize_elem (base, el_off, el_size, nref, elemtype);
+   el_off += el_size;
  }
-   while (++idx <= lenp1);
  }
   }
   break;
-- 
1.9.1



Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Alexander Monakov
On Thu, 5 Nov 2015, Joseph Myers wrote:
> On Wed, 4 Nov 2015, Martin Sebor wrote:
> 
> > Improving the compiler output is a good idea. The attached patch
> > prints "[disabled by -O0]" instead of "[enabled]" when an optimization
> > option is enabled by default but when optimization (i.e., -O1 or
> > greater) is not enabled.
> 
> I don't think it's entirely accurate that all options marked as 
> Optimization in *.opt are actually disabled by -O0.  Many are, but it 
> depends on the actual logic controlling each optimization.

Indeed, and Richard already pointed that out when reviewing (my) previous
attempt.  Martin, please review the thread ending at
https://gcc.gnu.org/ml/gcc-patches/2012-05/msg00113.html

I guess eventually we can slightly overhaul optimization options to remove
such ambiguities, but in short term I see no way to accurately determine
whether a pass/option is ultimately in effect or not.

Martin, what about taking Richard's suggestion, tacking a short explanatory
sentence onto the end of --help=optimizers output?

Thanks.
Alexander


[gomp4, committed, 9/9] Remove BUILT_IN_GOACC_KERNELS_INTERNAL

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch removes the no longer needed BUILT_IN_GOACC_KERNELS_INTERNAL 
builtin.


Thanks,
- Tom

Remove BUILT_IN_GOACC_KERNELS_INTERNAL

2015-11-04  Tom de Vries  

	* omp-builtins.def (BUILT_IN_GOACC_KERNELS_INTERNAL): Remove
	DEF_GOACC_BUILTIN_FNSPEC.
	* omp-low.c (expand_omp_target): Remove handling of
	BUILT_IN_GOACC_KERNELS_INTERNAL.
---
 gcc/omp-builtins.def | 5 -
 gcc/omp-low.c| 1 -
 2 files changed, 6 deletions(-)

diff --git a/gcc/omp-builtins.def b/gcc/omp-builtins.def
index 0478eaf..6908f94 100644
--- a/gcc/omp-builtins.def
+++ b/gcc/omp-builtins.def
@@ -43,11 +43,6 @@ DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_ENTER_EXIT_DATA,
 			  BT_FN_VOID_INT_SIZE_PTR_PTR_PTR_INT_INT_VAR,
 			  ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST,
 			  ATTR_NOTHROW_LIST, "...rrr")
-DEF_GOACC_BUILTIN_FNSPEC (BUILT_IN_GOACC_KERNELS_INTERNAL,
-			  "GOACC_kernels_internal",
-			  BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
-			  ATTR_FNSPEC_DOT_DOT_DOT_r_r_r_NOTHROW_LIST,
-			  ATTR_NOTHROW_LIST, "...rrr")
 DEF_GOACC_BUILTIN (BUILT_IN_GOACC_PARALLEL, "GOACC_parallel_keyed",
 		   BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_VAR,
 		   ATTR_NOTHROW_LIST)
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 84accd9..643d017 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -12994,7 +12994,6 @@ expand_omp_target (struct omp_region *region)
 	depend = build_int_cst (ptr_type_node, 0);
   args.quick_push (depend);
   break;
-case BUILT_IN_GOACC_KERNELS_INTERNAL:
 case BUILT_IN_GOACC_PARALLEL:
   {
 	set_oacc_fn_attrib (child_fn, clauses, );
-- 
1.9.1



[gomp4, committed] expand oacc kernels region at same time as oacc parallel region

2015-11-05 Thread Tom de Vries

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch ( 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer 
need the kernels region to be a part of the original function when doing 
alias analysis.


So, we no longer have the need to postpone splitting off the kernels 
region into a separate function until after alias analysis, but we can 
do this at the same time as when we expand the parallel region.


The following patch series implements that:

 1  Move expansion of kernels region back to first omp-expand
 2  Update gate_oacc_kernels to handle oacc function
 3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
 4  Revert "Add pass_dominator::sese_mode_p ()"
 5  Handle oacc function in parloops
 6  Update goacc kernels C testcases
 7  Update goacc kernels Fortran testcases
 8  Release_defs in expand_omp_atomic_fetch_op
 9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as 
single commit. Various things in kernels support will be broken in 
intermediate stages. ]


Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.

Thanks,
- Tom


[gomp4.5] Tweak OpenMP 4.5 mapping rules

2015-11-05 Thread Jakub Jelinek
Hi!

This patch updates the mapping of vars to what has been ratified
in OpenMP 4.5, or where left unspecified, hopefully follows the
discussed intent.

In particular:
1) for C++ references we map what they refer to, and on target
   construct privatize the references themselves (but not what
   they point to, because that is mapped)
2) same var may not be present in both data sharing and mapping
   clauses
3) structure element based array sections (or C++ references)
   don't have the structure elements privatized, but mapped with
   an always pointer store at the start of the region (except
   exit data; and update doesn't touch the structure elements)
4) omp_target_is_present on one past the last element really
   is about what mapping starts at that point, so essentially
   it is checking if the first byte at the specified address
   is mapped
5) zero length array sections pointing to one past the last
   element really are about what mapping starts at that point

From the above, 3) is really not specified in the standard
and just based on the discussions we had, hopefully OpenMP 5.0 will
clarify, and 4)/5) are fuzzy in the standard and also based
on the discussions.

2015-11-05  Jakub Jelinek  

gcc/
* gimplify.c (omp_notice_variable): For references check
whether what it refers to has mappable type, rather than
the reference itself.
(gimplify_scan_omp_clauses): Add support for
GOMP_MAP_FIRSTPRIVATE_REFERENCE and GOMP_MAP_ALWAYS_POINTER,
remove old handling of structure element based array sections.
(gimplify_adjust_omp_clauses_1): For implicit references to
variables with reference type and when not ref to scalar or
ref to pointer, map what they refer to using tofrom and
use GOMP_MAP_FIRSTPRIVATE_REFERENCE for the reference.
(gimplify_adjust_omp_clauses): Remove GOMP_MAP_ALWAYS_POINTER
from target exit data.  Handle GOMP_MAP_FIRSTPRIVATE_REFERENCE.
Drop OMP_CLAUSE_MAP_PRIVATE support.
* omp-low.c (scan_sharing_clauses): Handle
GOMP_MAP_FIRSTPRIVATE_REFERENCE, drop OMP_CLAUSE_MAP_PRIVATE
support.
(lower_omp_target): Handle GOMP_MAP_FIRSTPRIVATE_REFERENCE
and GOMP_MAP_ALWAYS_POINTER.  Drop OMP_CLAUSE_MAP_PRIVATE
support.
* tree-pretty-print.c (dump_omp_clause): Handle
GOMP_MAP_FIRSTPRIVATE_REFERENCE and GOMP_MAP_ALWAYS_POINTER.
Simplify.
* tree-vect-stmts.c (vectorizable_simd_clone_call): Add
SIMD_CLONE_ARG_TYPE_LINEAR_{REF,VAL,UVAL}_VARIABLE_STEP
cases.
gcc/c/
* c-parser.c (c_parser_omp_target_data,
c_parser_omp_target_enter_data,
c_parser_omp_target_exit_data, c_parser_omp_target): Allow
GOMP_MAP_ALWAYS_POINTER.
* c-typeck.c (handle_omp_array_sections): For structure element
based array sections use GOMP_MAP_ALWAYS_POINTER instead of
GOMP_MAP_FIRSTPRIVATE_POINTER.
(c_finish_omp_clauses): Drop generic_field_head, structure
elements are now always mapped even as array section bases,
diagnose same var in data sharing and mapping clauses.
gcc/cp/
* parser.c (cp_parser_omp_target_data,
cp_parser_omp_target_enter_data,
cp_parser_omp_target_exit_data, cp_parser_omp_target): Allow
GOMP_MAP_ALWAYS_POINTER and GOMP_MAP_FIRSTPRIVATE_REFERENCE.
* semantics.c (handle_omp_array_sections): For structure element
based array sections use GOMP_MAP_ALWAYS_POINTER instead of
GOMP_MAP_FIRSTPRIVATE_POINTER.
(finish_omp_clauses): Drop generic_field_head, structure
elements are now always mapped even as array section bases,
diagnose same var in data sharing and mapping clauses.
For references map what they refer to using GOMP_MAP_ALWAYS_POINTER
for structure elements and GOMP_MAP_FIRSTPRIVATE_REFERENCE
otherwise.
gcc/testsuite/
* c-c++-common/gomp/clauses-2.c (foo): Adjust for diagnostics
of variables in both data sharing and mapping clauses and for
structure element based array sections being mapped rather than
privatized.
include/
* gomp-constants.h (enum gomp_map_kind): Add
GOMP_MAP_ALWAYS_POINTER and GOMP_MAP_FIRSTPRIVATE_REFERENCE.
libgomp/
* target.c (gomp_map_0len_lookup, gomp_map_val): New inline
functions.
(gomp_map_vars): Handle GOMP_MAP_ALWAYS_POINTER.  For
GOMP_MAP_ZERO_LEN_ARRAY_SECTION use gomp_map_0len_lookup.
Use gomp_map_val function.
(gomp_exit_data): For GOMP_MAP_*ZERO_LEN* use
gomp_map_0len_lookup instead of gomp_map_lookup.
(omp_target_is_present): Use gomp_map_0len_lookup instead of
gomp_map_lookup.
* testsuite/libgomp.c/target-12.c (main): Adjust for
omp_target_is_present change for one-past-last element.
* testsuite/libgomp.c/target-17.c (foo): Drop tests where
  

[gomp4, committed, 2/9] Update gate_oacc_kernels to handle oacc function

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a seperate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch updates the kernels pass group gate function.

Before, it needed to trigger on functions containing kernel regions.
Now, it needs to trigger on oacc functions that used to be kernels 
regions before they were split off.


Furthermore, I've duplicated the parloops gate here 
(flag_tree_parallelize_loops > 1).  There's not much sense in running 
the pass group unless we're trying to parallelize.


Consequently, I needed to add a "-ftree-parallelize-loops=32" setting 
to a testcase which missed that setting.


Thanks,
- Tom

Update gate_oacc_kernels to handle oacc function

2015-11-04  Tom de Vries  

	* tree-ssa-loop.c: Include omp-low.h.
	(gate_oacc_kernels): Test for flag_tree_parallelize_loops.  Test for
	oacc function attribute.  Test for loop with in_oacc_kernels_region.

	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Run with
	-ftree-parallelize-loops=32.
---
 .../goacc/kernels-counter-var-redundant-load.c |  1 +
 gcc/tree-ssa-loop.c| 22 +-
 2 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
index c4ffc1d..bf59838 100644
--- a/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
+++ b/gcc/testsuite/c-c++-common/goacc/kernels-counter-var-redundant-load.c
@@ -1,4 +1,5 @@
 /* { dg-additional-options "-O2" } */
+/* { dg-additional-options "-ftree-parallelize-loops=32" } */
 /* { dg-additional-options "-fdump-tree-dom_oacc_kernels3" } */
 
 #include 
diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 5e0b5a5..344c6c7 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -40,6 +40,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
+#include "omp-low.h"
 
 
 /* A pass making sure loops are fixed up.  */
@@ -151,7 +152,26 @@ make_pass_tree_loop (gcc::context *ctxt)
 static bool
 gate_oacc_kernels (function *fn)
 {
-  return (fn->curr_properties & PROP_gimple_eomp) == 0;
+  if (flag_tree_parallelize_loops <= 1)
+return false;
+
+  tree oacc_function_attr = get_oacc_fn_attrib (fn->decl);
+  if (oacc_function_attr == NULL_TREE)
+return false;
+
+  tree val = TREE_VALUE (oacc_function_attr);
+  while (val != NULL_TREE && TREE_VALUE (val) == NULL_TREE)
+val = TREE_CHAIN (val);
+
+  if (val != NULL_TREE)
+return false;
+
+  struct loop *loop;
+  FOR_EACH_LOOP (loop, 0)
+if (loop->in_oacc_kernels_region)
+  return true;
+
+  return false;
 }
 
 /* The oacc kernels superpass.  */
-- 
1.9.1



[gomp4, committed, 3/9] Revert "Add skip_stmt parm to pass_dominator::get_sese ()"

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



We were using the skip_stmt functionality in pass_dominator to skip over 
a statement '.omp_data_i = &.omp_data_arr' in the kernels region.  That 
statement is removed during function split-off, so we no longer 
encounter that statement in the kernels pass group.


This patch reverts the skip_stmt functionality.

Thanks,
- Tom

Revert "Add skip_stmt parm to pass_dominator::get_sese ()"

2015-11-04  Tom de Vries  

	Revert:
	2015-10-12  Tom de Vries  

	* tree-ssa-dom.c (dom_opt_dom_walker::set_skip_stmt): New function.
	(dom_opt_dom_walker::m_skip_stmt): New private var.
	(pass_dominator::get_sese): Add skip_stmt parameters.
	(pass_dominator::execute): Call set_skip_stmt with statement to skip for
	sese.
	(dom_opt_dom_walker::before_dom_children): Handle m_skip_stmt.
---
 gcc/tree-ssa-dom.c | 27 +--
 1 file changed, 5 insertions(+), 22 deletions(-)

diff --git a/gcc/tree-ssa-dom.c b/gcc/tree-ssa-dom.c
index 6688a79..e1cf38b 100644
--- a/gcc/tree-ssa-dom.c
+++ b/gcc/tree-ssa-dom.c
@@ -505,7 +505,6 @@ public:
 
   virtual void before_dom_children (basic_block);
   virtual void after_dom_children (basic_block);
-  void set_skip_stmt (gimple *skip_stmt) { m_skip_stmt = skip_stmt; }
 
 private:
   void thread_across_edge (edge);
@@ -516,7 +515,6 @@ private:
 
   gcond *m_dummy_cond;
   bool m_jump_threading_p;
-  gimple *m_skip_stmt;
 };
 
 /* Jump threading, redundancy elimination and const/copy propagation.
@@ -544,11 +542,9 @@ class dominator_base : public gimple_opt_pass
   virtual bool sese_mode_p (void) { return false; }
 
   /* In sese mode, return true if there's another sese to visit.  Return the
- sese to visit in SESE_ENTRY and SESE_EXIT.  If a stmt in the sese should
- not be optimized, return it in SKIP_STMT.  */
+ sese to visit in SESE_ENTRY and SESE_EXIT.  */
   virtual bool get_sese (basic_block *sese_entry ATTRIBUTE_UNUSED,
-			 basic_block *sese_exit ATTRIBUTE_UNUSED,
-			 gimple **skip_stmt ATTRIBUTE_UNUSED)
+			 basic_block *sese_exit ATTRIBUTE_UNUSED)
 { gcc_unreachable (); }
 }; // class dominator_base
 
@@ -640,11 +636,8 @@ dominator_base::execute (function *fun)
   else
 {
   basic_block sese_entry, sese_exit;
-  gimple *skip_stmt = NULL;
-  while (get_sese (&sese_entry, &sese_exit, &skip_stmt))
+  while (get_sese (&sese_entry, &sese_exit))
 	{
-	  walker.set_skip_stmt (skip_stmt);
-
 	  threadedge_initialize_values ();
 	  avail_exprs_stack->push_marker ();
 	  const_and_copies->push_marker ();
@@ -797,8 +790,7 @@ public:
 protected:
   /* dominator_base methods: */
   virtual bool sese_mode_p (void) { return true; }
-  virtual bool get_sese (basic_block *sese_entry, basic_block *sese_exit,
-			 gimple **skip_stmt)
+  virtual bool get_sese (basic_block *sese_entry, basic_block *sese_exit)
   {
 if (m_regions == NULL)
   {
@@ -821,10 +813,6 @@ protected:
 *sese_entry = BASIC_BLOCK_FOR_FN (cfun, index);
 *sese_exit = get_oacc_kernels_region_exit (*sese_entry);
 
-tree omp_data_i = get_omp_data_i (single_pred (*sese_entry));
-if (omp_data_i != NULL_TREE)
-  *skip_stmt = SSA_NAME_DEF_STMT (omp_data_i);
-
 return true;
   }
 
@@ -1446,12 +1434,7 @@ dom_opt_dom_walker::before_dom_children (basic_block bb)
   m_avail_exprs_stack->pop_to_marker ();
 
   for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next ())
-{
-  if (gsi_stmt (gsi) == m_skip_stmt)
-	continue;
-
-  optimize_stmt (bb, gsi, m_const_and_copies, m_avail_exprs_stack);
-}
+optimize_stmt (bb, gsi, m_const_and_copies, m_avail_exprs_stack);
 
   /* Now prepare to process dominated blocks.  */
   if (m_jump_threading_p)
-- 
1.9.1



[gomp4, committed, 7/9] Update goacc kernels Fortran testcases

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.


This patch updates the goacc kernels Fortran testcases

Thanks,
- Tom

Update goacc kernels Fortran testcases

2015-11-04  Tom de Vries  

	* gfortran.dg/goacc/kernels-loop-2.f95: Scan for oacc function instead
	of oacc_parallel.
	* gfortran.dg/goacc/kernels-loop-data-2.f95:
	* gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95:
	* gfortran.dg/goacc/kernels-loop-data-enter-exit.f95:
	* gfortran.dg/goacc/kernels-loop-data-update.f95:
	* gfortran.dg/goacc/kernels-loop-data.f95:
	* gfortran.dg/goacc/kernels-loop-nest-independent.f95:
	* gfortran.dg/goacc/kernels-loop.f95:
	* gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95:
---
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95  | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95  | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95| 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-update.f95| 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-data.f95   | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop-nest-independent.f95   | 2 +-
 gcc/testsuite/gfortran.dg/goacc/kernels-loop.f95| 2 +-
 .../gfortran.dg/goacc/kernels-parallel-loop-data-enter-exit.f95 | 2 +-
 9 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
index ccfc3ae..7fd6d4e 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-2.f95
@@ -42,4 +42,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
index 2f46c6d..f788f67 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-2.f95
@@ -48,4 +48,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95 b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
index 34b4a06..3599052 100644
--- a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
+++ b/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit-2.f95
@@ -48,4 +48,4 @@ end program main
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.1 " 1 "optimized" } }
 ! { dg-final { scan-tree-dump-times "(?n);; Function MAIN__._omp_fn.2 " 1 "optimized" } }
 
-! { dg-final { scan-tree-dump-times "(?n)pragma omp target oacc_parallel.*num_gangs\\(32\\)" 3 "parloops_oacc_kernels" } }
+! { dg-final { scan-tree-dump-times "(?n)oacc function \\(32," 3 "parloops_oacc_kernels" } }
diff --git a/gcc/testsuite/gfortran.dg/goacc/kernels-loop-data-enter-exit.f95 

Re: OpenACC dimension range propagation optimization

2015-11-05 Thread Richard Biener
On Wed, Nov 4, 2015 at 6:29 PM, Nathan Sidwell  wrote:
> On 11/04/15 05:26, Richard Biener wrote:
>>
>> On Tue, Nov 3, 2015 at 7:11 PM, Nathan Sidwell  wrote:
>>>
>>> Richard,
>
>
>> this all seems a little bit fragile and relying on implementation details?
>> Is the attribute always present?  Is the call argument always a constant
>> that fits in a HOST_WIDE_INT (or even int here)?  Are there always enough
>> 'dims' in the tree list?  Is the 'dim' value always an INTEGER_CST that
>> fits a HOST_WIDE_INT (or even an int here)?
>
>
>
>> If so I'd like to see helper functions to hide these implementation
>> details
>> from generic code like this.
>
>
> Like this?
>
> I've added two helper functions to omp-low.c, one to  get the internal fn
> arg number and the other to get a dimension value, given an axis number.
> omp-low seemed the most appropriate point -- that's  where the dimension
> processing is, and the generation of these internal fn calls.
>
> (Bernd, I'll fixup the dimension folding patch to use these calls before
> applying it.)
>
> ok?

Ok

Thanks,
Richard.

> nathan


Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Thomas Schwinge
Hi Cesar!

On Tue, 3 Nov 2015 14:16:59 -0800, Cesar Philippidis  
wrote:
> This patch does the following to the c and c++ front ends:

>  * updates c_oacc_split_loop_clauses to filter out the loop clauses
>from combined parallel/kernels loops

>   gcc/c-family/
>   * c-omp.c (c_oacc_split_loop_clauses): Make TILE, GANG, WORKER, VECTOR,
>   AUTO, SEQ and independent as loop clauses.  Associate REDUCTION
>   clauses with parallel and kernels.

> --- a/gcc/c-family/c-omp.c
> +++ b/gcc/c-family/c-omp.c
> @@ -709,12 +709,21 @@ c_oacc_split_loop_clauses (tree clauses, tree 
> *not_loop_clauses)
>  
>switch (OMP_CLAUSE_CODE (clauses))
>  {
> +   /* Loop clauses.  */
>   case OMP_CLAUSE_COLLAPSE:
> - case OMP_CLAUSE_REDUCTION:
> + case OMP_CLAUSE_TILE:
> + case OMP_CLAUSE_GANG:
> + case OMP_CLAUSE_WORKER:
> + case OMP_CLAUSE_VECTOR:
> + case OMP_CLAUSE_AUTO:
> + case OMP_CLAUSE_SEQ:
> + case OMP_CLAUSE_INDEPENDENT:
> OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
> loop_clauses = clauses;
> break;
>  
> +   /* Parallel/kernels clauses.  */
> +
>   default:
> OMP_CLAUSE_CHAIN (clauses) = *not_loop_clauses;
> *not_loop_clauses = clauses;

Contrary to your ChangeLog entry, this is not duplicating but is moving
OMP_CLAUSE_REDUCTION handling.  Is that intentional?  (And, doesn't
anything break in the testsuite?)


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH] Use subreg_regno instead of subreg_regno_offset

2015-11-05 Thread Bernd Schmidt

On 11/02/2015 11:29 PM, Anatoly Sokolov wrote:



@@ -5522,12 +5516,7 @@
  op0 = SUBREG_REG (op0);
  code0 = GET_CODE (op0);
  if (code0 == REG && REGNO (op0) < FIRST_PSEUDO_REGISTER)
-  op0 = gen_rtx_REG (word_mode,
- (REGNO (op0) +
-  subreg_regno_offset (REGNO (SUBREG_REG (orig_op0)),
-   GET_MODE (SUBREG_REG (orig_op0)),
-   SUBREG_BYTE (orig_op0),
-   GET_MODE (orig_op0;
+  op0 = gen_rtx_REG (word_mode, subreg_regno (op0));
}


Same problem as in the reg-stack code? The existing code was using
orig_op0 to get the subreg, you've changed it to use op0 which is
already the SUBREG_REG.



No problems here. At this point op0 is equivalent to orig_op0. A new value
can be assigned to op0 later.


Err, what? Before the quoted code we have
rtx op0 = orig_op0;
and then
   op0 = SUBREG_REG (op0);
Are you overlooking this line?


+  else if (REG_P (reg) +   && HARD_REGISTER_P (reg))


I don't see how this would even compile.


-  regno += subreg_regno_offset (regno, GET_MODE (SUBREG_REG (reg)),
-SUBREG_BYTE (reg), GET_MODE (reg));
+  regno = subreg_regno (reg);   endregno = regno + subreg_nregs (reg);


This looks like you edited the patch? The endregno assignment is on its 
own line after this.


NAK, a final one as far as I'm concerned.


Bernd


Re: [PATCH][combine][RFC] Don't transform sign and zero extends inside mults

2015-11-05 Thread Kyrill Tkachov

Hi Segher,

On 04/11/15 23:50, Segher Boessenkool wrote:

Hi Kyrill,

On Mon, Nov 02, 2015 at 02:15:27PM +, Kyrill Tkachov wrote:

This patch attempts to restrict combine from transforming ZERO_EXTEND and
SIGN_EXTEND operations into and-bitmask
and weird SUBREG expressions when they appear inside MULT expressions. This
is because a MULT rtx containing these
extend operations is usually a well understood widening multiply operation.

Right.  I have often wanted for combine to try plain substitution as well
as substitution and simplification: simplification (which uses nonzero_bits
etc.) often makes a mess of even moderately complex instructions.

But since we do not have that yet, and your 24% number is nicely impressive,
let's try to do no simplification for widening mults, as in your patch.


With this patch on aarch64 I saw increased generation of smaddl and umaddl
instructions where
before the combination of smull and add instructions were rejected because
the extend operands
were being transformed into these weird equivalent expressions.

Overall, I see 24% more [su]maddl instructions being generated for SPEC2006
and with no performance regressions.

The testcase in the patch is the most minimal one I could get that
demonstrates the issue I'm trying to solve.

Does this approach look ok?

In broad lines it does.  Could you try this patch instead?  Not tested
etc. (other than building an aarch64 cross and your test case); I'll do
that tomorrow if you like the result.


Thanks, that looks less intrusive. I did try it out on arm and aarch64.
It does work on the aarch64 testcase. However, there's also a correctness
regression, I'll try to explain inline




Segher


diff --git a/gcc/combine.c b/gcc/combine.c
index c3db2e0..3bf7ffb 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -5284,6 +5284,15 @@ subst (rtx x, rtx from, rtx to, int in_dest, int 
in_cond, int unique_copy)
  || GET_CODE (SET_DEST (x)) == PC))
fmt = "ie";
  
+  /* Substituting into the operands of a widening MULT is not likely

+to create RTL matching a machine insn.  */
+  if (code == MULT
+ && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
+ && (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
+ || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
+   return x;


I think we should also add:
  && REG_P (XEXP (XEXP (x, 0), 0))
  && REG_P (XEXP (XEXP (x, 1), 0))

to the condition. Otherwise I've seen regressions in the arm testsuite, the
gcc.target/arm/smlatb-1.s test in particular that tries to match the pattern
(define_insn "*maddhisi4tb"
  [(set (match_operand:SI 0 "s_register_operand" "=r")
(plus:SI (mult:SI (ashiftrt:SI
   (match_operand:SI 1 "s_register_operand" "r")
   (const_int 16))
  (sign_extend:SI
   (match_operand:HI 2 "s_register_operand" "r")))
 (match_operand:SI 3 "s_register_operand" "r")))]


There we have a sign_extend of a shift that we want to convert to the form
that the pattern expects. So adding the checks for REG_P fixes that for me.

For the correctness issue I saw on aarch64 the shortest case I could reduce is:
short int a[16], b[16];
void
f5 (void)
{
  a[0] = b[0] / 14;
}

We synthesise the division and lose one of the intermediate immediates.
The good codegen is:
f5:
	adrp	x1, b
	mov	w0, 9363	// <--- This gets lost!
	movk	w0, 0x9249, lsl 16	// <--- This gets lost!
	adrp	x2, a
	ldrsh	w1, [x1, #:lo12:b]
	smull	x0, w1, w0
	lsr	x0, x0, 32
	add	w0, w1, w0
	asr	w0, w0, 3
	sub	w0, w0, w1, asr 31
	strh	w0, [x2, #:lo12:a]
ret

The bad one with this patch is:
	adrp	x1, b
	adrp	x2, a
	ldrsh	w1, [x1, #:lo12:b]
	smull	x0, w1, w0
	lsr	x0, x0, 32
	add	w0, w1, w0
	asr	w0, w0, 3
	sub	w0, w0, w1, asr 31
	strh	w0, [x2, #:lo12:a]
ret


The problematic hunk there is the subst hunk.
In the expression 'x':
(mult:DI (sign_extend:DI (reg:SI 80 [ b ]))
(sign_extend:DI (reg:SI 82)))

it tries to substitute 'from': (reg:SI 82)
with 'to': (const_int -1840700269 [0x92492493])

since we now return just 'x' combine thinks that the substitution succeeded
and eliminates the immediate move.
Is there a way that subst can signal some kind of "failed to substitute" result?
If not, I got it to work by using that check to set the in_dest variable to the
subsequent recursive call to subst, in a similar way to my original patch, but 
I was
hoping to avoid overloading the meaning of in_dest.

Thanks,
Kyrill


+
/* Get the mode of operand 0 in case X is now a SIGN_EXTEND of a
 constant.  */
if (fmt[0] == 'e')
@@ -8455,6 +8464,15 @@ force_to_mode (rtx x, machine_mode mode, unsigned 
HOST_WIDE_INT mask,
/* ... fall through ...  */
  
  case MULT:

+  /* Substituting into the operands of a widening 

Re: [ping] Change behavior of -fsched-verbose option

2015-11-05 Thread Nikolai Bozhenov

On 11/05/2015 12:26 PM, Bernd Schmidt wrote:

On 11/05/2015 09:11 AM, Nikolai Bozhenov wrote:

Please commit if it is OK for trunk.


Hmm, do you have a copyright assignment on file?



Yes I do have a copyright assignment for all past and future changes
to GCC (RT:828836).

Thanks,
Nikolai


Re: [sched] Dump dependency graph to a dot file

2015-11-05 Thread Nikolai Bozhenov

On 11/05/2015 12:18 PM, Kyrill Tkachov wrote:

Hi Nikolai,

On 05/11/15 08:29, Nikolai Bozhenov wrote:

Hi!

The attached patch adds a procedure to dump the scheduler's dependency
graph into a dot file. The patch has been bootstrapped and regtested
on x86_64. Please commit if it is OK for trunk.

Thanks,
Nikolai


A couple of style nits.

+  // begin subgraph (basic block)
+  pp_printf (&pp, "subgraph cluster_block_%d {\n", bb);
+  pp_printf (&pp, "\t" "color=blue;" "\n");
+  pp_printf (&pp, "\t" "style=bold;" "\n");
+  pp_printf (&pp, "\t" "label=\"BB #%d\";\n", BB_TO_BLOCK (bb));
+
+  // setup head and tail (no support for EBBs)
+  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
+  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head,
		     &tail);

+  tail = NEXT_INSN (tail);
+
+  // dump all insns
+  for (con = head; con != tail; con = NEXT_INSN (con))
+{
+  if (!INSN_P (con))
+continue;
+
+  // pretty print the insn
+  pp_printf (&pp, "\t%d [label=\"{", INSN_UID (con));

Please use C-style comments i.e. /**/ instead of //.
Also, throughout the comments leave two empty spaces after a full stop
i.e. /* .  */.

You can use the check_GNU_style.sh script in the contrib/ directory on
your patches to highlight similar issues. For example:
$ ./contrib/check_GNU_style.sh ~/patches/dep-graph.patch

Dot, space, space, end of comment.
83:+/* Dump dependency graph for the current region to a file using 
dot syntax. */
166:+/* Dump dependency graph for the current region to a file using 
dot syntax. */


Sentences should end with a dot.  Dot, space, space, end of the comment.
127:+  /* dump all deps */

Cheers,
Kyrill



Thanks for your remarks, Kyrill!

I've uploaded an updated patch.

Nikolai
2015-11-04  Nikolai Bozhenov  

	* sched-int.h (dump_rgn_dependencies_dot): Declare
	* sched-rgn.c (dump_rgn_dependencies_dot): New function
	* print-rtl.h (print_insn): Add prototype

diff --git a/gcc/print-rtl.h b/gcc/print-rtl.h
index eb079af..f601d33 100644
--- a/gcc/print-rtl.h
+++ b/gcc/print-rtl.h
@@ -25,12 +25,14 @@ extern void print_rtl (FILE *, const_rtx);
 #endif
 
 extern void dump_value_slim (FILE *, const_rtx, int);
 extern void dump_insn_slim (FILE *, const rtx_insn *);
 extern void dump_rtl_slim (FILE *, const rtx_insn *, const rtx_insn *,
 			   int, int);
 extern void print_value (pretty_printer *, const_rtx, int);
 extern void print_pattern (pretty_printer *, const_rtx, int);
+extern void print_insn (pretty_printer *pp, const rtx_insn *x, int verbose);
+
 extern void rtl_dump_bb_for_graph (pretty_printer *, basic_block);
 extern const char *str_pattern_slim (const_rtx);
 
 #endif  // GCC_PRINT_RTL_H
diff --git a/gcc/sched-int.h b/gcc/sched-int.h
index 86f5821..4600347 100644
--- a/gcc/sched-int.h
+++ b/gcc/sched-int.h
@@ -1492,16 +1492,19 @@ extern void sched_rgn_local_finish (void);
 extern void sched_rgn_local_free (void);
 extern void extend_regions (void);
 extern void rgn_make_new_region_out_of_new_block (basic_block);
 
 extern void compute_priorities (void);
 extern void increase_insn_priority (rtx_insn *, int);
 extern void debug_rgn_dependencies (int);
 extern void debug_dependencies (rtx_insn *, rtx_insn *);
+extern void dump_rgn_dependencies_dot (FILE *);
+extern void dump_rgn_dependencies_dot (const char *);
+
 extern void free_rgn_deps (void);
 extern int contributes_to_priority (rtx_insn *, rtx_insn *);
 extern void extend_rgns (int *, int *, sbitmap, int *);
 extern void deps_join (struct deps_desc *, struct deps_desc *);
 
 extern void rgn_setup_common_sched_info (void);
 extern void rgn_setup_sched_infos (void);
 
diff --git a/gcc/sched-rgn.c b/gcc/sched-rgn.c
index eafb3fd..d744d83 100644
--- a/gcc/sched-rgn.c
+++ b/gcc/sched-rgn.c
@@ -58,16 +58,18 @@ along with GCC; see the file COPYING3.  If not see
 #include "insn-attr.h"
 #include "except.h"
 #include "params.h"
 #include "cfganal.h"
 #include "sched-int.h"
 #include "sel-sched.h"
 #include "tree-pass.h"
 #include "dbgcnt.h"
+#include "pretty-print.h"
+#include "print-rtl.h"
 
 #ifdef INSN_SCHEDULING
 
 /* Some accessor macros for h_i_d members only used within this file.  */
 #define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
 #define IS_LOAD_INSN(INSN) (HID (insn)->is_load_insn)
 
 /* nr_inter/spec counts interblock/speculative motion for the function.  */
@@ -2855,16 +2857,118 @@ void debug_dependencies (rtx_insn *head, rtx_insn *tail)
 		   DEP_NONREG (dep) ? "n" : "",
 		   DEP_MULTIPLE (dep) ? "m" : "");
   }
   fprintf (sched_dump, "\n");
 }
 
   fprintf (sched_dump, "\n");
 }
+
+/* Dump dependency graph for the current region to a file using dot syntax.  */
+
+void
+dump_rgn_dependencies_dot (FILE *file)
+{
+  rtx_insn *head, *tail, *con, *pro;
+  sd_iterator_def sd_it;
+  dep_t dep;
+  int bb;
+  pretty_printer pp;
+
+  pp.buffer->stream = file;
+  pp_printf (&pp, "digraph SchedDG {\n");
+
+  for (bb = 0; bb < 

[PATCH] New version of libmpx with new memmove wrapper

2015-11-05 Thread Aleksandra Tsvetkova
A new version of libmpx was added.  There is a new function get_bd() that
makes it possible to get the bounds directory.  The wrapper for memmove was
modified: now it moves the data and then moves the corresponding bounds
directly from one bounds table to another.  This approach makes moving
unaligned pointers possible.  It also makes the memmove function faster for
sizes bigger than 64 bytes.
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
old mode 100644
new mode 100755
index e693063..3026a05
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,7 @@
+2015-10-27  Tsvetkova Alexandra  
+
+   * gcc.target/i386/mpx/memmove.c: New test for __mpx_wrapper_memmove.
+
 2015-10-27  Alan Lawrence  
 
* gcc.dg/vect/vect-strided-shift-1.c: New.
diff --git a/gcc/testsuite/gcc.target/i386/mpx/memmove.c 
b/gcc/testsuite/gcc.target/i386/mpx/memmove.c
new file mode 100755
index 000..3e2e61c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/mpx/memmove.c
@@ -0,0 +1,118 @@
+/* { dg-do run } */
+/* { dg-options "-fcheck-pointer-bounds -mmpx" } */
+
+
+#include 
+#include 
+#include 
+#include 
+
+#ifdef __i386__
+/* i386 directory size is 4MB.  */
+#define MPX_NUM_L2_BITS 10
+#define MPX_NUM_IGN_BITS 2
+#else /* __i386__ */
+/* x86_64 directory size is 2GB.  */
+#define MPX_NUM_L2_BITS 17
+#define MPX_NUM_IGN_BITS 3
+#endif /* !__i386__ */
+
+/* bt_num_of_elems is the number of elements in bounds table.  */
+unsigned long bt_num_of_elems = (1UL << MPX_NUM_L2_BITS);
+/* Function to test MPX wrapper of memmove function.
+   src_bigger_dst determines which address is bigger, can be 0 or 1.
+   src_bt_index and dst_bt index are bt_indexes
+   from the beginning of the page.
+   bd_index_end is the bd index of the last element of src if we define
+   bd index of the first element as 0.
+   src_bt index_end is bt index of the last element of src.
+   pointers inside determines if array being copied includes pointers
+   src_align and dst_align are alignments of src and dst.
+   Arrays may contain unaligned pointers.  */
+int
+test (int src_bigger_dst, int src_bt_index, int dst_bt_index,
+  int bd_index_end, int src_bt_index_end, int pointers_inside,
+  int src_align, int dst_align)
+{
+  const int n =
+src_bt_index_end - src_bt_index + bd_index_end * bt_num_of_elems;
+  if (n < 0)
+{
+  return 0;
+}
+  const int num_of_pointers = (bd_index_end + 2) * bt_num_of_elems;
+  void **arr = 0;
+  posix_memalign ((void **) (&arr),
+   1UL << (MPX_NUM_L2_BITS + MPX_NUM_IGN_BITS),
+   num_of_pointers * sizeof (void *));
+  void **src = arr, **dst = arr;
+  if ((src_bigger_dst) && (src_bt_index < dst_bt_index))
+src_bt_index += bt_num_of_elems;
+  if (!(src_bigger_dst) && (src_bt_index > dst_bt_index))
+dst_bt_index += bt_num_of_elems;
+  src += src_bt_index;
+  dst += dst_bt_index;
+  char *realign = (char *) src;
+  realign += src_align;
+  src = (void **) realign;
+  realign = (char *) dst;
+  realign += src_align;
+  dst = (void **) realign;
+  if (pointers_inside)
+{
+  for (int i = 0; i < n; i++)
+src[i] = __bnd_set_ptr_bounds (arr + i, i * sizeof (void *) + 1);
+}
+  memmove (dst, src, n * sizeof (void *));
+  if (pointers_inside)
+{
+  for (int i = 0; i < n; i++)
+{
+  if (dst[i] != arr + i)
+abort ();
+  if (__bnd_get_ptr_lbound (dst[i]) != arr + i)
+abort ();
+  if (__bnd_get_ptr_ubound (dst[i]) != arr + 2 * i)
+abort ();
+}
+}
+  free (arr);
+  return 0;
+}
+
+/* Call testall to test common cases of memmove for MPX.  */
+void
+testall ()
+{
+  int align[3];
+  align[0] = 0;
+  align[1] = 1;
+  align[2] = 7;
+  test (0, 1, 2, 0, 2, 1, 0, 0);
+  for (int pointers_inside = 0; pointers_inside < 2; pointers_inside++)
+for (int src_bigger_dst = 0; src_bigger_dst < 2; src_bigger_dst++)
+  for (int src_align = 0; src_align < 3; src_align ++)
+for (int dst_align = 0; dst_align < 3; dst_align ++)
+  for (int pages = 0; pages < 4; pages++)
+{
+  test (src_bigger_dst, 1, 2, pages, 1, pointers_inside,
+align[src_align], align[dst_align]);
+  test (src_bigger_dst, 1, 2, pages, 2, pointers_inside,
+align[src_align], align[dst_align]);
+  test (src_bigger_dst, 2, 1, pages, 12, pointers_inside,
+align[src_align], align[dst_align]);
+  test (src_bigger_dst, 2, 1, pages, 1, pointers_inside,
+align[src_align], align[dst_align]);
+  test (src_bigger_dst, 2, 3, pages, 12, pointers_inside,
+align[src_align], align[dst_align]);
+  test (src_bigger_dst, 1, bt_num_of_elems - 2, pages, 2,
+pointers_inside, align[src_align], align[dst_align]);
+}
+};
+
+int
+main ()
+{
+  testall ();
+  return 0;
+}
diff --git 

[gomp4, committed, 6/9] Update goacc kernels C testcases

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1Move expansion of kernels region back to first omp-expand
  2Update gate_oacc_kernels to handle oacc function
  3Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4Revert "Add pass_dominator::sese_mode_p ()"
  5Handle oacc function in parloops
  6Update goacc kernels C testcases
  7Update goacc kernels Fortran testcases
  8Release_defs in expand_omp_atomic_fetch_op
  9Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



This patch updates the goacc kernels C testcases.

Thanks,
- Tom

Update goacc kernels C testcases

2015-11-04  Tom de Vries  

	* c-c++-common/goacc/kernels-acc-loop-reduction.c: Scan for oacc
	function instead of oacc_parallel.
	* c-c++-common/goacc/kernels-acc-loop-smaller-equal.c: Same.
	* c-c++-common/goacc/kernels-counter-vars-function-scope.c: Same.
	* c-c++-common/goacc/kernels-double-reduction.c: Same.
	* c-c++-common/goacc/kernels-independent.c: Same.
	* c-c++-common/goacc/kernels-loop-2-acc-loop.c:  Same.
	* c-c++-common/goacc/kernels-loop-2.c:  Same.
	* c-c++-common/goacc/kernels-loop-3-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-3.c: Same.
	* c-c++-common/goacc/kernels-loop-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-data-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit-2.c: Same.
	* c-c++-common/goacc/kernels-loop-data-enter-exit.c: Same.
	* c-c++-common/goacc/kernels-loop-data-update.c: Same.
	* c-c++-common/goacc/kernels-loop-data.c: Same.
	* c-c++-common/goacc/kernels-loop-g.c: Same.
	* c-c++-common/goacc/kernels-loop-mod-not-zero.c: Same.
	* c-c++-common/goacc/kernels-loop-n-acc-loop.c: Same.
	* c-c++-common/goacc/kernels-loop-n.c: Same.
	* c-c++-common/goacc/kernels-loop-nest-independent.c: Same.
	* c-c++-common/goacc/kernels-loop-nest.c: Same.
	* c-c++-common/goacc/kernels-loop.c: Same.
	* c-c++-common/goacc/kernels-one-counter-var.c: Same.
	* c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c: Same.
	* c-c++-common/goacc/kernels-reduction.c: Same.
	* c-c++-common/goacc/kernels-counter-var-redundant-load.c: Same.  Update
	scan.
---
 gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-smaller-equal.c  | 2 +-
 .../c-c++-common/goacc/kernels-counter-var-redundant-load.c| 7 +++
 .../c-c++-common/goacc/kernels-counter-vars-function-scope.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-double-reduction.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-independent.c | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2-acc-loop.c | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-2.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3-acc-loop.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-3.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-acc-loop.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-2.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit-2.c  | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-enter-exit.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data-update.c| 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-data.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-g.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-mod-not-zero.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n-acc-loop.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-n.c  | 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest-independent.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop-nest.c   | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-loop.c| 3 ++-
 gcc/testsuite/c-c++-common/goacc/kernels-one-counter-var.c | 3 ++-
 .../c-c++-common/goacc/kernels-parallel-loop-data-enter-exit.c | 2 +-
 gcc/testsuite/c-c++-common/goacc/kernels-reduction.c   | 3 ++-
 26 files changed, 36 insertions(+), 29 deletions(-)

diff --git a/gcc/testsuite/c-c++-common/goacc/kernels-acc-loop-reduction.c 

[gomp4, committed, 8/9] Release_defs in expand_omp_atomic_fetch_op

2015-11-05 Thread Tom de Vries

On 05/11/15 11:16, Tom de Vries wrote:

Hi,

now that we have committed -foffload-alias in gomp-4_0-branch (
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00214.html ), we no longer
need the kernels region to be a part of the original function when doing
alias analysis.

So, we no longer have the need to postpone splitting off the kernels
region into a separate function until after alias analysis, but we can
do this at the same time as when we expand the parallel region.

The following patch series implements that:

  1  Move expansion of kernels region back to first omp-expand
  2  Update gate_oacc_kernels to handle oacc function
  3  Revert "Add skip_stmt parm to pass_dominator::get_sese ()"
  4  Revert "Add pass_dominator::sese_mode_p ()"
  5  Handle oacc function in parloops
  6  Update goacc kernels C testcases
  7  Update goacc kernels Fortran testcases
  8  Release_defs in expand_omp_atomic_fetch_op
  9  Remove BUILT_IN_GOACC_KERNELS_INTERNAL

[ The patch series is broken up into logical bits, but intended as
single commit. Various things in kernels support will be broken in
intermediate stages. ]

Committed to gomp-4_0-branch.

I'll post the patches in reply to this message.



The parloops pass constructs an atomic update:
...
  #pragma omp atomic_load
  D.1839_59 = *&.paral_data_load.33_51->reduction.23;
  D.1840_60 = sum.27_56 + D.1839_59;
  #pragma omp atomic_store (D.1840_60);
...

The expand_omp_atomic_fetch_op function removes the update statement but 
doesn't release the ssa-name D.1840_60 defined by the update statement.


This causes an error when running ccp in lto1. We run into trouble here 
for this unreleased ssa-name, because SSA_NAME_VAR (var) == NULL_TREE 
and TREE_CODE (NULL_TREE) causes a sigsegv:

...
get_default_value (tree var)
{
  ccp_prop_value_t val = { UNINITIALIZED, NULL_TREE, 0 };
  gimple *stmt;

  stmt = SSA_NAME_DEF_STMT (var);

  if (gimple_nop_p (stmt))
{
  /* Variables defined by an empty statement are those used
 before being initialized.  If VAR is a local variable, we
 can assume initially that it is UNDEFINED, otherwise we must
 consider it VARYING.  */
  if (!virtual_operand_p (var)
  && TREE_CODE (SSA_NAME_VAR (var)) == VAR_DECL)
val.lattice_val = UNDEFINED;
...

This patch fixes the problem by releasing the ssa-name in 
expand_omp_atomic_fetch_op function.


Thanks,
- Tom
Release_defs in expand_omp_atomic_fetch_op

2015-11-04  Tom de Vries  

	* omp-low.c (expand_omp_atomic_fetch_op):  Release defs of update stmt.
---
 gcc/omp-low.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index 58cb959..84accd9 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -11914,10 +11914,14 @@ expand_omp_atomic_fetch_op (basic_block load_bb,
   gcc_assert (gimple_code (gsi_stmt (gsi)) == GIMPLE_OMP_ATOMIC_STORE);
   gsi_remove (&gsi, true);
   gsi = gsi_last_bb (store_bb);
+  stmt = gsi_stmt (gsi);
   gsi_remove (&gsi, true);
 
   if (gimple_in_ssa_p (cfun))
-update_ssa (TODO_update_ssa_no_phi);
+{
+  release_defs (stmt);
+  update_ssa (TODO_update_ssa_no_phi);
+}
 
   return true;
 }
-- 
1.9.1



Re: [PATCH v3 1/2] [PR debug/67192] Fix C loops' back-jump location

2015-11-05 Thread Andreas Arnez
On Thu, Nov 05 2015, Bernd Schmidt wrote:

> On 11/04/2015 05:17 PM, Andreas Arnez wrote:
>>
>> gcc/c/ChangeLog:
>>
>>  PR debug/67192
>>  * c-parser.c (c_parser_while_statement): Finish the loop before
>>  parsing ahead for misleading indentation.
>>  (c_parser_for_statement): Likewise.
>
> This is OK.

Thanks again for reviewing.  Are you going to look at patch #2 as well?

> Does C++ have similar issues?

Not this particular issue, AFAIK.  But I've just looked at how C++ fares
with the enhanced version of pr67192.c from patch #2.  There I see the
following:

  Breakpoint 2, f4 () at pr67192.cc:54
  (gdb) p cnt
  $1 = 16

I.e., when breaking on "while (1)" the first loop iteration has already
executed.  This is because the C++ parser assigns the backward-goto to
the 'while' token.  It's the same issue you pointed at with version 2 of
my patch.

Shall I open a bug for that?

--
Andreas



Re: [PATCH] Fix c-c++-common/torture/vector-compare-1.c on powerpc

2015-11-05 Thread Ilya Enkovich
On 29 Oct 15:29, Richard Biener wrote:
> 
> I think this should unconditionally produce the COND_EXPR and
> build cst_true using build_all_ones_cst (stype).
> 
> Ok with that change.
> 
> Thanks,
> Richard.
> 

Here is an updated patch version.  Bootstrapped and regtested on 
powerpc64le-unknown-linux-gnu and x86_64-unknown-linux-gnu.  Applied to trunk.

Thanks,
Ilya
--
gcc/

2015-11-05  Ilya Enkovich  

* tree-vect-generic.c (do_compare): Use -1 for true
result instead of 1.


diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index d0a4e0f..b59f699 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -161,10 +161,16 @@ static tree
 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
tree bitpos, tree bitsize, enum tree_code code, tree type)
 {
+  tree stype = TREE_TYPE (type);
+  tree cst_false = build_zero_cst (stype);
+  tree cst_true = build_all_ones_cst (stype);
+  tree cmp;
+
   a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
   b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
 
-  return gimplify_build2 (gsi, code, TREE_TYPE (type), a, b);
+  cmp = build2 (code, boolean_type_node, a, b);
+  return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
 }
 
 /* Expand vector addition to scalars.  This does bit twiddling


Re: [vec-cmp, patch 1/6] Add optabs for vector comparison

2015-11-05 Thread Ilya Enkovich
2015-10-27 23:52 GMT+03:00 Jeff Law :
>
> Sigh.  I searched for the enum type, not for CODE_FOR_nothing ;(  My bad.
>
> If it's easy to get rid of, yes.  I believe we've got 3 uses of
> CODE_FOR_nothing.  AFAICT in none of those cases do we care about the code
> other than does it correspond to CODE_FOR_nothing.
>
> Ideally we'd like to have both optabs-query and optabs-tree not know about
> insn codes.  The former is supposed to be IR agnostic, but insn codes are
> part of the RTL IR, so that's a wart.  The latter is supposed to be tree
> specific and thus shouldn't know about the RTL IR either.
>
> I'd settle for getting the wart out of optabs-tree and we can put further
> cleanup of optabs-query in the queue.
>
> To get the wart out of optabs-tree all I think we need is a true boolean
> function that tells us if there's a suitable optab.
>
> It's unfortunate that the routines exported by optabs-query are
> can_{extend,float,fix}_p since those would fairly natural for the boolean
> query we want to make and they're used elsewhere, but not in a boolean form.
>
> I think that we ought to rename the existing uses & definition of can_XXX_p
> that are exported by optabs-query.c, then creating new can_XXX_p for those
> uses that just care about the boolean status should work.  At that point we
> remove insn-codes.h from optab-tree.c.

Do you want this refactoring be a part of this patch or series?

Thanks,
Ilya

>
> Jeff


[PATCH, PR tree-optimization/68145] Fix vectype computation in vectorizable_operation

2015-11-05 Thread Ilya Enkovich
Hi,

This patch fixes a way vectype is computed in vectorizable_operation.  
Currently op0 is always used to compute vectype.  If it is a loop invariant 
then its type is used to get vectype which is impossible for booleans requiring 
a context to correctly compute vectype.  This patch uses output vectype in such 
cases, this should always work fine for operations on booleans.  Bootstrapped 
on x86_64-unknown-linux-gnu.  Regression tesing is in progress.  Ok if no 
regressions?

Thanks,
Ilya
--
gcc/

2015-11-05  Ilya Enkovich  

PR tree-optimization/68145
* tree-vect-stmts.c (vectorizable_operation): Fix
determination for booleans.

gcc/testsuite/

2015-11-05  Ilya Enkovich  

PR tree-optimization/68145
* g++.dg/vect/pr68145.cc: New test.


diff --git a/gcc/testsuite/g++.dg/vect/pr68145.cc 
b/gcc/testsuite/g++.dg/vect/pr68145.cc
new file mode 100644
index 000..51e663a
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vect/pr68145.cc
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+
+struct A {
+  bool operator()(int p1, int p2) { return p1 && p2; }
+};
+class B {
+public:
+  bool *cbegin();
+  bool *cend();
+};
+template  void operator&&(B p1, T p2) {
+  B a;
+  arrayContTransform(p1, p2, a, A());
+}
+
+template 
+void myrtransform(_InputIterator1 p1, _OutputIterator p2, T p3,
+  _BinaryOperation p4) {
+  _InputIterator1 b;
+  for (; b != p1; ++b, ++p2)
+*p2 = p4(*b, p3);
+}
+
+template 
+void arrayContTransform(L p1, R p2, RES p3, BinaryOperator p4) {
+  myrtransform(p1.cend(), p3.cbegin(), p2, p4);
+}
+
+class C {
+public:
+  B getArrayBool();
+};
+class D {
+  B getArrayBool(const int &);
+  C lnode_p;
+};
+bool c;
+B D::getArrayBool(const int &) { lnode_p.getArrayBool() && c; }
+
+// { dg-final { scan-tree-dump "vectorized 1 loops" "vect" { target { i?86-*-* 
x86_64-*-* } } } }
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index ae14075..9aa2d4e 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4697,7 +4697,26 @@ vectorizable_operation (gimple *stmt, 
gimple_stmt_iterator *gsi,
   /* If op0 is an external or constant def use a vector type with
  the same size as the output vector type.  */
   if (!vectype)
-vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+{
+  /* For boolean type we cannot determine vectype by
+invariant value (don't know whether it is a vector
+of booleans or vector of integers).  We use output
+vectype because operations on boolean don't change
+type.  */
+  if (TREE_CODE (TREE_TYPE (op0)) == BOOLEAN_TYPE)
+   {
+ if (TREE_CODE (TREE_TYPE (scalar_dest)) != BOOLEAN_TYPE)
+   {
+ if (dump_enabled_p ())
+   dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+"not supported operation on bool value.\n");
+ return false;
+   }
+ vectype = vectype_out;
+   }
+  else
+   vectype = get_same_sized_vectype (TREE_TYPE (op0), vectype_out);
+}
   if (vec_stmt)
 gcc_assert (vectype);
   if (!vectype)


Re: Fix 61441

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Sujoy Saraswati wrote:

> > Some other places in this patch have similar issues with checking
> > HONOR_SNANS && REAL_VALUE_ISNAN when they should check if the particular
> > value is sNaN.
> 
> I modified the code to check for sNaN specifically. The modified patch 
> is below.

This is looking pretty close to ready to go in, but the patch also seems 
to be doing several things at once and it might be easier to review if 
split into separate logical pieces.  For example (this is an illustration, 
not necessarily saying it should be exactly this way): (1) Add 
REAL_VALUE_ISSIGNALING_NAN and convert existing logic that already does 
something equivalent (so no semantic change, just cleanup).  (2) Convert 
existing code that checks for all NaNs in cases where 
REAL_VALUE_ISSIGNALING_NAN would be enough.  (3) Treat rint and nearbyint 
the same.  (4) Make real.c operations properly produce quiet NaNs for 
operations with signaling NaN input.  (5) Disable various transformations 
for signaling NaN operands, where folding them loses exceptions.

This patch is also getting big enough that it would be a good idea to 
complete the copyright assignment paperwork, certainly if you plan to make 
more contributions in future, if you're not covered by a corporate 
assignment.

> @@ -1943,7 +1965,17 @@ fold_convert_const_real_from_real (tree type, cons
>REAL_VALUE_TYPE value;
>tree t;
> 
> +  /* Don't perform the operation if flag_signaling_nans is on
> + and the operand is a NaN.  */
> +  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
> +  && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))
> +return NULL_TREE;
> +
> real_convert (&value, TYPE_MODE (type), &TREE_REAL_CST (arg1));
> +  /* Make resulting NaN value to be qNaN when flag_signaling_nans
> + is off.  */
> +  if (REAL_VALUE_ISSIGNALING_NAN (value))
> +value.signalling = 0;

Shouldn't real_convert do this rather than the caller needing to do it?

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Cesar Philippidis
On 11/05/2015 09:13 AM, Nathan Sidwell wrote:
> On 11/05/15 12:01, Thomas Schwinge wrote:
> 
>> On Thu, 5 Nov 2015 06:47:58 -0800, Cesar Philippidis
>>  wrote:
>>> On 11/05/2015 04:14 AM, Thomas Schwinge wrote:
> 
>>> Sorry, I must have mis-phrased it. The spec is unclear here. There are
>>> three possible ways to interpret 'acc parallel loop reduction':
>>>
>>>1. acc parallel reduction
>>>   acc loop
>>
>> This is what you propose in your patch, but I don't think that makes
>> sense, or does it?  I'm happy to learn otherwise, but in my current
>> understanding, a reduction clause needs to be attached (at least) to the
>> innermost construct where reductions are to be processed.  (Let's also
> 
> Correct, the  above interpretation must be wrong.
> 
>> consider multi-level gang/worker/vector loops/reductions.)  So, either:
>>
>>>2. acc parallel
>>>   acc loop reduction
>>
>> ... this, or even this:
>>
>>>3. acc parallel reduction
>>>   acc loop reduction
>>
>> ..., which I'm not sure what the execution model implementation requires.
>> (Nathan?)
> 
> interpretation #2 is sufficient, I think. However, both are lacking a
> 'copy (reduction_var)', clause as otherwise there's nothing changing the
> default data attribute of 'firstprivate' (working on that patch). 
> Perhaps 'reduction' on 'parallel'  is meant to imply that  (because
> that's what makes sense), but the std doesn't say it.
> 
> In summary it's probably safe to implement interpretation #3.  That way
> we can implement the hypothesis that reductions at the outer construct
> imply copy.

OK, #3 it is.

>> And while we're at it: the very same question also applies to the private
>> clause, which -- contrary to all other (as far as I remember) clauses --
>> also is applicable to both the parallel and loop constructs:
>>
>>  #pragma acc parallel loop private([...])
>>
>> ... is to be decomposed into which of the following:
>>
>>  #pragma acc parallel private([...])
>>  #pragma acc loop
>>
>>  #pragma acc parallel
>>  #pragma acc loop private([...])
>>
>>  #pragma acc parallel private([...])
>>  #pragma acc loop private([...])
>>
>> (There is no private clause allowed to be specified with the kernels
>> construct for what it's worth, but that doesn't mean we couldn't use it
>> internally, of course, if so required.)
> 
> I think interpretation #2 or #3 make sense, and I suspect result in the
> same emitted code.

I'll probably go #2 here to make life easier with kernels.

After I make these changes (and the c++ template updates), I'll apply
them to trunk and backport them to gomp4. Thank you Jakub, Thomas and
Nathan for reviewing these patches.

Cesar


Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread David Edelsohn
[Explicitly copying build maintainers.]

Paolo and Alexandre,

Could you review and help with this patch?

TLS symbols in AIX display a new, different symbol type in nm output.
Libtool explicitly creates a list of exported symbols for shared
libraries using nm and does not recognize the new TLS symbols, so
those symbols are not exported.

This is a regression for TLS support on AIX.

This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
libgfortran, and libgomp.  I would like to apply the patch to GCC
while I simultaneously work with the Libtool community to correct the
bug upstream.  I also would like to backport this to GCC 5.2 and GCC
4.9.x.

I have not been able to run the correct versions of autoconf to
regenerate configure directly.  I either can edit the files directly
or I would appreciate someone helping me to regenerate configure in
all library directories.

Bootstrapped on powerpc-ibm-aix7.1.0.0.

* libtool.m4 (export_symbols_cmds) [AIX]: Add global TLS "L" symbols.
* libstdc++-v3/configure: Regenerate.
* libgfortran/configure: Regenerate.
* libgomp/configure: Regenerate.

Thanks, David


ZZ
Description: Binary data


Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Martin Sebor

On 11/05/2015 10:10 AM, Joseph Myers wrote:

On Thu, 5 Nov 2015, Martin Sebor wrote:


optimization. We seem to have consensus that even though there
might be exceptions (do we know what some of them are?) the vast
majority of optimizations that have -fxxx options are in fact not
performed at -O0. I think reflecting that in the output is more
helpful than implying otherwise.


There are several options marked Optimization that are actually matters of
language semantics that I don't think have anything to do with -O options,
e.g. -fexceptions and -fcx-limited-range.  The Optimization tag really
just means that it's saved and restored under certain circumstances.


Both -fexceptions and -fcx-limited-range are reported as disabled
at -O0, so they're not the examples I'm looking for. The set of
options that are reported as disabled by -O0 with the patched GCC
and that, IIUC, are of concern (i.e., that may not need -O to take
effect) are a subset of those below.

Quickly scanning the list, I would guess -frtti, -fshort-enums,
-fsigned-zeros, and -ftrapping-math should be in that category,
but there could very well be others.

If it's relatively straightforward to enumerate all such options
then maybe the way to go is to add a bit to struct cl_option to
make it possible to distinguish them from the others in the -O0
-Q --help=optimizers output.

Thanks
Martin

  -faggressive-loop-optimizations   [disabled by -O0]
  -fasynchronous-unwind-tables  [disabled by -O0]
  -fauto-inc-dec[disabled by -O0]
  -fdce [disabled by -O0]
  -fdelete-null-pointer-checks  [disabled by -O0]
  -fdse [disabled by -O0]
  -fearly-inlining  [disabled by -O0]
  -ffunction-cse[disabled by -O0]
  -fgcse-lm [disabled by -O0]
  -finline  [disabled by -O0]
  -finline-atomics  [disabled by -O0]
  -fira-hoist-pressure  [disabled by -O0]
  -fira-share-save-slots[disabled by -O0]
  -fira-share-spill-slots   [disabled by -O0]
  -fivopts  [disabled by -O0]
  -fjump-tables [disabled by -O0]
  -flifetime-dse[disabled by -O0]
  -fmath-errno  [disabled by -O0]
  -fpeephole[disabled by -O0]
  -fplt [disabled by -O0]
  -fprefetch-loop-arrays[disabled by -O0]
  -frename-registers[disabled by -O0]
  -frtti[disabled by -O0]
  -fsched-critical-path-heuristic   [disabled by -O0]
  -fsched-dep-count-heuristic   [disabled by -O0]
  -fsched-group-heuristic   [disabled by -O0]
  -fsched-interblock[disabled by -O0]
  -fsched-last-insn-heuristic   [disabled by -O0]
  -fsched-rank-heuristic[disabled by -O0]
  -fsched-spec  [disabled by -O0]
  -fsched-spec-insn-heuristic   [disabled by -O0]
  -fsched-stalled-insns-dep [disabled by -O0]
  -fschedule-fusion [disabled by -O0]
  -fshort-enums [disabled by -O0]
  -fsigned-zeros[disabled by -O0]
  -fsplit-ivs-in-unroller   [disabled by -O0]
  -fssa-backprop[disabled by -O0]
  -fstdarg-opt  [disabled by -O0]
  -fstrict-volatile-bitfields   [disabled by -O0]
  -fno-threadsafe-statics   [disabled by -O0]
  -ftrapping-math   [disabled by -O0]
  -ftree-cselim [disabled by -O0]
  -ftree-forwprop   [disabled by -O0]
  -ftree-loop-if-convert[disabled by -O0]
  -ftree-loop-im[disabled by -O0]
  -ftree-loop-ivcanon   [disabled by -O0]
  -ftree-loop-optimize  [disabled by -O0]
  -ftree-phiprop[disabled by -O0]
  -ftree-reassoc[disabled by -O0]
  -ftree-scev-cprop [disabled by -O0]
  -fvar-tracking[disabled by -O0]
  -fvar-tracking-assignments[disabled by -O0]
  -fweb [disabled by -O0]



Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Thomas Schwinge
Hi!

Nathan, question here about clause splitting for combined OpenACC
"parallel loop" and "kernels loop" constructs, in particular:

#pragma acc parallel loop reduction([...])

On Thu, 5 Nov 2015 06:47:58 -0800, Cesar Philippidis  
wrote:
> On 11/05/2015 04:14 AM, Thomas Schwinge wrote:
> 
> > On Tue, 3 Nov 2015 14:16:59 -0800, Cesar Philippidis 
> >  wrote:
> >> This patch does the following to the c and c++ front ends:
> > 
> >>  * updates c_oacc_split_loop_clauses to filter out the loop clauses
> >>from combined parallel/kernels loops
> > 
> >>gcc/c-family/
> >>* c-omp.c (c_oacc_split_loop_clauses): Make TILE, GANG, WORKER, VECTOR,
> >>AUTO, SEQ and independent as loop clauses.  Associate REDUCTION
> >>clauses with parallel and kernels.
> > 
> >> --- a/gcc/c-family/c-omp.c
> >> +++ b/gcc/c-family/c-omp.c
> >> @@ -709,12 +709,21 @@ c_oacc_split_loop_clauses (tree clauses, tree 
> >> *not_loop_clauses)
> >>  
> >>switch (OMP_CLAUSE_CODE (clauses))
> >>  {
> >> +/* Loop clauses.  */
> >>case OMP_CLAUSE_COLLAPSE:
> >> -  case OMP_CLAUSE_REDUCTION:
> >> +  case OMP_CLAUSE_TILE:
> >> +  case OMP_CLAUSE_GANG:
> >> +  case OMP_CLAUSE_WORKER:
> >> +  case OMP_CLAUSE_VECTOR:
> >> +  case OMP_CLAUSE_AUTO:
> >> +  case OMP_CLAUSE_SEQ:
> >> +  case OMP_CLAUSE_INDEPENDENT:
> >>  OMP_CLAUSE_CHAIN (clauses) = loop_clauses;
> >>  loop_clauses = clauses;
> >>  break;
> >>  
> >> +/* Parallel/kernels clauses.  */
> >> +
> >>default:
> >>  OMP_CLAUSE_CHAIN (clauses) = *not_loop_clauses;
> >>  *not_loop_clauses = clauses;
> > 
> > Contrary to your ChangeLog entry, this is not duplicating but is moving
> > OMP_CLAUSE_REDUCTION handling.  Is that intentional?  (And, doesn't
> > anything break in the testsuite?)
> 
> Sorry, I must have mis-phrased it. The spec is unclear here. There are
> three possible ways to interpret 'acc parallel loop reduction':
> 
>   1. acc parallel reduction
>  acc loop

This is what you propose in your patch, but I don't think that makes
sense, or does it?  I'm happy to learn otherwise, but in my current
understanding, a reduction clause needs to be attached (at least) to the
innermost construct where reductions are to be processed.  (Let's also
consider multi-level gang/worker/vector loops/reductions.)  So, either:

>   2. acc parallel
>  acc loop reduction

... this, or even this:

>   3. acc parallel reduction
>  acc loop reduction

..., which I'm not sure what the execution model implementation requires.
(Nathan?)

And while we're at it: the very same question also applies to the private
clause, which -- contrary to all other (as far as I remember) clauses --
also is applicable to both the parallel and loop constructs:

#pragma acc parallel loop private([...])

... is to be decomposed into which of the following:

#pragma acc parallel private([...])
#pragma acc loop

#pragma acc parallel
#pragma acc loop private([...])

#pragma acc parallel private([...])
#pragma acc loop private([...])

(There is no private clause allowed to be specified with the kernels
construct for what it's worth, but that doesn't mean we couldn't use it
internally, of course, if so required.)


> You told me to make all of the front ends consistent, and since I
> started working on fortran first, I had c and c++ follow what it was doing.

Well, I had not asked you to "blindly" ;-) make the front ends
consistent, but rather to figure out why currently C/C++ vs. Fortran are
doing things differently, and then make that consistent, as needed.

> I haven't observed any regressions with this in in place. Then again,
> maybe we don't have sufficient test coverage. I'll do more testing.

Thanks!  (I do see some -- but not very many, curiously! -- regressions
when doing (only) this change on gomp-4_0-branch.)

As far as I'm concerned, this analysis/testing can also be done later on.
(Jakub?)  In that case, for now please just keep OMP_CLAUSE_REDUCTION as
it has been handled before.


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH] Add configure flag for operator new (std::nothrow)

2015-11-05 Thread Jonathan Wakely
On 5 November 2015 at 20:52, Daniel Gutson wrote:
> Real use cases: statistics and logging. It's a (one time) callback
> reporting that something went wrong,
> but not intended to fix things e.g. by attempting to free more memory.

Why can't that be done by replacing operator new with a user-defined
function, instead of modifying the default one?

A program that wants to make use of the altered semantics is
non-portable because it relies on a custom libstdc++ configuration
option, and has to install a custom new-handler that relies on the
altered libstdc++ behaviour. Replacing operator new to do the logging
is portable and requires no custom configuration.

(I'm not rejecting the patch, but am still trying to make sure it's a
useful change.)


Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Martin Sebor wrote:

> optimization. We seem to have consensus that even though there
> might be exceptions (do we know what some of them are?) the vast
> majority of optimizations that have -fxxx options are in fact not
> performed at -O0. I think reflecting that in the output is more
> helpful than implying otherwise.

There are several options marked Optimization that are actually matters of 
language semantics that I don't think have anything to do with -O options, 
e.g. -fexceptions and -fcx-limited-range.  The Optimization tag really 
just means that it's saved and restored under certain circumstances.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread David Edelsohn
Hi, Richi

You mentioned GCC 5.3 release around the time of stage 1 ending in the
GCC 5 status report:

https://gcc.gnu.org/ml/gcc/2015-07/msg00197.html

If the timeframe is later, we have more time to address the AIX libtool problem.

Thanks, David

On Thu, Nov 5, 2015 at 9:13 AM, Richard Biener
 wrote:
> On November 5, 2015 5:38:45 PM GMT+01:00, David Edelsohn  
> wrote:
>>On Thu, Nov 5, 2015 at 8:34 AM, Paolo Bonzini  wrote:
>>>
>>>
>>> On 05/11/2015 17:28, David Edelsohn wrote:
 [Explicitly copying build maintainers.]

 Paolo and Alexandre,

 Could you review and help with this patch?

 TLS symbols in AIX display a new, different symbol type in nm
>>output.
 Libtool explicitly creates a list of exported symbols for shared
 libraries using nm and does not recognize the new TLS symbols, so
 those symbols are not exported.

 This is a regression for TLS support on AIX.

 This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
 libgfortran, and libgomp.  I would like to apply the patch to GCC
 while I simultaneously work with the Libtool community to correct
>>the
 bug upstream.  I also would like to backport this to GCC 5.2 and GCC
 4.9.x.
>>>
>>> I think it's okay to wait for the patch to be upstream.
>>>
>>> I can help committing the patch once it is.
>>
>>Paolo,
>>
>>The patch MUST go into GCC 5.3 release, which Richi has announced that
>>he wants to release around the beginning of Stage 3.  The patch should
>>not go into GCC 5 branch without going into trunk.  This patch cannot
>>wait unless you want to block GCC 5.3.
>
> I don't remember announcing this timeframe but I won't be able to dedicate 
> the required resources until the end of November.
>
> Richard.
>
>>Thanks, David
>
>


[C PATCH] Fix error-recovery with variable-sized compound literals (PR c/68090)

2015-11-05 Thread Marek Polacek
This fixes an error-recovery problem, where we were trying to build
a C_MAYBE_CONST_EXPR even though the type is invalid.  I think let's
just skip this for error_mark_nodes.

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2015-11-05  Marek Polacek  

PR c/68090
* c-parser.c (c_parser_postfix_expression_after_paren_type): Don't
deal with pre-evaluation on invalid types.

* gcc.dg/pr68090.c: New test.

diff --git gcc/c/c-parser.c gcc/c/c-parser.c
index ab324d3..e2f55b3 100644
--- gcc/c/c-parser.c
+++ gcc/c/c-parser.c
@@ -7855,7 +7855,7 @@ c_parser_postfix_expression_after_paren_type (c_parser 
*parser,
   expr.value = build_compound_literal (start_loc, type, init.value, non_const);
   expr.original_code = ERROR_MARK;
   expr.original_type = NULL;
-  if (type_expr)
+  if (type != error_mark_node && type_expr)
 {
   if (TREE_CODE (expr.value) == C_MAYBE_CONST_EXPR)
{
diff --git gcc/testsuite/gcc.dg/pr68090.c gcc/testsuite/gcc.dg/pr68090.c
index e69de29..87b3b93 100644
--- gcc/testsuite/gcc.dg/pr68090.c
+++ gcc/testsuite/gcc.dg/pr68090.c
@@ -0,0 +1,13 @@
+/* PR c/68090 */
+/* { dg-do compile } */
+/* { dg-options "" } */
+
+void
+fn (int i)
+{
+  (int[(0, 1)]) { 0 }; /* { dg-error "compound literal has variable size" } */
+  (int[i]) { 0 }; /* { dg-error "compound literal has variable size" } */
+  (int[(0, i)]) { 0 }; /* { dg-error "compound literal has variable size" } */
+  (int [][i]){ 0 }; /* { dg-error "compound literal has variable size" } */
+  (int [][(1, 2)]){ 0 }; /* { dg-error "compound literal has variable size" } 
*/
+}

Marek


Re: [PATCH 1/4][AArch64] Add scheduling and cost models for Exynos M1

2015-11-05 Thread Evandro Menezes

James,

Since the other members of the "tune_params" structure were signed integers, 
I followed the same pattern, even though negative numbers would make no 
sense for most of them either.


Regardless, here's a patch with unsigned integers as you requested:

   [AArch64] Add extra tuning parameters for target processors

   2015-11-05  Evandro Menezes  

   gcc/

   * config/aarch64/aarch64-protos.h (tune_params): Add new members
   "max_case_values" and "cache_line_size".
   * config/aarch64/aarch64.c (aarch64_case_values_threshold): New
   function.
   (aarch64_override_options_internal): Tune heuristics based on new
   members in "tune_params".
   (TARGET_CASE_VALUES_THRESHOLD): Define macro.

Please, commit if it's alright.

Thank you,

--
Evandro Menezes

On 11/05/2015 03:22 AM, James Greenhalgh wrote:

On Wed, Nov 04, 2015 at 05:21:03PM -0600, Evandro Menezes wrote:

Please, ignore the previous patch.  This is the intended patch.

Sorry.

--
Evandro Menezes

On 11/04/2015 05:18 PM, Evandro Menezes wrote:

This patch adds extra tuning information about AArch64 targets:

* Maximum number of case values before resorting to a jump table
   The default values assumed independently of the specific backends
   may be rather low for modern processors, which sport quite efficient
   direct branch prediction, whereas indirect branch prediction is
   still typically not so efficient.  This value may be specifically
   set for a processor or left at zero to use the default values.
* L1 cache line size
   The auto-prefetcher uses this information when emitting software
   prefetch insns.

Please, commit if it's alright.

Thank you,


Thanks for the patch,


diff --git a/gcc/config/aarch64/aarch64-protos.h 
b/gcc/config/aarch64/aarch64-protos.h
index 81792bc..ecf4685 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -195,6 +195,9 @@ struct tune_params
int vec_reassoc_width;
int min_div_recip_mul_sf;
int min_div_recip_mul_df;
+  int max_case_values; /* Case values threshold; or 0 for the default.  */

If we're using an int, how about -1 as the sentinel value? (Maybe someone
really likes jump tables!). Otherwise, make this an unsigned int?


+
+  int cache_line_size; /* Cache line size; or 0 for the default.  */

unsigned int?

The patch is otherwise OK, though it needs a ChangeLog.

Thanks,
James




diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 81792bc..ecf4685 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -195,6 +195,9 @@ struct tune_params
   int vec_reassoc_width;
   int min_div_recip_mul_sf;
   int min_div_recip_mul_df;
+  unsigned int max_case_values; /* Case values threshold; or 0 for the default.  */
+
+  unsigned int cache_line_size; /* Cache line size; or 0 for the default.  */
 
 /* An enum specifying how to take into account CPU autoprefetch capabilities
during instruction scheduling:
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5c8604f..e7f1c07 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -355,6 +355,8 @@ static const struct tune_params generic_tunings =
   1,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -378,6 +380,8 @@ static const struct tune_params cortexa53_tunings =
   1,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -401,6 +405,8 @@ static const struct tune_params cortexa57_tunings =
   1,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_WEAK,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)	/* tune_flags.  */
 };
@@ -424,6 +430,8 @@ static const struct tune_params cortexa72_tunings =
   1,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -446,6 +454,8 @@ static const struct tune_params thunderx_tunings =
   1,	/* vec_reassoc_width.  */
   2,	/* min_div_recip_mul_sf.  */
   2,	/* min_div_recip_mul_df.  */
+  0,	/* max_case_values.  */
+  0,	/* cache_line_size.  */
   tune_params::AUTOPREFETCHER_OFF,	/* autoprefetcher_model.  */
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
@@ -468,6 +478,8 @@ 

Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Martin Sebor

On 11/05/2015 03:48 AM, Alexander Monakov wrote:

On Thu, 5 Nov 2015, Joseph Myers wrote:

On Wed, 4 Nov 2015, Martin Sebor wrote:


Improving the compiler output is a good idea. The attached patch
prints "[disabled by -O0]" instead of "[enabled]" when an optimization
option is enabled by default but when optimization (i.e., -O1 or
greater) is not enabled.


I don't think it's entirely accurate that all options marked as
Optimization in *.opt are actually disabled by -O0.  Many are, but it
depends on the actual logic controlling each optimization.


Indeed, and Richard already pointed that out when reviewing (my) previous
attempt.  Martin, please review the thread ending at
https://gcc.gnu.org/ml/gcc-patches/2012-05/msg00113.html

I guess eventually we can slightly overhaul optimization options to remove
such ambiguities, but in short term I see no way to accurately determine
whether a pass/option is ultimately in effect or not.

Martin, what about taking Richard's suggestion, tacking a short explanatory
sentence onto the end of --help=optimizers output?


I don't think that reiterating in a condensed form what the manual
doesn't make clear in many more words will help. First, even users
who do find the relevant text in the manual often misunderstand it.
Others are misled by the "[enabled]" output into expecting the
optimizations to take place at -O0. Those who are in doubt and
ask on gcc-help are then told by GCC experts that -O0 disables
all optimizations without exception, and to RTFM (see the thread
I referenced in my first post on this topic for an example).

That the individual optimizations -fxxx options have no effect
without -O1 or better is made clear in the FAQ on the Wiki (in
https://gcc.gnu.org/wiki/FAQ#optimization-options). What isn't
mentioned there is the relationship to the --help=optimizers
output.

I'd be happy to tweak the "disabled by -O0" text to something
less definitive if that would make you and Joseph more
comfortable. But the "enabled" output at -O0, even though
strictly correct, is a problem. Users don't care about the
internal state of the options. What they need to know is
whether or not their code may be subject to the corresponding
optimization. We seem to have consensus that even though there
might be exceptions (do we know what some of them are?) the vast
majority optimizations that have -fxxx options are in fact not
performed at -O0. I think reflecting that in the output is more
helpful than implying otherwise.

Martin


Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread Richard Biener
On November 5, 2015 5:38:45 PM GMT+01:00, David Edelsohn  
wrote:
>On Thu, Nov 5, 2015 at 8:34 AM, Paolo Bonzini  wrote:
>>
>>
>> On 05/11/2015 17:28, David Edelsohn wrote:
>>> [Explicitly copying build maintainers.]
>>>
>>> Paolo and Alexandre,
>>>
>>> Could you review and help with this patch?
>>>
>>> TLS symbols in AIX display a new, different symbol type in nm
>output.
>>> Libtool explicitly creates a list of exported symbols for shared
>>> libraries using nm and does not recognize the new TLS symbols, so
>>> those symbols are not exported.
>>>
>>> This is a regression for TLS support on AIX.
>>>
>>> This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
>>> libgfortran, and libgomp.  I would like to apply the patch to GCC
>>> while I simultaneously work with the Libtool community to correct
>the
>>> bug upstream.  I also would like to backport this to GCC 5.2 and GCC
>>> 4.9.x.
>>
>> I think it's okay to wait for the patch to be upstream.
>>
>> I can help committing the patch once it is.
>
>Paolo,
>
>The patch MUST go into GCC 5.3 release, which Richi has announced that
>he wants to release around the beginning of Stage 3.  The patch should
>not go into GCC 5 branch without going into trunk.  This patch cannot
>wait unless you want to block GCC 5.3.

I don't remember announcing this timeframe but I won't be able to dedicate the 
required resources until the end of November.

Richard.

>Thanks, David




Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread David Edelsohn
On Thu, Nov 5, 2015 at 8:34 AM, Paolo Bonzini  wrote:
>
>
> On 05/11/2015 17:28, David Edelsohn wrote:
>> [Explicitly copying build maintainers.]
>>
>> Paolo and Alexandre,
>>
>> Could you review and help with this patch?
>>
>> TLS symbols in AIX display a new, different symbol type in nm output.
>> Libtool explicitly creates a list of exported symbols for shared
>> libraries using nm and does not recognize the new TLS symbols, so
>> those symbols are not exported.
>>
>> This is a regression for TLS support on AIX.
>>
>> This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
>> libgfortran, and libgomp.  I would like to apply the patch to GCC
>> while I simultaneously work with the Libtool community to correct the
>> bug upstream.  I also would like to backport this to GCC 5.2 and GCC
>> 4.9.x.
>
> I think it's okay to wait for the patch to be upstream.
>
> I can help committing the patch once it is.

Paolo,

The patch MUST go into GCC 5.3 release, which Richi has announced that
he wants to release around the beginning of Stage 3.  The patch should
not go into GCC 5 branch without going into trunk.  This patch cannot
wait unless you want to block GCC 5.3.

Thanks, David


Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Nathan Sidwell

On 11/05/15 12:01, Thomas Schwinge wrote:


On Thu, 5 Nov 2015 06:47:58 -0800, Cesar Philippidis  
wrote:

On 11/05/2015 04:14 AM, Thomas Schwinge wrote:



Sorry, I must have mis-phrased it. The spec is unclear here. There are
three possible ways to interpret 'acc parallel loop reduction':

   1. acc parallel reduction
  acc loop


This is what you propose in your patch, but I don't think that makes
sense, or does it?  I'm happy to learn otherwise, but in my current
understanding, a reduction clause needs to be attached (at least) to the
innermost construct where reductions are to be processed.  (Let's also


Correct, the  above interpretation must be wrong.


consider multi-level gang/worker/vector loops/reductions.)  So, either:


   2. acc parallel
  acc loop reduction


... this, or even this:


   3. acc parallel reduction
  acc loop reduction


..., which I'm not sure what the execution model implementation requires.
(Nathan?)


interpretation #2 is sufficient, I think. However, both are lacking a 'copy 
(reduction_var)' clause, as otherwise there's nothing changing the default data 
attribute of 'firstprivate' (working on that patch).  Perhaps 'reduction' on 
'parallel' is meant to imply that (because that's what makes sense), but the 
std doesn't say it.


In summary it's probably safe to implement interpretation #3.  That way we can 
implement the hypothesis that reductions at the outer construct imply copy.



And while we're at it: the very same question also applies to the private
clause, which -- contrary to all other (as far as I remember) clauses --
also is applicable to both the parallel and loop constructs:

 #pragma acc parallel loop private([...])

... is to be decomposed into which of the following:

 #pragma acc parallel private([...])
 #pragma acc loop

 #pragma acc parallel
 #pragma acc loop private([...])

 #pragma acc parallel private([...])
 #pragma acc loop private([...])

(There is no private clause allowed to be specified with the kernels
construct for what it's worth, but that doesn't mean we couldn't use it
internally, of course, if so required.)


I think interpretation #2 or #3 make sense, and I suspect result in the same 
emitted code.


nathan

--
Nathan Sidwell


Re: Division Optimization in match and simplify

2015-11-05 Thread Richard Biener
On November 5, 2015 2:40:30 PM GMT+01:00, Michael Matz  wrote:
>Hi,
>
>On Wed, 4 Nov 2015, Richard Biener wrote:
>
>> Ah, it was _left_ shift of negative values that ubsan complains
>about.
>
>Note that this is only for the frontend definition of shifts.  I don't
>see 
>why gimple shouldn't define it to the only sensible definition there
>is, 
>which also happens to be the one that all CPUs in the world implement 
>(well, those using two-complement representation at least).

Yeah, but front ends and back end are not separated enough to have both.

Richard.

>
>Ciao,
>Michael.




Re: [PATCH, ARM] PR target/68059 libgcc should not use __write for printing fatal error

2015-11-05 Thread Szabolcs Nagy

On 05/11/15 15:59, Richard Earnshaw wrote:

On 05/11/15 12:30, Szabolcs Nagy wrote:

libgcc/config/arm/linux-atomic-64bit.c uses __write to print an error
message if the 64bit xchg method is not available in the kernel.

__write is not part of the public libc abi.  Since this code is only
run on linux the write syscall can be invoked directly.

And __builtin_trap is a simpler way to crash than abort.

The new behaviour on a linux kernel before v3.1:

# ./a.out
A newer kernel is required to run this binary. (__kernel_cmpxchg64 helper)
Illegal instruction

OK for trunk and backporting?


__morestack_fail in generic-morestack.c now uses writev().  Is there
some reason we can't do the same?



both write and writev should be part of libc abi on linux.

they are not in the iso c namespace so standard conforming
user code can override them, but we are trying to report a
fatal error with best effort, so that might not be an issue.

i can just s/__write/write/ if that's preferred (or use writev)
and then no inline asm syscall magic is necessary.

i just want to avoid the symbol reference that is potentially
not available in the libc (i'm not sure if the 64bit builtin
atomics are supposed to work in freestanding mode or if
dynamic linking to libgcc should work without a crash on
old kernels).


libgcc/ChangeLog:

2015-11-05  Szabolcs Nagy  

 PR target/68059
 * config/arm/linux-atomic-64bit.c (__write): Remove declaration.
 (abort): Likewise.
 (linux_write): Define.

arm_write_2.diff


diff --git a/libgcc/config/arm/linux-atomic-64bit.c 
b/libgcc/config/arm/linux-atomic-64bit.c
index cdf713c..aba3334 100644
--- a/libgcc/config/arm/linux-atomic-64bit.c
+++ b/libgcc/config/arm/linux-atomic-64bit.c
@@ -33,9 +33,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  If 
not, see
 kernels; we check for that in an init section and bail out rather
 unceremoneously.  */

-extern unsigned int __write (int fd, const void *buf, unsigned int count);
-extern void abort (void);
-
  /* Kernel helper for compare-and-exchange.  */
  typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
const long long* newval,
@@ -45,6 +42,19 @@ typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
  /* Kernel helper page version number.  */
  #define __kernel_helper_version (*(unsigned int *)0x0ffc)

+static void
+linux_write (int fd, const void *buf, unsigned int count)
+{
+   register long r7 asm ("r7") = 4; /* Linux __NR_write.  */
+   register long r0 asm ("r0") = fd;
+   register long r1 asm ("r1") = (long)buf;
+   register long r2 asm ("r2") = count;
+   asm volatile ("svc 0"
+   :
+   : "r" (r7), "r" (r0), "r" (r1), "r" (r2)
+   : "memory");
+}
+
  /* Check that the kernel has a new enough version at load.  */
  static void __check_for_sync8_kernelhelper (void)
  {
@@ -53,11 +63,9 @@ static void __check_for_sync8_kernelhelper (void)
const char err[] = "A newer kernel is required to run this binary. "
"(__kernel_cmpxchg64 helper)\n";
/* At this point we need a way to crash with some information
-for the user - I'm not sure I can rely on much else being
-available at this point, so do the same as generic-morestack.c
-write () and abort ().  */
-  __write (2 /* stderr.  */, err, sizeof (err));
-  abort ();
+for the user.  */
+  linux_write (2 /* stderr.  */, err, sizeof (err));
+  __builtin_trap ();
  }
  };








Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread Paolo Bonzini


On 05/11/2015 17:28, David Edelsohn wrote:
> [Explicitly copying build maintainers.]
> 
> Paolo and Alexandre,
> 
> Could you review and help with this patch?
> 
> TLS symbols in AIX display a new, different symbol type in nm output.
> Libtool explicitly creates a list of exported symbols for shared
> libraries using nm and does not recognize the new TLS symbols, so
> those symbols are not exported.
> 
> This is a regression for TLS support on AIX.
> 
> This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
> libgfortran, and libgomp.  I would like to apply the patch to GCC
> while I simultaneously work with the Libtool community to correct the
> bug upstream.  I also would like to backport this to GCC 5.2 and GCC
> 4.9.x.

I think it's okay to wait for the patch to be upstream.

I can help committing the patch once it is.

Paolo

> I have not been able to run the correct versions of autoconf to
> regenerate configure directly.  I either can edit the files directly
> or I would appreciate someone helping me to regenerate configure in
> all library directories.
> 
> Bootstrapped on powerpc-ibm-aix7.1.0.0.
> 
> * libtool.m4 (export_symbols_cmds) [AIX]: Add global TLS "L" symbols.
> * libstdc++-v3/configure: Regenerate.
> * libgfortran/configure: Regenerate.
> * libgomp/configure: Regenerate.
> 
> Thanks, David
> 

Index: libtool.m4
===
--- libtool.m4  (revision 229706)
+++ libtool.m4  (working copy)
@@ -4230,7 +4230,7 @@
 if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
   _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | 
awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == 
"W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > 
$export_symbols'
 else
-  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | 
awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && ([substr](\$ 
3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience | 
awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == 
"L")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > 
$export_symbols'
 fi
 ;;
   pw32*)
@@ -4641,7 +4641,7 @@
if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
  _LT_TAGVAR(export_symbols_cmds, $1)='$NM -Bpg $libobjs $convenience | 
awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == 
"W")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > 
$export_symbols'
else
- _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience 
| awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && 
([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+ _LT_TAGVAR(export_symbols_cmds, $1)='$NM -BCpg $libobjs $convenience 
| awk '\''{ if (((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == 
"L")) && ([substr](\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > 
$export_symbols'
fi
aix_use_runtimelinking=no
 
Index: libstdc++-v3/configure
===
--- libstdc++-v3/configure  (revision 229706)
+++ libstdc++-v3/configure  (working copy)
@@ -9539,7 +9539,7 @@
if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
  export_symbols_cmds='$NM -Bpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && 
(substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
else
- export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != 
".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+ export_symbols_cmds='$NM -BCpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && 
(substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
fi
aix_use_runtimelinking=no
 
@@ -14058,7 +14058,7 @@
 if $NM -V 2>&1 | $GREP 'GNU' > /dev/null; then
   export_symbols_cmds_CXX='$NM -Bpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "W")) && 
(substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
 else
-  export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B")) && (substr(\$ 3,1,1) != 
".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
+  export_symbols_cmds_CXX='$NM -BCpg $libobjs $convenience | awk '\''{ if 
(((\$ 2 == "T") || (\$ 2 == "D") || (\$ 2 == "B") || (\$ 2 == "L")) && 
(substr(\$ 3,1,1) != ".")) { print \$ 3 } }'\'' | sort -u > $export_symbols'
 fi
 ;;
   pw32*)
Index: libgfortran/configure

Re: [PING 2] [PATCH] c++/67942 - diagnose placement new buffer overflow

2015-11-05 Thread Martin Sebor

If we use gcc_checking_assert it won't fire in release builds; let's go
with that.


Okay. Attached is an updated patch with that change.

Martin

gcc ChangeLog
2015-11-05  Martin Sebor  

	PR c++/67942
* invoke.texi (-Wplacement-new): Document new option.
	* gcc/testsuite/g++.dg/warn/Wplacement-new-size.C: New test.

gcc/c-family ChangeLog
2015-11-05  Martin Sebor  

	PR c++/67942
* c.opt (-Wplacement-new): New option.

gcc/cp ChangeLog
2015-11-05  Martin Sebor  

	PR c++/67942
	* cp/init.c (warn_placement_new_too_small): New function.
	(build_new_1): Call it.

diff --git a/gcc/c-family/c.opt b/gcc/c-family/c.opt
index 47ba070..5e9d7a3 100644
--- a/gcc/c-family/c.opt
+++ b/gcc/c-family/c.opt
@@ -760,6 +760,10 @@ Wprotocol
 ObjC ObjC++ Var(warn_protocol) Init(1) Warning
 Warn if inherited methods are unimplemented

+Wplacement-new
+C++ Var(warn_placement_new) Init(1) Warning
+Warn for placement new expressions with undefined behavior
+
 Wredundant-decls
 C ObjC C++ ObjC++ Var(warn_redundant_decls) Warning
 Warn about multiple declarations of the same object
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 1ed8f6c..3a9c59d 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2269,6 +2269,201 @@ throw_bad_array_new_length (void)
   return build_cxx_call (fn, 0, NULL, tf_warning_or_error);
 }

+/* Attempt to verify that the argument, OPER, of a placement new expression
+   refers to an object sufficiently large for an object of TYPE or an array
+   of NELTS of such objects when NELTS is non-null, and issue a warning when
+   it does not.  SIZE specifies the size needed to construct the object or
+   array and captures the result of NELTS * sizeof (TYPE). (SIZE could be
+   greater when the array under construction requires a cookie to store
+   NELTS.  GCC's placement new expression stores the cookie when invoking
+   a user-defined placement new operator function but not the default one.
+   Placement new expressions with user-defined placement new operator are
+   not diagnosed since we don't know how they use the buffer (this could
+   be a future extension).  */
+static void
+warn_placement_new_too_small (tree type, tree nelts, tree size, tree oper)
+{
+  location_t loc = EXPR_LOC_OR_LOC (oper, input_location);
+
+  /* The number of bytes to add to or subtract from the size of the provided
+ buffer based on an offset into an array or an array element reference.
+ Although intermediate results may be negative (as in a[3] - 2) the final
+ result cannot be.  */
+  HOST_WIDE_INT adjust = 0;
+  /* True when the size of the entire destination object should be used
+ to compute the possibly optimistic estimate of the available space.  */
+  bool use_obj_size = false;
+  /* True when the reference to the destination buffer is an ADDR_EXPR.  */
+  bool addr_expr = false;
+
+  STRIP_NOPS (oper);
+
+  /* Using a function argument or a (non-array) variable as an argument
+ to placement new is not checked since it's unknown what it might
+ point to.  */
+  if (TREE_CODE (oper) == PARM_DECL
+  || TREE_CODE (oper) == VAR_DECL
+  || TREE_CODE (oper) == COMPONENT_REF)
+return;
+
+  /* Evaluate any constant expressions.  */
+  size = fold_non_dependent_expr (size);
+
+  /* Handle the common case of array + offset expression when the offset
+ is a constant.  */
+  if (TREE_CODE (oper) == POINTER_PLUS_EXPR)
+{
+  /* If the offset is comple-time constant, use it to compute a more
+	 accurate estimate of the size of the buffer.  Otherwise, use
+	 the size of the entire array as an optimistic estimate (this
+	 may lead to false negatives).  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  if (CONSTANT_CLASS_P (adj))
+	adjust += tree_to_uhwi (adj);
+  else
+	use_obj_size = true;
+
+  oper = TREE_OPERAND (oper, 0);
+
+  STRIP_NOPS (oper);
+}
+
+  if (TREE_CODE (oper) == TARGET_EXPR)
+oper = TREE_OPERAND (oper, 1);
+  else if (TREE_CODE (oper) == ADDR_EXPR)
+{
+  addr_expr = true;
+  oper = TREE_OPERAND (oper, 0);
+}
+
+  STRIP_NOPS (oper);
+
+  if (TREE_CODE (oper) == ARRAY_REF)
+{
+  /* Similar to the offset computed above, see if the array index
+	 is a compile-time constant.  If so, and unless the offset was
+	 not a compile-time constant, use the index to determine the
+	 size of the buffer.  Otherwise, use the entire array as
+	 an optimistic estimate of the size.  */
+  const_tree adj = TREE_OPERAND (oper, 1);
+  if (!use_obj_size && CONSTANT_CLASS_P (adj))
+	adjust += tree_to_shwi (adj);
+  else
+	{
+	  use_obj_size = true;
+	  adjust = 0;
+	}
+
+  oper = TREE_OPERAND (oper, 0);
+}
+
+  /* Descend into a struct or union to find the member whose address
+ is being used as the agument.  */
+  while (TREE_CODE (oper) == COMPONENT_REF)
+oper = TREE_OPERAND (oper, 1);
+
+  if ((addr_expr || !POINTER_TYPE_P (TREE_TYPE (oper)))
+   

Re: [PATCH] PR 68192 Export AIX TLS symbols

2015-11-05 Thread Paolo Bonzini


On 05/11/2015 17:38, David Edelsohn wrote:
> On Thu, Nov 5, 2015 at 8:34 AM, Paolo Bonzini  wrote:
>>
>>
>> On 05/11/2015 17:28, David Edelsohn wrote:
>>> [Explicitly copying build maintainers.]
>>>
>>> Paolo and Alexandre,
>>>
>>> Could you review and help with this patch?
>>>
>>> TLS symbols in AIX display a new, different symbol type in nm output.
>>> Libtool explicitly creates a list of exported symbols for shared
>>> libraries using nm and does not recognize the new TLS symbols, so
>>> those symbols are not exported.
>>>
>>> This is a regression for TLS support on AIX.
>>>
>>> This patch updates libtool.m4 in GCC and configure for libstdc++-v3,
>>> libgfortran, and libgomp.  I would like to apply the patch to GCC
>>> while I simultaneously work with the Libtool community to correct the
>>> bug upstream.  I also would like to backport this to GCC 5.2 and GCC
>>> 4.9.x.
>>
>> I think it's okay to wait for the patch to be upstream.
>>
>> I can help committing the patch once it is.
> 
> The patch MUST go into GCC 5.3 release, which Richi has announced that
> he wants to release around the beginning of Stage 3.  The patch should
> not go into GCC 5 branch without going into trunk.

What is blocking the patch from being included in libtool in the next
week or two?

> This patch cannot
> wait unless you want to block GCC 5.3.

What blocks GCC 5.3 is decided by the release manager.  As a build
maintainer, I am not going to ack the patch for trunk until it is
included in Libtool, but I'm happy to be overridden by the GCC 5 release
manager.

Paolo


Re: [PATCH, ARM] PR target/68059 libgcc should not use __write for printing fatal error

2015-11-05 Thread Richard Earnshaw
On 05/11/15 12:30, Szabolcs Nagy wrote:
> libgcc/config/arm/linux-atomic-64bit.c uses __write to print an error
> message if the 64bit xchg method is not available in the kernel.
> 
> __write is not part of the public libc abi.  Since this code is only
> run on linux the write syscall can be invoked directly.
> 
> And __builtin_trap is a simpler way to crash than abort.
> 
> The new behaviour on a linux kernel before v3.1:
> 
> # ./a.out
> A newer kernel is required to run this binary. (__kernel_cmpxchg64 helper)
> Illegal instruction
> 
> OK for trunk and backporting?

__morestack_fail in generic-morestack.c now uses writev().  Is there
some reason we can't do the same?

R.

> 
> libgcc/ChangeLog:
> 
> 2015-11-05  Szabolcs Nagy  
> 
> PR target/68059
> * config/arm/linux-atomic-64bit.c (__write): Remove declaration.
> (abort): Likewise.
> (linux_write): Define.
> 
> arm_write_2.diff
> 
> 
> diff --git a/libgcc/config/arm/linux-atomic-64bit.c 
> b/libgcc/config/arm/linux-atomic-64bit.c
> index cdf713c..aba3334 100644
> --- a/libgcc/config/arm/linux-atomic-64bit.c
> +++ b/libgcc/config/arm/linux-atomic-64bit.c
> @@ -33,9 +33,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
> If not, see
> kernels; we check for that in an init section and bail out rather
> unceremoneously.  */
>  
> -extern unsigned int __write (int fd, const void *buf, unsigned int count);
> -extern void abort (void);
> -
>  /* Kernel helper for compare-and-exchange.  */
>  typedef int (__kernel_cmpxchg64_t) (const long long* oldval,
>   const long long* newval,
> @@ -45,6 +42,19 @@ typedef int (__kernel_cmpxchg64_t) (const long long* 
> oldval,
>  /* Kernel helper page version number.  */
>  #define __kernel_helper_version (*(unsigned int *)0x0ffc)
>  
> +static void
> +linux_write (int fd, const void *buf, unsigned int count)
> +{
> + register long r7 asm ("r7") = 4; /* Linux __NR_write.  */
> + register long r0 asm ("r0") = fd;
> + register long r1 asm ("r1") = (long)buf;
> + register long r2 asm ("r2") = count;
> + asm volatile ("svc 0"
> + :
> + : "r" (r7), "r" (r0), "r" (r1), "r" (r2)
> + : "memory");
> +}
> +
>  /* Check that the kernel has a new enough version at load.  */
>  static void __check_for_sync8_kernelhelper (void)
>  {
> @@ -53,11 +63,9 @@ static void __check_for_sync8_kernelhelper (void)
>const char err[] = "A newer kernel is required to run this binary. "
>   "(__kernel_cmpxchg64 helper)\n";
>/* At this point we need a way to crash with some information
> -  for the user - I'm not sure I can rely on much else being
> -  available at this point, so do the same as generic-morestack.c
> -  write () and abort ().  */
> -  __write (2 /* stderr.  */, err, sizeof (err));
> -  abort ();
> +  for the user.  */
> +  linux_write (2 /* stderr.  */, err, sizeof (err));
> +  __builtin_trap ();
>  }
>  };
>  
> 



Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Jakub Jelinek
Hi!

On Wed, Nov 04, 2015 at 09:55:49AM -0800, Cesar Philippidis wrote:

So, you are going to deal with gang parsing incrementally?
Fine with me.

> +/* OpenACC 2.0:
> +   tile ( size-expr-list ) */
> +
> +static tree
> +c_parser_oacc_clause_tile (c_parser *parser, tree list)
> +{
> +  tree c, expr = error_mark_node;
> +  location_t loc, expr_loc;
> +  tree tile = NULL_TREE;
> +
> +  check_no_duplicate_clause (list, OMP_CLAUSE_TILE, "tile");
> +
> +  loc = c_parser_peek_token (parser)->location;
> +  if (!c_parser_require (parser, CPP_OPEN_PAREN, "expected %<(%>"))
> +return list;
> +
> +  vec *tvec = make_tree_vector ();

Seems I've misread your patch, thought you are using TREE_VEC, while
you are actually using TREE_LIST, but populating it in a weird way.
I think more efficient would be just to
  tree tile = NULL_TREE;
here, then:

> +
> +  vec_safe_push (tvec, expr);

  tile = tree_cons (NULL_TREE, expr, tile);

> +  if (c_parser_next_token_is (parser, CPP_COMMA))
> + c_parser_consume_token (parser);
> +}
> +  while (c_parser_next_token_is_not (parser, CPP_CLOSE_PAREN));
> +
> +  /* Consume the trailing ')'.  */
> +  c_parser_consume_token (parser);
> +
> +  c = build_omp_clause (loc, OMP_CLAUSE_TILE);
> +  tile = build_tree_list_vec (tvec);

  tile = nreverse (tile);
  
> +  OMP_CLAUSE_TILE_LIST (c) = tile;
> +  OMP_CLAUSE_CHAIN (c) = list;
> +  release_tree_vector (tvec);

and remove the release_tree_vector calls.

> +static tree
> +cp_parser_oacc_clause_tile (cp_parser *parser, location_t clause_loc, tree 
> list)

This is already too long line.

> + case OMP_CLAUSE_TILE:
> +   {
> + tree list = OMP_CLAUSE_TILE_LIST (c);
> +
> + while (list)

I'd say
for (tree list = OMP_CLAUSE_TILE_LIST (c);
 list; list = TREE_CHAIN (list))
would be more readable.

> --- a/gcc/gimplify.c
> +++ b/gcc/gimplify.c
> @@ -6995,9 +6995,18 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq 
> *pre_p,
>   remove = true;
> break;
>  
> + case OMP_CLAUSE_TILE:
> +   for (tree list = OMP_CLAUSE_TILE_LIST (c); !remove && list;
> +list = TREE_CHAIN (list))
> + {
> +   if (gimplify_expr (_VALUE (list), pre_p, NULL,
> +  is_gimple_val, fb_rvalue) == GS_ERROR)
> + remove = true;
> + }

After all, you are already using for here ;)

Otherwise LGTM, but please clear with Thomas, I think he had some issues
with the patch.

Jakub


Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Segher Boessenkool
On Thu, Nov 05, 2015 at 08:58:25AM -0700, Martin Sebor wrote:
> I don't think that reiterating in a condensed form what the manual
> doesn't make clear in many more words will help. First, even users
> who do find the relevant text in the manual often misunderstand it.
> Others are misled by the "[enabled]" output into expecting the
> optimizations to take place at -O0.

The -Q --help* output is primarily aimed at GCC developers, just
like plain -Q is.  The main problem here seems to be that the
documentation pushes beginner GCC users to use this.

Perhaps we should have a higher-level, more easy to use way to query
what flags will do what, possibly integrated with the pass manager.

> Those who are in doubt and
> ask on gcc-help are then told by GCC experts that -O0 disables
> all optimizations without exception,

And it does, for the plain meaning of "optimization"; anything else
is arguably a bug.  But people have many different understandings
of what a "compiler optimization" is, all the way to "anything the
compiler does".


Segher


Re: [C PATCH] Fix error-recovery with variable-sized compound literals (PR c/68090)

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Marek Polacek wrote:

> This fixes an error-recovery problem, where we were trying to build
> a C_MAYBE_CONST_EXPR even though the type is invalid.  I think let's
> just skip this for error_mark_nodes.
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?

OK.

-- 
Joseph S. Myers
jos...@codesourcery.com


[gomp4] Merge trunk r229764 (2015-11-04) into gomp-4_0-branch

2015-11-05 Thread Thomas Schwinge
Hi!

Committed to gomp-4_0-branch in r229821:

commit bed2efe185cf13a52b9cf87ee52d74265f97d97b
Merge: bb6471e bf3a27b
Author: tschwinge 
Date:   Thu Nov 5 18:02:08 2015 +

svn merge -r 229562:229764 svn+ssh://gcc.gnu.org/svn/gcc/trunk


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/gomp-4_0-branch@229821 
138bc75d-0d04-0410-961f-82ee72b054a4


Grüße
 Thomas


signature.asc
Description: PGP signature


Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Martin Sebor wrote:

> Yes, I agree that printing "disabled by -O0" when -fexceptions is
> explicitly enabled on the command line is not right if the option
> isn't actually affected by -O0. This would be prevented if we knew
> which of the -fxxx options are or aren't controlled by -O0. Can you
> help identify them?

I don't think there's any simple way to do so; it requires checking all 
the "if (optimize)" conditions to see what is or is not conditioned that 
way.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Martin Sebor

On 11/05/2015 11:07 AM, Joseph Myers wrote:

On Thu, 5 Nov 2015, Martin Sebor wrote:


There are several options marked Optimization that are actually matters of
language semantics that I don't think have anything to do with -O options,
e.g. -fexceptions and -fcx-limited-range.  The Optimization tag really
just means that it's saved and restored under certain circumstances.


Both -fexceptions and -fcx-limited-range are reported as disabled
at -O0, so they're not the examples I'm looking for. The set of


But reporting that is nonsense.


What I said (meant) was that they are reported as "[disabled]" when
-O0 is used (and they are not explicitly enabled on the command line).
It took me a while to see that you're talking about something slightly
different.


They are disabled by default for C.  If
you pass the option on the command line, it's enabled regardless of -O
options, and should not be reported as disabled.


Yes, I agree that printing "disabled by -O0" when -fexceptions is
explicitly enabled on the command line is not right if the option
isn't actually affected by -O0. This would be prevented if we knew
which of the -fxxx options are or aren't controlled by -O0. Can you
help identify them?

Martin


Re: [PATCH 6/6] Make SRA replace constant-pool loads

2015-11-05 Thread Alan Lawrence
On 3 November 2015 at 14:01, Richard Biener  wrote:
>
> Hum.  I still wonder why we need all this complication ...

Well, certainly I'd love to make it simpler, and if the complication
is because I've gone about trying to deal with especially Ada in the
wrong way...

>  I would
> expect that if
> we simply decided to completely scalarize a constant pool aggregate load then
> we can always do that.  SRA should then simply emit the _same_ IL as it does
> for other complete scalarization element accesses.  The only difference is
> that the result should be simplifiable to omit the element load from
> the constant pool.
>
> 1) The FRE pass following SRA should do that for you.
...
> That is, I'd like to see the patch greatly simplified to just consider
> constant pool
> complete scalarization as if it were a regular variable.

Hmm, can you clarify, do you mean I should *not* replace constant pool
values with their DECL_INITIAL? The attempt to substitute in the
initial value is what leads to most of the problems. For example, in
gnat/opt31.adb, create_access finds this expression accessing *.LC0:

MEM[(interfaces__unsigned_8[(sizetype) .P_BOUNDS->LB0:.P_BOUNDS->UB0 >= .P_BOUNDS->LB0 ? (sizetype) .P_BOUNDS->UB0 : (sizetype)
.P_BOUNDS->LB0 +
4294967295] *)&*.LC0][1 ...]{lb: 1 sz: 1}

this is an ARRAY_RANGE_REF of a MEM_REF of an ADDR_EXPR of *.LC0. So
far I haven't extended subst_constant_pool_initial to handle
ARRAY_RANGE_REFs, as it can't even handle this MEM_REF:

MEM[(interfaces__unsigned_8[(sizetype) .P_BOUNDS->LB0:.P_BOUNDS->UB0 >= .P_BOUNDS->LB0 ? (sizetype) .P_BOUNDS->UB0 : (sizetype)
.P_BOUNDS->LB0 +
4294967295] *)&*.LC0]

because the type here has size:

MIN_EXPR <_GLOBAL.SZ2.ada_opt31 (.P_BOUNDS->UB0, .P_BOUNDS->LB0), 17179869176>

inside the MEM_REF of the ADDR_EXPR is *.LC0, whose DECL_INITIAL is a
4-element array (fine). Sadly while the MEM_REF
type_contains_placeholder_p, the type of the outer ARRAY_RANGE_REF
does not

One possibility is that this whole construct, ARRAY_RANGE_REF that it
is, should mark *.LC0 in cannot_scalarize_away_bitmap.

However, disqualified_constants is also necessary if I want a
meaningful assertion that we do not re-add constant pool entries as
candidates after we've discovered them - or should I try to rely on
there only being one expression that accesses each constant pool
entry? (I don't think that's guaranteed as tree_output_constant_def
does hashing, I admit I haven't really tried to break that assumption)

> 2) You should be able to use fold_ctor_reference directly (in place of
> all your code
> in case offset and size are readily available - don't remember exactly how
> complete scalarization "walks" elements).  Alternatively use
> fold_const_aggregate_ref.

That should work for completely_scalarize, yes, i.e. if I can remove
the other route in create_access.

> 3) You can simplify the stmt SRA generated by simply calling fold_stmt on it,
> that will do a bit more (wasted) work compared to 2) but may be easier.
>
> I wouldn't bother with the case where we for some reason do not simplify
> the constant pool load.

Hmmm. As above, I'm not quite sure what you mean by "the constant pool
load" - if that means, substituting the DECL_INITIAL in place of the
VAR_DECL that is DECL_IN_CONSTANT_POOL, then I didn't find a general
tree substitution routine, hence writing subst_constant_pool_initial.
It might be possible to make that simpler/more generic (e.g. just call
copy_node, recurse on each operand, return the original if nothing
changed) and then fold at the end.

Thanks,
Alan


Re: [PATCH] Add configure flag for operator new (std::nothrow)

2015-11-05 Thread Daniel Gutson
On Thu, Nov 5, 2015 at 2:11 PM, Jonathan Wakely  wrote:
> On 5 November 2015 at 20:52, Daniel Gutson wrote:
>> Real use cases: statistics and logging. It's a (one time) callback
>> reporting that something went wrong,
>> but not intended to fix things e.g. by attempting to free more memory.
>
> Why can't that be done by replacing operator new with a user-defined
> function, instead of modifying the default one?
>
> A program that wants to make use of the altered semantics is
> non-portable because it relies on a custom libstdc++ configuration
> option, and has to install a custom new-handler that relies on the
> altered libstdc++ behaviour. Replacing operator new to do the logging
> is portable and requires no custom configuration.
>
> (I'm not rejecting the patch, but am still trying to make sure it's a
> useful change.)

The issue is, as I understand it, to do the actual work of operator
new, i.e. allocate memory. It should force
us to copy most of the code of the original operator new,
which may change on new versions of the
STL, forcing us to keep updated.
This is important when having legacy code that uses operator new (but
doesn't use a new_handler),




-- 

Daniel F. Gutson
Chief Engineering Officer, SPD

San Lorenzo 47, 3rd Floor, Office 5
Córdoba, Argentina

Phone:   +54 351 4217888 / +54 351 4218211
Skype:dgutson
LinkedIn: http://ar.linkedin.com/in/danielgutson


Re: [PING 2] [PATCH] c++/67942 - diagnose placement new buffer overflow

2015-11-05 Thread Jason Merrill

OK, thanks.

Jason


Re: [PING] Re: [PATCH] c++/67913, 67917 - fix new expression with wrong number of elements

2015-11-05 Thread Martin Sebor

On 11/02/2015 09:55 PM, Jason Merrill wrote:

On 10/26/2015 10:06 PM, Martin Sebor wrote:

+  if (TREE_CONSTANT (maybe_constant_value (outer_nelts)))
+{
+  if (tree_int_cst_lt (max_outer_nelts_tree, outer_nelts))


maybe_constant_value may return a constant, but that doesn't mean that
outer_nelts was already constant; if it wasn't, the call to
tree_int_cst_lt will fail.


Thanks for the hint. I wasn't able to trigger the failure. I suspect
outer_nelts must have already been folded at this point because the
maybe_constant_value call isn't necessary. I removed it.


Since we're moving toward delayed folding, I'd prefer to use the result
of maybe_constant_value only for this diagnostic, and then continue to
pass the unfolded value along.


Sure. Done in the attached patch.

Martin
gcc/cp/ChangeLog

2015-10-19  Martin Sebor  

	PR c++/67913
	PR c++/67927
	* call.c (build_operator_new_call): Do not assume size_check
	is non-null, analogously to the top half of the function.
	* init.c (build_new_1): Detect and diagnose array sizes in
	excess of the maximum of roughly SIZE_MAX / 2.
	Insert a runtime check only for arrays with a non-constant size.
	(build_new): Detect and diagnose negative array sizes.

gcc/testsuite/ChangeLog

2015-10-19  Martin Sebor  

	* init/new45.C: New test to verify that operator new is invoked
	with or without overhead for a cookie.

	PR c++/67927
	* init/new44.C: New test for placement new expressions for arrays
	with excessive number of elements.

	PR c++/67913
	* init/new43.C: New test for placement new expressions for arrays
	with negative number of elements.

	* other/new-size-type.C: Expect array new expression with
	an excessive number of elements to be rejected.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index 367d42b..3f76198 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -4228,10 +4228,12 @@ build_operator_new_call (tree fnname, vec **args,
 	 {
 	   /* Update the total size.  */
 	   *size = size_binop (PLUS_EXPR, original_size, *cookie_size);
+	   if (size_check)
+	 {
 	   /* Set to (size_t)-1 if the size check fails.  */
-	   gcc_assert (size_check != NULL_TREE);
 	   *size = fold_build3 (COND_EXPR, sizetype, size_check,
 *size, TYPE_MAX_VALUE (sizetype));
+	 }
 	   /* Update the argument list to reflect the adjusted size.  */
 	   (**args)[0] = *size;
 	 }
diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 1ed8f6c..3b88098 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2272,7 +2272,11 @@ throw_bad_array_new_length (void)
 /* Generate code for a new-expression, including calling the "operator
new" function, initializing the object, and, if an exception occurs
during construction, cleaning up.  The arguments are as for
-   build_raw_new_expr.  This may change PLACEMENT and INIT.  */
+   build_raw_new_expr.  This may change PLACEMENT and INIT.
+   TYPE is the type of the object being constructed, possibly an array
+   of NELTS elements when NELTS is non-null (in "new T[NELTS]", T may
+   be an array of the form U[inner], with the whole expression being
+   "new U[NELTS][inner]").  */

 static tree
 build_new_1 (vec **placement, tree type, tree nelts,
@@ -2292,13 +2296,16 @@ build_new_1 (vec **placement, tree type, tree nelts,
  type.)  */
   tree pointer_type;
   tree non_const_pointer_type;
+  /* The most significant array bound in int[OUTER_NELTS][inner].  */
   tree outer_nelts = NULL_TREE;
-  /* For arrays, a bounds checks on the NELTS parameter. */
+  /* For arrays with a non-constant number of elements, a bounds checks
+ on the NELTS parameter to avoid integer overflow at runtime. */
   tree outer_nelts_check = NULL_TREE;
   bool outer_nelts_from_type = false;
+  /* Number of the "inner" elements in "new T[OUTER_NELTS][inner]".  */
   offset_int inner_nelts_count = 1;
   tree alloc_call, alloc_expr;
-  /* Size of the inner array elements. */
+  /* Size of the inner array elements (those with constant dimensions). */
   offset_int inner_size;
   /* The address returned by the call to "operator new".  This node is
  a VAR_DECL and is therefore reusable.  */
@@ -2492,21 +2499,41 @@ build_new_1 (vec **placement, tree type, tree nelts,
 	}

   max_outer_nelts = wi::udiv_trunc (max_size, inner_size);
-  /* Only keep the top-most seven bits, to simplify encoding the
-	 constant in the instruction stream.  */
+  max_outer_nelts_tree = wide_int_to_tree (sizetype, max_outer_nelts);
+
+  size = size_binop (MULT_EXPR, size, convert (sizetype, nelts));
+
+  if (TREE_CONSTANT (outer_nelts))
+	{
+	  if (tree_int_cst_lt (max_outer_nelts_tree, outer_nelts))
+	{
+	  /* When the array size is constant, check it at compile time
+		 to make sure it doesn't exceed the implementation-defined
+		 maximum, as required by C++ 14 (in C++ 11 this requirement
+		 isn't explicitly stated but it's enforced anyway -- see
+		 

Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Martin Sebor wrote:

> > There are several options marked Optimization that are actually matters of
> > language semantics that I don't think have anything to do with -O options,
> > e.g. -fexceptions and -fcx-limited-range.  The Optimization tag really
> > just means that it's saved and restored under certain circumstances.
> 
> Both -fexceptions and -fcx-limited-range are reported as disabled
> at -O0, so they're not the examples I'm looking for. The set of

But reporting that is nonsense.  They are disabled by default for C.  If 
you pass the option on the command line, it's enabled regardless of -O 
options, and should not be reported as disabled.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [hsa 3/12] HSA libgomp plugin

2015-11-05 Thread Joseph Myers
This new file should have the standard libgomp copyright / license notice.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [hsa 1/12] Configuration and offloading-related changes

2015-11-05 Thread Joseph Myers
On Thu, 5 Nov 2015, Martin Jambor wrote:

> libgomp plugin to be built.  Because the plugin needs to use HSA
> run-time library, I have introduced options --with-hsa-runtime (and
> more precise --with-hsa-include and --with-hsa-lib) to help find it.

New configure options should be documented in install.texi.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 3/4][AArch64] Add scheduling model for Exynos M1

2015-11-05 Thread Evandro Menezes

   2015-11-05  Evandro Menezes 

   gcc/
* config/aarch64/aarch64-cores.def: Use the Exynos M1 sched model.
* config/aarch64/aarch64.md: Include "exynos-m1.md".
* config/arm/arm-cores.def: Use the Exynos M1 sched model.
* config/arm/arm.md: Include "exynos-m1.md".
* config/arm/arm-tune.md: Regenerated.
* config/arm/exynos-m1.md: New file.

This patch adds the scheduling model for Exynos M1.  I split the DFA 
into ones for GP, LS and FP, resulting in many fewer states and arcs 
than before.


Please, commit if it's alright.

Thank you,

--
Evandro Menezes


>From 6846ac01eb670a90b4e1335447ac3ecb60bbce77 Mon Sep 17 00:00:00 2001
From: Evandro Menezes 
Date: Thu, 5 Nov 2015 17:23:32 -0600
Subject: [PATCH] [AArch64] Add scheduling model for Exynos M1

2015-11-05  Evandro Menezes  

gcc/
	* config/aarch64/aarch64-cores.def: Use the Exynos M1 sched model.
	* config/aarch64/aarch64.md: Include "exynos-m1.md".
	* config/arm/arm-cores.def: Use the Exynos M1 sched model.
	* config/arm/arm.md: Include "exynos-m1.md".
	* config/arm/exynos-m1.md: New file.
---
 gcc/config/aarch64/aarch64-cores.def |   2 +-
 gcc/config/aarch64/aarch64.md|   1 +
 gcc/config/arm/arm-cores.def |   2 +-
 gcc/config/arm/arm.md|   3 +-
 gcc/config/arm/exynos-m1.md  | 974 +++
 5 files changed, 979 insertions(+), 3 deletions(-)
 create mode 100644 gcc/config/arm/exynos-m1.md

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 0ab1ca8..c17baa3 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -43,7 +43,7 @@
 AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
 AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
 AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
-AARCH64_CORE("exynos-m1",   exynosm1,  cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
+AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
 AARCH64_CORE("thunderx",thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
 AARCH64_CORE("xgene1",  xgene1,xgene1,8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
 
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 6cdddf4..bd26f24 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -202,6 +202,7 @@
 ;; Scheduling
 (include "../arm/cortex-a53.md")
 (include "../arm/cortex-a57.md")
+(include "../arm/exynos-m1.md")
 (include "thunderx.md")
 (include "../arm/xgene1.md")
 
diff --git a/gcc/config/arm/arm-cores.def b/gcc/config/arm/arm-cores.def
index 4c35200..3448e82 100644
--- a/gcc/config/arm/arm-cores.def
+++ b/gcc/config/arm/arm-cores.def
@@ -168,7 +168,7 @@ ARM_CORE("cortex-a17.cortex-a7", cortexa17cortexa7, cortexa7,	7A,	ARM_FSET_MAKE_
 ARM_CORE("cortex-a53",	cortexa53, cortexa53,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a53)
 ARM_CORE("cortex-a57",	cortexa57, cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
 ARM_CORE("cortex-a72",	cortexa72, cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
-ARM_CORE("exynos-m1",	exynosm1,  cortexa57,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
+ARM_CORE("exynos-m1",	exynosm1,  exynosm1,	8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_CRC32 | FL_FOR_ARCH8A), cortex_a57)
 ARM_CORE("xgene1",  xgene1,xgene1,  8A,	ARM_FSET_MAKE_CPU1 (FL_LDSCHED | FL_FOR_ARCH8A),xgene1)
 
 /* V8 big.LITTLE implementations */
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 02e147e..e6f07e9 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -377,7 +377,7 @@
 arm1136jfs,cortexa5,cortexa7,cortexa8,\
 cortexa9,cortexa12,cortexa15,cortexa17,\
 cortexa53,cortexa57,cortexm4,cortexm7,\
-marvell_pj4,xgene1")
+exynosm1,marvell_pj4,xgene1")
 	   (eq_attr "tune_cortexr4" "yes"))
   (const_string "no")
   (const_string "yes"
@@ -416,6 +416,7 @@
 (include "cortex-m7.md")
 (include "cortex-m4.md")
 (include "cortex-m4-fpu.md")
+(include "exynos-m1.md")
 (include "vfp11.md")
 (include "marvell-pj4.md")
 (include "xgene1.md")
diff --git a/gcc/config/arm/exynos-m1.md b/gcc/config/arm/exynos-m1.md
new file mode 100644
index 000..a4f1138
--- /dev/null
+++ 

Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Martin Sebor

On 11/05/2015 10:09 AM, Segher Boessenkool wrote:

On Thu, Nov 05, 2015 at 08:58:25AM -0700, Martin Sebor wrote:

I don't think that reiterating in a condensed form what the manual
doesn't make clear in many more words will help. First, even users
who do find the relevant text in the manual often misunderstand it.
Others are misled by the "[enabled]" output into expecting the
optimizations to take place at -O0.


The -Q --help* output is primarily aimed at GCC developers, just
like plain -Q is.  The main problem here seems to be that the
documentation pushes beginner GCC users to use this.

Perhaps we should have a higher-level, more easy to use way to query
what flags will do what, possibly integrated with the pass manager.


I think that would be ideal (i.e., hiding each flag_xxx under
a get_flag_xxx() and set_flag_xxx() interface that would make
it possible to compute and dynamically query individual option
settings based on non-trivial dependencies). It strikes me as
quite a project.




Those who are in doubt and
ask on gcc-help are then told by GCC experts that -O0 disables
all optimizations without exception,


And it does, for the plain meaning of "optimization"; anything else
is arguably a bug.  But people have many different understandings
of what a "compiler optimization" is, all the way to "anything the
compiler does".


Yes, that also sometimes causes confusion, though it's not something
I'm trying to solve with this patch.

Martin


Merge of HSA branch

2015-11-05 Thread Martin Jambor
Hi,

we have had a few last-minute issues, so I apologize this is happening
a bit late, but I would like to start the process of merging the HSA
branch to GCC trunk.  Changes to different parts of the compiler and
libgomp are posted as individual emails in this thread.  No two
patches touch any single file and almost all of them need to be
applied together at once, I have only split the one big patch up to
ease review.

Individual changes are described in slightly more detail in their
respective messages.  If you are interested in how the HSAIL
generation works in general, I encourage you to have a look at my
Cauldron slides or presentation, only very few things have changed as
far as the general principles are concerned.  Let me just quickly stress
here that we do acceleration within a single compiler, as opposed to
LTO-ways of all the other accelerator teams.

We have bootstrapped and tested the patched GCC with and without HSA
enabled on x86_64-linux and found only a single issue (see below) and a
few new HSA warnings when HSA was enabled.  We have also run the full
C, C++ and Fortran testsuite on a computer that actually has a Carrizo
HSA APU and the result was the same.

The single issue is that libgomp.oacc-c/../libgomp.oacc-c-c++-common/*
tests get run using our HSA plugin, which however cannot do OpenACC.
I believe it should be something very easy to fix and I did not want
to hold off the submission and review because of it.

I also acknowledge that we should add HSA-specific tests to the GCC
testsuite but we are only now looking at how to do that and will
welcome any guidance in this regard.

I acknowledge that the submission comes quite late and that the class
of OpenMP loops we can handle well is small, but nevertheless I would
like to ask for review and eventual acceptance to trunk and GCC 6.

I'll be grateful for any review, comments or questions,

Martin



[hsa 6/12] IPA-HSA pass

2015-11-05 Thread Martin Jambor
Hi,

when a target construct is gridified, the HSA GPU function is
associated with the CPU function throughout the compilation, so that
they can be registered as a pair in libgomp.

When a target or a parallel construct is not gridified, its body
emerges out of OMP expansion as one gimple function.  However, at some
point we need to create a special HSA function representation so that
we can modify behavior of a (very) few optimization passes for them.
Similarly, "omp declare target" functions, which ought to be callable
from HSA, should get their own representation for exactly the same
reason.

Both is done by the following new IPA pass, which creates new HSA
clones in these cases.  Moreover, it redirects the appropriate call
graph edges to be in between HSA implementations, marks HSA clones
with the flatten attribute to minimize any call overhead (which is
much more significant on GPUs) and makes sure both the CPU and GPU
functions are coupled together and remain in the same LTO partition so
that they can be registered together to libgomp.

Thanks,

Martin


2015-11-05  Martin Liska  
Martin Jambor  

* ipa-hsa.c: New file.


diff --git a/gcc/ipa-hsa.c b/gcc/ipa-hsa.c
new file mode 100644
index 000..b4cb58e
--- /dev/null
+++ b/gcc/ipa-hsa.c
@@ -0,0 +1,334 @@
+/* Callgraph based analysis of static variables.
+   Copyright (C) 2015 Free Software Foundation, Inc.
+   Contributed by Martin Liska 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+/* Interprocedural HSA pass is responsible for creation of HSA clones.
+   For all these HSA clones, we emit HSAIL instructions and pass processing
+   is terminated.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "is-a.h"
+#include "hash-set.h"
+#include "vec.h"
+#include "tree.h"
+#include "tree-pass.h"
+#include "function.h"
+#include "basic-block.h"
+#include "gimple.h"
+#include "dumpfile.h"
+#include "gimple-pretty-print.h"
+#include "tree-streamer.h"
+#include "stringpool.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "symbol-summary.h"
+#include "hsa.h"
+
+namespace {
+
+/* If NODE is not versionable, warn about not emiting HSAIL and return false.
+   Otherwise return true.  */
+
+static bool
+check_warn_node_versionable (cgraph_node *node)
+{
+  if (!node->local.versionable)
+{
+  if (warning_at (EXPR_LOCATION (node->decl), OPT_Whsa,
+ HSA_SORRY_MSG))
+   inform (EXPR_LOCATION (node->decl),
+   "Function cannot be cloned");
+  return false;
+}
+  return true;
+}
+
+/* The function creates HSA clones for all functions that were either
+   marked as HSA kernels or are callable HSA functions.  Apart from that,
+   we redirect all edges that come from an HSA clone and end in another
+   HSA clone to connect these two functions.  */
+
+static unsigned int
+process_hsa_functions (void)
+{
+  struct cgraph_node *node;
+
+  if (hsa_summaries == NULL)
+hsa_summaries = new hsa_summary_t (symtab);
+
+  FOR_EACH_DEFINED_FUNCTION (node)
+{
+  hsa_function_summary *s = hsa_summaries->get (node);
+
+  /* A linked function is skipped.  */
+  if (s->m_binded_function != NULL)
+   continue;
+
+  if (s->m_kind != HSA_NONE)
+   {
+ if (!check_warn_node_versionable (node))
+   continue;
+ cgraph_node *clone = node->create_virtual_clone
+   (vec  (), NULL, NULL, "hsa");
+ TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);
+ if (s->m_kind == HSA_KERNEL)
+   DECL_ATTRIBUTES (clone->decl)
+ = tree_cons (get_identifier ("flatten"), NULL_TREE,
+  DECL_ATTRIBUTES (clone->decl));
+
+ clone->force_output = true;
+ hsa_summaries->link_functions (clone, node, s->m_kind);
+
+ if (dump_file)
+   fprintf (dump_file, "HSA creates a new clone: %s, type: %s\n",
+clone->name (),
+s->m_kind == HSA_KERNEL ? "kernel" : "function");
+   }
+  else if (hsa_callable_function_p (node->decl))
+   {
+ if (!check_warn_node_versionable (node))
+   continue;
+ cgraph_node *clone = node->create_virtual_clone
+   (vec  (), NULL, NULL, "hsa");
+ TREE_PUBLIC (clone->decl) = TREE_PUBLIC (node->decl);

Re: [hsa 5/12] New HSA-related GCC options

2015-11-05 Thread Joseph Myers
The new options need to be documented in invoke.texi.

-- 
Joseph S. Myers
jos...@codesourcery.com


Re: [PATCH 3/4][AArch64] Add scheduling model for Exynos M1

2015-11-05 Thread Evandro Menezes

   2015-11-05  Evandro Menezes 

   gcc/
* config/aarch64/aarch64-cores.def: Use the Exynos M1 sched model.
* config/aarch64/aarch64.md: Include "exynos-m1.md".
* config/arm/arm-cores.def: Use the Exynos M1 sched model.
* config/arm/arm.md: Include "exynos-m1.md".
* config/arm/arm-tune.md: Regenerated.
* config/arm/exynos-m1.md: New file.

This patch adds the scheduling model for Exynos M1.  I split the DFA 
into ones for GP, LS and FP, resulting in many fewer states and arcs 
than before.


Please, commit if it's alright.

Thank you,

--
Evandro Menezes




Re: [PATCH] clarify documentation of -Q --help=optimizers

2015-11-05 Thread Segher Boessenkool
On Thu, Nov 05, 2015 at 02:04:47PM -0700, Martin Sebor wrote:
> On 11/05/2015 10:09 AM, Segher Boessenkool wrote:
> >On Thu, Nov 05, 2015 at 08:58:25AM -0700, Martin Sebor wrote:
> >>I don't think that reiterating in a condensed form what the manual
> >>doesn't make clear in many more words will help. First, even users
> >>who do find the relevant text in the manual often misunderstand it.
> >>Others are misled by the "[enabled]" output into expecting the
> >>optimizations to take place at -O0.
> >
> >The -Q --help* output is primarily aimed at GCC developers, just
> >like plain -Q is.  The main problem here seems to be that the
> >documentation pushes beginner GCC users to use this.
> >
> >Perhaps we should have a higher-level, more easy to use way to query
> >what flags will do what, possibly integrated with the pass manager.
> 
> I think that would be ideal (i.e., hiding each flag_xxx under
> a get_flag_xxx() and set_flag_xxx() interface that would make
> it possible to compute and dynamically query individual option
> settings based on non-trivial dependencies). It strikes me as
> quite a project.

I was thinking more of showing at every flag whether it is relevant at
all, or maybe not showing irrelevant ones except maybe when some
--verbose or so option is given; and if you want to get really fancy
(but also very helpful to both users and developers), have some option
that shows all dependencies for a flag to take effect.

This of course requires expressing some dependencies manually in the
.opt files, but hopefully most can be done automatically with some
magic.

> >>Those who are in doubt and
> >>ask on gcc-help are then told by GCC experts that -O0 disables
> >>all optimizations without exception,
> >
> >And it does, for the plain meaning of "optimization"; anything else
> >is arguably a bug.  But people have many different understandings
> >of what a "compiler optimization" is, all the way to "anything the
> >compiler does".
> 
> Yes, that also sometimes causes confusion, though it's not something
> I'm trying to solve with this patch.

It's not something you _can_ solve as far as I see.  There is a
considerable gap between what beginner and advanced users expect from
the compiler, understand about the compiler, and worst of all, think
they understand about the compiler.  GCC should be a useful compiler
to that whole spectrum of audiences.  Currently -Q --help shows just
the gory details, which is functionality we want to keep IMO, but not
something we want to recommend everyone to use.

Does that make sense?  I'm not sure I'm expressing myself well here,
you cannot see my frantic handwaving :-)


Segher


Re: [PING 2] [PATCH] c++/67942 - diagnose placement new buffer overflow

2015-11-05 Thread Martin Sebor

On 11/05/2015 11:13 AM, Jason Merrill wrote:

OK, thanks.


I'm afraid the last patch that I just committed breaks libstdc++
bootstrap with the following error:

/home/msebor/scm/fsf/gcc-svn/libstdc++-v3/src/c++98/ios_init.cc: In 
static member function ‘static bool std::ios_base::sync_with_stdio(bool)’:
/home/msebor/scm/fsf/gcc-svn/libstdc++-v3/src/c++98/ios_init.cc:181:59: 
internal compiler error: in tree_to_shwi, at tree.c:7297

  new (_cout) stdio_filebuf(stdout, ios_base::out);

I thought it had something to do with the recent changes made to
the patch but it looks like it's a problem that's been there all
along: the patch assumes DECL_SIZE_UNIT (oper) is non-null but
it turns out that's not a safe assumption. I don't know why my
testing didn't expose it before, despite running make bootstrap
and make check.

The following patch (only the first bit is important) fixes the
ICE. I will commit it as soon as I'm done testing it.

Sorry about the breakage.

Martin

diff --git a/gcc/cp/init.c b/gcc/cp/init.c
index 7600363..ca7de56 100644
--- a/gcc/cp/init.c
+++ b/gcc/cp/init.c
@@ -2384,12 +2384,13 @@ warn_placement_new_too_small (tree type, tree 
nelts, tree size, tree oper)

   /* Treat members of unions and members of structs uniformly, even
 though the size of a member of a union may be viewed as extending
 to the end of the union itself (it is by 
__builtin_object_size).  */

-  if (TREE_CODE (oper) == VAR_DECL || use_obj_size)
+  if ((TREE_CODE (oper) == VAR_DECL || use_obj_size)
+ && (DECL_SIZE_UNIT (oper)))
{
  /* Use the size of the entire array object when the expression
 refers to a variable or its size depends on an expression
 that's not a compile-time constant.  */
- bytes_avail = tree_to_shwi (DECL_SIZE_UNIT (oper));
+ bytes_avail = tree_to_uhwi (DECL_SIZE_UNIT (oper));
  exact_size = !use_obj_size;
}
   else



[PATCH 4/4][AArch64] Add cost model for Exynos M1

2015-11-05 Thread Evandro Menezes

   2015-10-25  Evandro Menezes 

   gcc/

   * config/aarch64/aarch64-cores.def: Use the Exynos M1 cost model.
   * config/aarch64/aarch64.c (exynosm1_addrcost_table): New variable.
   (exynosm1_regmove_cost): Likewise.
   (exynosm1_vector_cost): Likewise.
   (exynosm1_tunings): Likewise.
   * config/arm/aarch-cost-tables.h (exynosm1_extra_costs): Likewise.
   * config/arm/arm.c (arm_exynos_m1_tune): Likewise.

This patch adds the cost model for Exynos M1.  This patch depends on a 
couple of previous patches though, 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00505.html and 
https://gcc.gnu.org/ml/gcc-patches/2015-11/msg00538.html


Please, commit if it's alright.

Thank you,

--
Evandro Menezes


>From 9b02c57fd2f2507dcc79767d7ffdb7ccec4cdd25 Mon Sep 17 00:00:00 2001
From: Evandro Menezes 
Date: Thu, 5 Nov 2015 17:58:47 -0600
Subject: [PATCH] [AArch64] Add cost model for Exynos M1

2015-10-25  Evandro Menezes  

gcc/
	* config/aarch64/aarch64-cores.def: Use the Exynos M1 cost model.
	* config/aarch64/aarch64.c (exynosm1_addrcost_table): New variable.
	(exynosm1_regmove_cost): Likewise.
	(exynosm1_vector_cost): Likewise.
	(exynosm1_tunings): Likewise.
	* config/arm/aarch-cost-tables.h (exynosm1_extra_costs): Likewise.
	* config/arm/arm.c (arm_exynos_m1_tune): Likewise.
---
 gcc/config/aarch64/aarch64-cores.def |   2 +-
 gcc/config/aarch64/aarch64.c |  66 ++
 gcc/config/arm/aarch-cost-tables.h   | 103 +++
 gcc/config/arm/arm-cores.def |   2 +-
 gcc/config/arm/arm.c |  23 
 5 files changed, 194 insertions(+), 2 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index c17baa3..607a333 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -43,7 +43,7 @@
 AARCH64_CORE("cortex-a53",  cortexa53, cortexa53, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa53, "0x41", "0xd03")
 AARCH64_CORE("cortex-a57",  cortexa57, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa57, "0x41", "0xd07")
 AARCH64_CORE("cortex-a72",  cortexa72, cortexa57, 8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC, cortexa72, "0x41", "0xd08")
-AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, cortexa72, "0x53", "0x001")
+AARCH64_CORE("exynos-m1",   exynosm1,  exynosm1,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, exynosm1,  "0x53", "0x001")
 AARCH64_CORE("thunderx",thunderx,  thunderx,  8A,  AARCH64_FL_FOR_ARCH8 | AARCH64_FL_CRC | AARCH64_FL_CRYPTO, thunderx,  "0x43", "0x0a1")
 AARCH64_CORE("xgene1",  xgene1,xgene1,8A,  AARCH64_FL_FOR_ARCH8, xgene1, "0x50", "0x000")
 
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index e7f1c07..d7d3f05 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -215,6 +215,22 @@ static const struct cpu_addrcost_table cortexa57_addrcost_table =
   0, /* imm_offset  */
 };
 
+static const struct cpu_addrcost_table exynosm1_addrcost_table =
+{
+{
+  0, /* hi  */
+  0, /* si  */
+  0, /* di  */
+  2, /* ti  */
+},
+  0, /* pre_modify  */
+  0, /* post_modify  */
+  1, /* register_offset  */
+  1, /* register_sextend  */
+  2, /* register_zextend  */
+  0, /* imm_offset  */
+};
+
 static const struct cpu_addrcost_table xgene1_addrcost_table =
 {
 {
@@ -261,6 +277,16 @@ static const struct cpu_regmove_cost cortexa53_regmove_cost =
   2 /* FP2FP  */
 };
 
+static const struct cpu_regmove_cost exynosm1_regmove_cost =
+{
+  1, /* GP2GP  */
+  /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost (actual, 4 and 9).  */
+  9, /* GP2FP  */
+  9, /* FP2GP  */
+  1 /* FP2FP  */
+};
+
 static const struct cpu_regmove_cost thunderx_regmove_cost =
 {
   2, /* GP2GP  */
@@ -313,6 +339,22 @@ static const struct cpu_vector_cost cortexa57_vector_cost =
   1 /* cond_not_taken_branch_cost  */
 };
 
+static const struct cpu_vector_cost exynosm1_vector_cost =
+{
+  1, /* scalar_stmt_cost  */
+  5, /* scalar_load_cost  */
+  1, /* scalar_store_cost  */
+  3, /* vec_stmt_cost  */
+  3, /* vec_to_scalar_cost  */
+  3, /* scalar_to_vec_cost  */
+  5, /* vec_align_load_cost  */
+  5, /* vec_unalign_load_cost  */
+  1, /* vec_unalign_store_cost  */
+  1, /* vec_store_cost  */
+  1, /* cond_taken_branch_cost  */
+  1 /* cond_not_taken_branch_cost  */
+};
+
 /* Generic costs for vector insn classes.  */
 static const struct cpu_vector_cost xgene1_vector_cost =
 {
@@ -436,6 +478,30 @@ static const struct tune_params cortexa72_tunings =
   (AARCH64_EXTRA_TUNE_NONE)	/* tune_flags.  */
 };
 
+static const struct tune_params exynosm1_tunings =
+{
+  _extra_costs,
+  _addrcost_table,
+  _regmove_cost,
+  _vector_cost,
+  _branch_cost,
+  

[hsa 2/12] Modifications to libgomp proper

2015-11-05 Thread Martin Jambor
Hi,

The patch below contains all changes to libgomp files.  First, it adds
a new constant identifying HSA devices and a structure that is shared
between libgomp and the compiler when kernels from kernels are invoked
via dynamic parallelism.

Second it modifies the GOMP_target_41 function so that it also can take
kernel attributes (essentially the grid dimension) as a parameter and
pass it on to the HSA libgomp plugin.  Because we do want HSAIL
generation to gracefully fail and use host fallback in that case, the
same function calls the host implementation if it cannot map the
requested function to an accelerated one or if a new callback
can_run_func indicates there is a problem.

We need a new hook because we use it to check for linking errors which
we cannot do when incrementally loading registered images.  And we
want to handle linking errors, so that when we cannot emit HSAIL for a
function called from a kernel (possibly in a different compilation
unit), we also resort to host fallback.

Last but not least, the patch removes data remapping when the selected
device is capable of sharing memory with the host.

Thanks,

Martin


2015-11-02  Martin Jambor  
Martin Liska  

include/
* gomp-constants.h (GOMP_DEVICE_HSA): New macro.

libgomp/
* libgomp-plugin.h (offload_target_type): New element
OFFLOAD_TARGET_TYPE_HSA.
* libgomp.h (gomp_device_descr): Extra parameter of run_func, new
field can_run_func.
* libgomp_g.h (GOMP_target_41): Add an extra parameter.
* oacc-host.c (host_run): Add an extra unused parameter.
* target.c (gomp_get_target_fn_addr): Allow failure if device shares
memory.
(GOMP_target): Assert failure did not happen.  Add extra parameter to
call of run_func.
(GOMP_target_41): Added an extra parameter, pass it to run_func.
Allow host fallback if device shares memory.  Do not remap data if
device has shared memory.
(GOMP_target_data_41): Run host fallback if device has shared memory.
(gomp_load_plugin_for_device): Also attempt to load can_run_func.


diff --git a/include/gomp-constants.h b/include/gomp-constants.h
index f834dec..46d52b3 100644
--- a/include/gomp-constants.h
+++ b/include/gomp-constants.h
@@ -160,6 +160,7 @@ enum gomp_map_kind
 #define GOMP_DEVICE_NOT_HOST   4
 #define GOMP_DEVICE_NVIDIA_PTX 5
 #define GOMP_DEVICE_INTEL_MIC  6
+#define GOMP_DEVICE_HSA7
 
 #define GOMP_DEVICE_ICV-1
 #define GOMP_DEVICE_HOST_FALLBACK  -2
@@ -212,4 +213,35 @@ enum gomp_map_kind
 #define GOMP_LAUNCH_OP(X) (((X) >> GOMP_LAUNCH_OP_SHIFT) & 0x)
 #define GOMP_LAUNCH_OP_MAX 0x
 
+/* HSA specific data structures.  */
+
+/* HSA kernel dispatch is collection of information needed for
+   a kernel dispatch.  */
+
+struct hsa_kernel_dispatch
+{
+  /* Pointer to a command queue associated with a kernel dispatch agent.  */
+  void *queue;
+  /* Pointer to reserved memory for OMP data struct copying.  */
+  void *omp_data_memory;
+  /* Pointer to a memory space used for kernel arguments passing.  */
+  void *kernarg_address;
+  /* Kernel object.  */
+  uint64_t object;
+  /* Synchronization signal used for dispatch synchronization.  */
+  uint64_t signal;
+  /* Private segment size.  */
+  uint32_t private_segment_size;
+  /* Group segment size.  */
+  uint32_t group_segment_size;
+  /* Number of children kernel dispatches.  */
+  uint64_t kernel_dispatch_count;
+  /* Number of threads.  */
+  uint32_t omp_num_threads;
+  /* Debug purpose argument.  */
+  uint64_t debug;
+  /* Kernel dispatch structures created for children kernel dispatches.  */
+  struct hsa_kernel_dispatch **children_dispatches;
+};
+
 #endif
diff --git a/libgomp/libgomp-plugin.h b/libgomp/libgomp-plugin.h
index 24fbb94..acf6eb7 100644
--- a/libgomp/libgomp-plugin.h
+++ b/libgomp/libgomp-plugin.h
@@ -48,7 +48,8 @@ enum offload_target_type
   OFFLOAD_TARGET_TYPE_HOST = 2,
   /* OFFLOAD_TARGET_TYPE_HOST_NONSHM = 3 removed.  */
   OFFLOAD_TARGET_TYPE_NVIDIA_PTX = 5,
-  OFFLOAD_TARGET_TYPE_INTEL_MIC = 6
+  OFFLOAD_TARGET_TYPE_INTEL_MIC = 6,
+  OFFLOAD_TARGET_TYPE_HSA = 7
 };
 
 /* Auxiliary struct, used for transferring pairs of addresses from plugin
diff --git a/libgomp/libgomp.h b/libgomp/libgomp.h
index 9c8b1fb..0ad42d2 100644
--- a/libgomp/libgomp.h
+++ b/libgomp/libgomp.h
@@ -876,7 +876,8 @@ struct gomp_device_descr
   void *(*dev2host_func) (int, void *, const void *, size_t);
   void *(*host2dev_func) (int, void *, const void *, size_t);
   void *(*dev2dev_func) (int, void *, const void *, size_t);
-  void (*run_func) (int, void *, void *);
+  void (*run_func) (int, void *, void *, const void *);
+  bool (*can_run_func) (void *);
 
   /* Splay tree containing information about mapped memory regions.  */
   struct splay_tree_s mem_map;
diff --git a/libgomp/libgomp_g.h 

[hsa 12/12] HSA register allocator

2015-11-05 Thread Martin Jambor
Hi,

because the HSA backend is not based on RTL, we need our own register
allocator, and it is in this patch.  The allocator has been written by
Michael Matz and I have
put it into a separate email so that I can add him to CC, because he
is much better suited to answer any questions or review comments.

Thanks,

Martin


2015-11-05  Michael Matz 
Martin Jambor  

* hsa-regalloc.c: New file.


diff --git a/gcc/hsa-regalloc.c b/gcc/hsa-regalloc.c
new file mode 100644
index 000..3919258
--- /dev/null
+++ b/gcc/hsa-regalloc.c
@@ -0,0 +1,711 @@
+/* HSAIL IL Register allocation and out-of-SSA.
+   Copyright (C) 2013-15 Free Software Foundation, Inc.
+   Contributed by Michael Matz 
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+.  */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "tm.h"
+#include "is-a.h"
+#include "vec.h"
+#include "tree.h"
+#include "dominance.h"
+#include "cfg.h"
+#include "cfganal.h"
+#include "function.h"
+#include "bitmap.h"
+#include "dumpfile.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "symbol-summary.h"
+#include "hsa.h"
+
+
+/* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa.  */
+
+static void
+naive_process_phi (hsa_insn_phi *phi)
+{
+  unsigned count = phi->operand_count ();
+  for (unsigned i = 0; i < count; i++)
+{
+  gcc_checking_assert (phi->get_op (i));
+  hsa_op_base *op = phi->get_op (i);
+  hsa_bb *hbb;
+  edge e;
+
+  if (!op)
+   break;
+
+  e = EDGE_PRED (phi->m_bb, i);
+  if (single_succ_p (e->src))
+   hbb = hsa_bb_for_bb (e->src);
+  else
+   {
+ basic_block old_dest = e->dest;
+ hbb = hsa_init_new_bb (split_edge (e));
+
+ /* If switch insn used this edge, fix jump table.  */
+ hsa_bb *source = hsa_bb_for_bb (e->src);
+ hsa_insn_sbr *sbr;
+ if (source->m_last_insn
+ && (sbr = dyn_cast  (source->m_last_insn)))
+   sbr->replace_all_labels (old_dest, hbb->m_bb);
+   }
+
+  hsa_build_append_simple_mov (phi->m_dest, op, hbb);
+}
+}
+
+/* Naive out-of SSA.  */
+
+static void
+naive_outof_ssa (void)
+{
+  basic_block bb;
+
+  hsa_cfun->m_in_ssa = false;
+
+  FOR_ALL_BB_FN (bb, cfun)
+  {
+hsa_bb *hbb = hsa_bb_for_bb (bb);
+hsa_insn_phi *phi;
+
+for (phi = hbb->m_first_phi;
+phi;
+phi = phi->m_next ? as_a  (phi->m_next): NULL)
+  naive_process_phi (phi);
+
+/* Zap PHI nodes, they will be deallocated when everything else will.  */
+hbb->m_first_phi = NULL;
+hbb->m_last_phi = NULL;
+  }
+}
+
+/* Return register class number for the given HSA TYPE.  0 means the 'c' one
+   bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
+   and 3 for 'q' 128 bit class.  */
+
+static int
+m_reg_class_for_type (BrigType16_t type)
+{
+  switch (type)
+{
+case BRIG_TYPE_B1:
+  return 0;
+
+case BRIG_TYPE_U8:
+case BRIG_TYPE_U16:
+case BRIG_TYPE_U32:
+case BRIG_TYPE_S8:
+case BRIG_TYPE_S16:
+case BRIG_TYPE_S32:
+case BRIG_TYPE_F16:
+case BRIG_TYPE_F32:
+case BRIG_TYPE_B8:
+case BRIG_TYPE_B16:
+case BRIG_TYPE_B32:
+case BRIG_TYPE_U8X4:
+case BRIG_TYPE_S8X4:
+case BRIG_TYPE_U16X2:
+case BRIG_TYPE_S16X2:
+case BRIG_TYPE_F16X2:
+  return 1;
+
+case BRIG_TYPE_U64:
+case BRIG_TYPE_S64:
+case BRIG_TYPE_F64:
+case BRIG_TYPE_B64:
+case BRIG_TYPE_U8X8:
+case BRIG_TYPE_S8X8:
+case BRIG_TYPE_U16X4:
+case BRIG_TYPE_S16X4:
+case BRIG_TYPE_F16X4:
+case BRIG_TYPE_U32X2:
+case BRIG_TYPE_S32X2:
+case BRIG_TYPE_F32X2:
+  return 2;
+
+case BRIG_TYPE_B128:
+case BRIG_TYPE_U8X16:
+case BRIG_TYPE_S8X16:
+case BRIG_TYPE_U16X8:
+case BRIG_TYPE_S16X8:
+case BRIG_TYPE_F16X8:
+case BRIG_TYPE_U32X4:
+case BRIG_TYPE_U64X2:
+case BRIG_TYPE_S32X4:
+case BRIG_TYPE_S64X2:
+case BRIG_TYPE_F32X4:
+case BRIG_TYPE_F64X2:
+  return 3;
+
+default:
+  gcc_unreachable ();
+}
+}
+
+/* If the Ith operands of INSN is or contains a register (in an address),
+   return the address of that register operand.  If not return NULL.  */
+
+static hsa_op_reg **
+insn_reg_addr (hsa_insn_basic *insn, int i)
+{
+  hsa_op_base *op = insn->get_op (i);
+  if 

[hsa 1/12] Configuration and offloading-related changes

2015-11-05 Thread Martin Jambor
Hi,

this patch contains changes to the configuration mechanism and offload
bits, so that users can build compilers with HSA support and it plays
nicely with other accelerators despite using an altogether different
implementation approach.

With this patch, the user can request HSA support by including the
string "hsa" among the requested accelerators in
--enable-offload-targets.  This will cause the compiler to start
producing HSAIL for target OpenMP regions/functions and the hsa
libgomp plugin to be built.  Because the plugin needs to use HSA
run-time library, I have introduced options --with-hsa-runtime (and
more precise --with-hsa-include and --with-hsa-lib) to help find it.

The catch is however that there is no offload compiler for HSA and so
the wrapper should not attempt to look for it (that is what the hunk
in lto-wrapper.c does) and when HSA is the only accelerator, it is
wasteful to output LTO sections with byte-code and therefore if HSA is
the only configured accelerator, it does not set ENABLE_OFFLOADING
macro.

Finally, when the compiler has been configured for HSA but the user
disables it by omitting it in the -foffload compiler option, we need
to observe that decision.  That is what the opts.c hunk does.

Thanks,

Martin


2015-11-04  Martin Jambor  

gcc/
* Makefile.in (OBJS): Add new source files.
(GTFILES): Add hsa.c.
* configure.ac (accel_dir_suffix): Treat hsa specially.
(OFFLOAD_TARGETS): Define ENABLE_OFFLOADING according to
$enable_offloading.
(ENABLE_HSA): Define ENABLE_HSA according to $enable_hsa.
* lto-wrapper.c (compile_images_for_offload_targets): Do not attempt
to invoke offload compiler for hsa accelerator.
* opts.c (common_handle_option): Determine whether HSA offloading
should be performed.

libgomp/plugin/
* Makefrag.am: Add HSA plugin requirements.
* configfrag.ac (HSA_RUNTIME_INCLUDE): New variable.
(HSA_RUNTIME_LIB): Likewise.
(HSA_RUNTIME_CPPFLAGS): Likewise.
(HSA_RUNTIME_INCLUDE): New substitution.
(HSA_RUNTIME_LIB): Likewise.
(HSA_RUNTIME_LDFLAGS): Likewise.
(hsa-runtime): New configure option.
(hsa-runtime-include): Likewise.
(hsa-runtime-lib): Likewise.
(PLUGIN_HSA): New substitution variable.
Fill HSA_RUNTIME_INCLUDE and HSA_RUNTIME_LIB according to the new
configure options.
(PLUGIN_HSA_CPPFLAGS): Likewise.
(PLUGIN_HSA_LDFLAGS): Likewise.
(PLUGIN_HSA_LIBS): Likewise.
Check that we have access to HSA run-time.
(PLUGIN_NVPTX): New conditional.


diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 7d53a7d..2ca16b1 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1293,6 +1293,11 @@ OBJS = \
graphite-sese-to-poly.o \
gtype-desc.o \
haifa-sched.o \
+   hsa.o \
+   hsa-gen.o \
+   hsa-regalloc.o \
+   hsa-brig.o \
+   hsa-dump.o \
hw-doloop.o \
hwint.o \
ifcvt.o \
@@ -1317,6 +1322,7 @@ OBJS = \
ipa-icf.o \
ipa-icf-gimple.o \
ipa-reference.o \
+   ipa-hsa.o \
ipa-ref.o \
ipa-utils.o \
ipa.o \
@@ -2379,6 +2385,7 @@ GTFILES = $(CPP_ID_DATA_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/sanopt.c \
   $(srcdir)/ipa-devirt.c \
   $(srcdir)/internal-fn.h \
+  $(srcdir)/hsa.c \
   @all_gtfiles@
 
 # Compute the list of GT header files from the corresponding C sources,
diff --git a/gcc/configure.ac b/gcc/configure.ac
index 7e22267..d0d4565 100644
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -943,6 +943,13 @@ AC_SUBST(accel_dir_suffix)
 
 for tgt in `echo $enable_offload_targets | sed 's/,/ /g'`; do
   tgt=`echo $tgt | sed 's/=.*//'`
+
+  if echo "$tgt" | grep "^hsa" > /dev/null ; then
+enable_hsa=1
+  else
+enable_offloading=1
+  fi
+
   if test x"$offload_targets" = x; then
 offload_targets=$tgt
   else
@@ -951,11 +958,16 @@ for tgt in `echo $enable_offload_targets | sed 's/,/ 
/g'`; do
 done
 AC_DEFINE_UNQUOTED(OFFLOAD_TARGETS, "$offload_targets",
   [Define to offload targets, separated by commas.])
-if test x"$offload_targets" != x; then
+if test x"$enable_offloading" != x; then
   AC_DEFINE(ENABLE_OFFLOADING, 1,
 [Define this to enable support for offloading.])
 fi
 
+if test x"$enable_hsa" = x1 ; then
+  AC_DEFINE(ENABLE_HSA, 1,
+[Define this to enable support for generating HSAIL.])
+fi
+
 AC_ARG_WITH(multilib-list,
 [AS_HELP_STRING([--with-multilib-list], [select multilibs (AArch64, SH and 
x86-64 only)])],
 :,
diff --git a/gcc/lto-wrapper.c b/gcc/lto-wrapper.c
index 20e67ed..5f564d9 100644
--- a/gcc/lto-wrapper.c
+++ b/gcc/lto-wrapper.c
@@ -745,6 +745,11 @@ compile_images_for_offload_targets (unsigned in_argc, char 
*in_argv[],
   offload_names = XCNEWVEC (char *, num_targets + 1);
   for (unsigned i = 0; i < num_targets; i++)
 {
+  /* HSA does not use LTO-like 

[hsa 9/12] Small alloc-pool fix

2015-11-05 Thread Martin Jambor
Hi,

we use C++ new operators based on alloc-pools a lot in the subsequent
patches and realized that on the current trunk, such new operators
would needlessly call the placement ::new operator within the allocate
method of alloc-pool.  Fixed below by providing a new allocation
method which does not call placement new, which is only safe to use
from within a new operator.

The patch also fixes the slightly weird two parameter operator new
(which we do not use in HSA backend) so that it does not do the same.

Thanks,

Martin


2015-11-05  Martin Liska  
Martin Jambor  

* alloc-pool.h (object_allocator::vallocate): New method.
(operator new): Call vallocate instead of allocate.
(operator new): New operator.


diff --git a/gcc/alloc-pool.h b/gcc/alloc-pool.h
index 0dc05cd..46b6550 100644
--- a/gcc/alloc-pool.h
+++ b/gcc/alloc-pool.h
@@ -483,6 +483,12 @@ public:
 return ::new (m_allocator.allocate ()) T ();
   }
 
+  inline void *
+  vallocate () ATTRIBUTE_MALLOC
+  {
+return m_allocator.allocate ();
+  }
+
   inline void
   remove (T *object)
   {
@@ -523,12 +529,19 @@ struct alloc_pool_descriptor
 };
 
 /* Helper for classes that do not provide default ctor.  */
-
 template 
 inline void *
 operator new (size_t, object_allocator )
 {
-  return a.allocate ();
+  return a.vallocate ();
+}
+
+/* Helper for classes that do not provide default ctor.  */
+template 
+inline void *
+operator new (size_t, object_allocator *a)
+{
+  return a->vallocate ();
 }
 
 /* Hashtable mapping alloc_pool names to descriptors.  */


[hsa 8/12] Pass manager changes

2015-11-05 Thread Martin Jambor
Hi,

the following patch has actually already been committed to trunk by
Martin yesterday, but is necessary if you have an older trunk.  It
allows an optimization pass to declare the function finished by
returning TODO_discard_function.  The pass manager will then discard
the function.

Martin


2015-11-04  Martin Liska  

* cgraphunit.c (cgraph_node::expand_thunk): Call 
allocate_struct_function before init_function_start.
(cgraph_node::expand): Use push_cfun and pop_cfun.
* config/i386/i386.c (ix86_code_end): Call
allocate_struct_function before init_function_start.
* config/rs6000/rs6000.c (rs6000_code_end): Likewise.
* function.c (init_function_start): Move preamble to all
callers.
* passes.c (do_per_function_toporder): Use push_cfun and pop_cfun.
(execute_one_pass): Handle newly added TODO_discard_function.
(execute_pass_list_1): Terminate if cfun equals to NULL.
(execute_pass_list): Do not push and pop cfun, expect that
cfun is set.
* tree-pass.h (TODO_discard_function): Define.

diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 43d3185..f73d9a7 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1618,6 +1618,7 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool 
force_gimple_thunk)
   fn_block = make_node (BLOCK);
   BLOCK_VARS (fn_block) = a;
   DECL_INITIAL (thunk_fndecl) = fn_block;
+  allocate_struct_function (thunk_fndecl, false);
   init_function_start (thunk_fndecl);
   cfun->is_thunk = 1;
   insn_locations_init ();
@@ -1632,7 +1633,6 @@ cgraph_node::expand_thunk (bool output_asm_thunks, bool 
force_gimple_thunk)
   insn_locations_finalize ();
   init_insn_lengths ();
   free_after_compilation (cfun);
-  set_cfun (NULL);
   TREE_ASM_WRITTEN (thunk_fndecl) = 1;
   thunk.thunk_p = false;
   analyzed = false;
@@ -1944,9 +1944,11 @@ cgraph_node::expand (void)
   bitmap_obstack_initialize (NULL);
 
   /* Initialize the RTL code for the function.  */
-  current_function_decl = decl;
   saved_loc = input_location;
   input_location = DECL_SOURCE_LOCATION (decl);
+
+  gcc_assert (DECL_STRUCT_FUNCTION (decl));
+  push_cfun (DECL_STRUCT_FUNCTION (decl));
   init_function_start (decl);
 
   gimple_register_cfg_hooks ();
@@ -2014,8 +2016,8 @@ cgraph_node::expand (void)
 
   /* Make sure that BE didn't give up on compiling.  */
   gcc_assert (TREE_ASM_WRITTEN (decl));
-  set_cfun (NULL);
-  current_function_decl = NULL;
+  if (cfun)
+pop_cfun ();
 
   /* It would make a lot more sense to output thunks before function body to 
get more
  forward and lest backwarding jumps.  This however would need solving 
problem
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 66024e2..2a965f6 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -10958,6 +10958,7 @@ ix86_code_end (void)
 
   DECL_INITIAL (decl) = make_node (BLOCK);
   current_function_decl = decl;
+  allocate_struct_function (decl, false);
   init_function_start (decl);
   first_function_block_is_cold = false;
   /* Make sure unwind info is emitted for the thunk if needed.  */
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 271c3f9..8bdd646 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -34594,6 +34594,7 @@ rs6000_code_end (void)
 
   DECL_INITIAL (decl) = make_node (BLOCK);
   current_function_decl = decl;
+  allocate_struct_function (decl, false);
   init_function_start (decl);
   first_function_block_is_cold = false;
   /* Make sure unwind info is emitted for the thunk if needed.  */
diff --git a/gcc/function.c b/gcc/function.c
index aaf49a4..0d7cabc 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -4957,11 +4957,6 @@ init_dummy_function_start (void)
 void
 init_function_start (tree subr)
 {
-  if (subr && DECL_STRUCT_FUNCTION (subr))
-set_cfun (DECL_STRUCT_FUNCTION (subr));
-  else
-allocate_struct_function (subr, false);
-
   /* Initialize backend, if needed.  */
   initialize_rtl ();
 
diff --git a/gcc/passes.c b/gcc/passes.c
index f87dcf4..08221ed 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -1706,7 +1706,12 @@ do_per_function_toporder (void (*callback) (function *, 
void *data), void *data)
  order[i] = NULL;
  node->process = 0;
  if (node->has_gimple_body_p ())
-   callback (DECL_STRUCT_FUNCTION (node->decl), data);
+   {
+ struct function *fn = DECL_STRUCT_FUNCTION (node->decl);
+ push_cfun (fn);
+ callback (fn, data);
+ pop_cfun ();
+   }
}
   symtab->remove_cgraph_removal_hook (hook);
 }
@@ -2347,6 +2352,23 @@ execute_one_pass (opt_pass *pass)
 
   current_pass = NULL;
 
+  if (todo_after & TODO_discard_function)
+{
+  gcc_assert (cfun);
+  /* As cgraph_node::release_body expects release 

[hsa 4/12] OpenMP lowering/expansion changes (gridification)

2015-11-05 Thread Martin Jambor
Hi,

the patch in this email contains the changes to make our OpenMP
lowering and expansion machinery produce GPU kernels for a certain
limited class of loops.  The plan is to make that class quite a bit
bigger, but only the following is ready for submission now.

Basically, whenever the compiler configured for HSAIL generation
encounters the following pattern:

  #pragma omp target
  #pragma omp teams thread_limit(workgroup_size) // thread_limit is optional
  #pragma omp distribute parallel for firstprivate(n) private(i) 
other_sharing_clauses()
for (i = 0; i < n; i++)
  some_loop_body

it creates a copy of the entire target body and expands it slightly
differently for concurrent execution on a GPU.  Note that both teams
and distribute constructs are mandatory.  Moreover, currently the
distribute has to be in a combined statement with the inner for
construct.  And there are quite a few other restrictions which I hope
to alleviate over the next year, most notably implement reductions.  A
few days ago I hoped to finish writing support for collapse(2) and
collapse(3) clauses in time for stage1 but now I am a bit sceptical.

The first phase of the "gridification" process is run before omp
"scanning" phase.  We look for the pattern above, and if we encounter
one, we copy its entire body into a new gimple statement
GIMPLE_OMP_GPUKERNEL.  Within it, we mark the teams, distribute and
parallel constructs with a new flag "kernel_phony."  This flag will
then make OMP lowering phase process their sharing clauses like usual,
but the statements representing the constructs will be removed at
lowering (and thus will never be expanded).  The resulting wasteful
repackaging of data is nicely cleaned by our optimizers even at -O1.

At expansion time, we identify gomp_target statements with a kernel
and expand the kernel into a special function, with the loop
represented by the GPU grid and not control flow.  Afterwards, the
normal body of the target is expanded as usual.  Finally, we need to
take the grid dimensions stored within new fields of the target
statement by the first phase, store in a structure and pass them to
libgomp in a new parameter of GOMP_target_41.

Originally, when I started with the above pattern matching, I did not
allow any other gimple statements in between the respective omp
constructs.  That however proved to be too restrictive for two
reasons.  First, statements in pre-bodies of both distribute and for
loops needed to be accounted for when calculating the kernel grid size
(which is done before the target statement itself) and second, Fortran
parameter dereferences happily result in interleaving statements when
there were none in the user source code.

Therefore, I allow register-type stores to local non-addressable
variables in pre-bodies and also in between the OMP constructs.  All
of them are copied in front of the target statement and either used
for grid size calculation or removed as useless by later
optimizations.

For convenience of anybody reviewing the code, I'm attaching a very
simple testcase with selection of dumps that illustrate the whole
process.

While we have also been experimenting quite a bit with dynamic
parallelism, we have only been able to achieve any good performance
via this process of gridification.  The user can be notified whether a
particular target construct was gridified or not via our process of
dumping notes, which however only appear in the detailed dump.  I am
seriously considering emitting some kind of warning when an HSA-enabled
compiler is about to produce non-gridified target code.

I hope that eventually I managed to write the gridification in a way
that interferes very little with the rest of the OMP pipeline and yet
only re-implement the bare necessary minimum of functionality that is
already there.  I'll be grateful for any feedback regarding the
approach.

Thanks,

Martin


2015-11-05  Martin Jambor  

* builtin-types.def (BT_FN_VOID_PTR_INT_PTR): New.
(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR): Removed.
(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New.
* fortran/types.def (BT_FN_VOID_PTR_INT_PTR): New.
(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR): Removed.
(BT_FN_VOID_INT_OMPFN_SIZE_PTR_PTR_PTR_UINT_PTR_PTR): New.
* gimple-low.c (lower_stmt): Handle GIMPLE_OMP_GPUKERNEL.
* gimple-pretty-print.c (dump_gimple_omp_for): Likewise.
(dump_gimple_omp_block): Handle GF_OMP_FOR_KIND_KERNEL_BODY
(pp_gimple_stmt_1): Handle GIMPLE_OMP_GPUKERNEL.
* gimple-walk.c (walk_gimple_stmt): Likewise.
* gimple.c (gimple_build_omp_gpukernel): New function.
(gimple_omp_target_init_dimensions): Likewise.
(gimple_copy): Handle GIMPLE_OMP_GPUKERNEL.
* gimple.def (GIMPLE_OMP_TEAMS): Moved into its own layout.
(GIMPLE_OMP_GPUKERNEL): New.
* gimple.h (gf_mask): New element GF_OMP_FOR_KIND_KERNEL_BODY.
   

[hsa 5/12] New HSA-related GCC options

2015-11-05 Thread Martin Jambor
Hi,

the following small part of the merge deals with new options.  It adds
four independent things:

1) flag_disable_hsa is used by code in opts.c (in the first patch) to
   remember whether HSA has been explicitly disabled on the compiler
   command line.

2) -Whsa is a new warning we emit whenever we fail to produce HSAIL
   for some source code.  It is on by default but of course only
   emitted by HSAIL generating code so should never affect anybody who
   does not use HSA-enabled compiler and OpenMP 4 device constructs.

We have found the following two additions very useful for debugging on
the branch but will understand if they are not deemed suitable for
trunk and will gladly remove them:

3) -fdisable-hsa-gridification disables the gridification process to
   ease experimenting with dynamic parallelism.  With this option,
   HSAIL is always generated from the CPU-intended gimple.

4) Parameter hsa-gen-debug-stores will be obsolete once HSA run-time
   supports debugging traps.  Before that, we have to do with
   debugging stores to memory at defined places, which however can
   cost speed in benchmarks.  So they are only enabled with this
   parameter.  We decided to make it a parameter rather than a switch
   to emphasize the fact it will go away and to possibly allow us
   select different levels of verbosity of the stores in the future).

Thanks,

Martin


2015-11-05  Martin Jambor  

* common.opt (disable_hsa): New variable.
(-Whsa): New warning.
(-fdisable-hsa-gridification): New option.
* params.def (PARAM_HSA_GEN_DEBUG_STORES): New parameter.

diff --git a/gcc/common.opt b/gcc/common.opt
index 961a1b6..9cb52db 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -223,6 +223,10 @@ unsigned int flag_sanitize_recover = SANITIZE_UNDEFINED | 
SANITIZE_NONDEFAULT |
 Variable
 bool dump_base_name_prefixed = false
 
+; Flag whether HSA generation has been explicitely disabled
+Variable
+bool flag_disable_hsa = false
+
 ###
 Driver
 
@@ -577,6 +581,10 @@ Wfree-nonheap-object
 Common Var(warn_free_nonheap_object) Init(1) Warning
 Warn when attempting to free a non-heap object.
 
+Whsa
+Common Var(warn_hsa) Init(1) Warning
+Warn when a function cannot be expanded to HSAIL.
+
 Winline
 Common Var(warn_inline) Warning
 Warn when an inlined function cannot be inlined.
@@ -1107,6 +1115,10 @@ fdiagnostics-show-location=
 Common Joined RejectNegative Enum(diagnostic_prefixing_rule)
 -fdiagnostics-show-location=[once|every-line]  How often to emit source 
location at the beginning of line-wrapped diagnostics.
 
+fdisable-hsa-gridification
+Common Report Var(flag_disable_hsa_gridification)
+Disable HSA gridification for OMP pragmas
+
 ; Required for these enum values.
 SourceInclude
 pretty-print.h
diff --git a/gcc/params.def b/gcc/params.def
index c5d96e7..86911e2 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1177,6 +1177,11 @@ DEFPARAM (PARAM_MAX_SSA_NAME_QUERY_DEPTH,
  "Maximum recursion depth allowed when querying a property of an"
  " SSA name.",
  2, 1, 0)
+
+DEFPARAM (PARAM_HSA_GEN_DEBUG_STORES,
+ "hsa-gen-debug-stores",
+ "Level of hsa debug stores verbosity",
+ 0, 0, 1)
 /*
 
 Local variables:


[hsa 7/12] Disabling the vectorizer for GPU kernels/functions

2015-11-05 Thread Martin Jambor
Hi,

in the previous email I wrote we need to "change behavior" of a few
optimization passes.  One was the flattening of GPU functions and the
other two are in the patch below.  It all comes down to the fact that,
at the moment, we need to switch off the vectorizer (only for the GPU
functions, of course).

We are actually quite close to being able to handle gimple vector
input in HSA back-end but not all the way yet, and before allowing the
vectorizer again, we will have to make sure it never produces vectors
bigger than 128bits (in GPU functions).

Thanks,

Martin


2015-11-05  Martin Jambor  

* tree-ssa-loop.c: Include cgraph.c, symbol-summary.c and hsa.h.
(pass_vectorize::gate): Do not run on HSA functions.
* tree-vectorizer.c: Include symbol-summary.c and hsa.h.
(pass_slp_vectorize::gate): Do not run on HSA functions.

diff --git a/gcc/tree-ssa-loop.c b/gcc/tree-ssa-loop.c
index 8ecd140..0d119e2 100644
--- a/gcc/tree-ssa-loop.c
+++ b/gcc/tree-ssa-loop.c
@@ -35,6 +35,9 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-inline.h"
 #include "tree-scalar-evolution.h"
 #include "tree-vectorizer.h"
+#include "cgraph.h"
+#include "symbol-summary.h"
+#include "hsa.h"
 
 
 /* A pass making sure loops are fixed up.  */
@@ -257,7 +260,8 @@ public:
   /* opt_pass methods: */
   virtual bool gate (function *fun)
 {
-  return flag_tree_loop_vectorize || fun->has_force_vectorize_loops;
+  return (flag_tree_loop_vectorize || fun->has_force_vectorize_loops)
+   && !hsa_gpu_implementation_p (fun->decl);
 }
 
   virtual unsigned int execute (function *);
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index b80a8dd..366138c 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -75,6 +75,8 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-ssa-propagate.h"
 #include "dbgcnt.h"
 #include "tree-scalar-evolution.h"
+#include "symbol-summary.h"
+#include "hsa.h"
 
 
 /* Loop or bb location.  */
@@ -675,7 +677,10 @@ public:
 
   /* opt_pass methods: */
   opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); }
-  virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; }
+  virtual bool gate (function *fun)
+  {
+return flag_tree_slp_vectorize && !hsa_gpu_implementation_p (fun->decl);
+  }
   virtual unsigned int execute (function *);
 
 }; // class pass_slp_vectorize


[PATCH 2/4][AArch64] Increase the loop peeling limit

2015-11-05 Thread Evandro Menezes

   2015-11-05  Evandro Menezes 

   gcc/

   * config/aarch64/aarch64.c (aarch64_override_options_internal):
   Increase loop peeling limit.

This patch increases the limit for the number of peeled insns.  With 
this change, I noticed no major regression in either Geekbench v3 or 
SPEC CPU2000 while some benchmarks, typically FP ones, improved 
significantly.


I tested this tuning on Exynos M1 and on A57.  ThunderX seems to benefit 
from this tuning too.  However, I'd appreciate comments from other 
stakeholders.


Thank you,

--
Evandro Menezes

diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 5c8604f..66122e7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -7672,6 +7672,12 @@ aarch64_override_options_internal (struct gcc_options *opts)
 			 opts->x_param_values,
 			 global_options_set.x_param_values);
 
+  /* Increase the maximum peeling limit.  */
+  maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS,
+			 400,
+			 opts->x_param_values,
+			 global_options_set.x_param_values);
+
   aarch64_override_options_after_change_1 (opts);
 }
 


[PATCH] enable loop fusion on isl-15

2015-11-05 Thread Sebastian Pop
   * graphite-optimize-isl.c (optimize_isl): Call
   isl_options_set_schedule_maximize_band_depth.

   * gcc.dg/graphite/fuse-1.c: New.
   * gcc.dg/graphite/fuse-2.c: New.
   * gcc.dg/graphite/interchange-13.c: Remove bogus check.
---
 gcc/graphite-optimize-isl.c|  2 +-
 gcc/testsuite/gcc.dg/graphite/fuse-1.c | 43 ++
 gcc/testsuite/gcc.dg/graphite/fuse-2.c | 43 ++
 gcc/testsuite/gcc.dg/graphite/interchange-13.c |  1 -
 4 files changed, 87 insertions(+), 2 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-1.c
 create mode 100644 gcc/testsuite/gcc.dg/graphite/fuse-2.c

diff --git a/gcc/graphite-optimize-isl.c b/gcc/graphite-optimize-isl.c
index 53355bb..0d85975 100644
--- a/gcc/graphite-optimize-isl.c
+++ b/gcc/graphite-optimize-isl.c
@@ -404,7 +404,7 @@ optimize_isl (scop_p scop)
   isl_options_set_schedule_maximize_band_depth (scop->isl_context, 1);
 #ifdef HAVE_ISL_OPTIONS_SET_SCHEDULE_SERIALIZE_SCCS
   /* ISL-0.15 or later.  */
-  isl_options_set_schedule_serialize_sccs (scop->isl_context, 1);
+  isl_options_set_schedule_maximize_band_depth (scop->isl_context, 1);
 #else
   isl_options_set_schedule_fuse (scop->isl_context, ISL_SCHEDULE_FUSE_MIN);
 #endif
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c 
b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
new file mode 100644
index 000..c9bb67d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
@@ -0,0 +1,43 @@
+/* Check that the two loops are fused and that we manage to fold the two xor
+   operations.  */
+/* { dg-options "-O2 -floop-nest-optimize -fdump-tree-forwprop-all" } */
+/* { dg-do run } */
+
+/* Make sure we fuse the loops like this:
+ISL AST generated by ISL:
+for (int c0 = 0; c0 <= 99; c0 += 1) {
+  S_3(c0);
+  S_6(c0);
+  S_9(c0);
+}
+*/
+/* { dg-final { scan-tree-dump-times "ISL AST generated by ISL:.*for (int c0 = 
0; c0 <= 99; c0 += 1) \{.*S_.*(c0);.*S_.*(c0);.*S_.*(c0);.*\}" 1 "graphite" } } 
*/
+
+/* Check that after fusing the loops, the scalar computation is also fused.  */
+/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 
"forwprop4" } } */
+
+
+
+#define MAX 100
+int A[MAX];
+
+extern void abort ();
+
+int
+main (void)
+{
+  int i;
+
+  for (i = 0; i < MAX; i++)
+A[i] = i;
+  for(int i=0; i

[hsa 3/12] HSA libgomp plugin

2015-11-05 Thread Martin Jambor
Hi,

the patch below adds the HSA-specific plugin for libgomp.  The plugin
implements the interface mandated by libgomp and takes care of finding
any available HSA devices, finalizing HSAIL code and running it on
HSA-capable GPUs.  The plugin does not really implement any data
movement functions (it implements them with a fatal error call)
because memory is shared in HSA environments and the previous patch
has modified libgomp proper not to call those functions on devices
with this capability.

When going over the code for the last time, I realized I did not
implement any version checks in the plugin yet, but did not want to
hold off the initial review round because of that.  I will have a look
at it as soon as possible.

Thanks,

Martin


2015-11-05  Martin Jambor  
Martin Liska  

* plugin/plugin-hsa.c: New file.



diff --git a/libgomp/plugin/plugin-hsa.c b/libgomp/plugin/plugin-hsa.c
new file mode 100644
index 000..c1b7879
--- /dev/null
+++ b/libgomp/plugin/plugin-hsa.c
@@ -0,0 +1,1309 @@
+#include 
+#include 
+#include 
+#include 
+#include "libgomp-plugin.h"
+#include "gomp-constants.h"
+#include "hsa.h"
+#include "hsa_ext_finalize.h"
+#include "dlfcn.h"
+
+/* Part of the libgomp plugin interface.  Return the name of the accelerator,
+   which is "hsa".  */
+
+const char *
+GOMP_OFFLOAD_get_name (void)
+{
+  return "hsa";
+}
+
+/* Part of the libgomp plugin interface.  Return the specific capabilities the
+   HSA accelerator have.  */
+
+unsigned int
+GOMP_OFFLOAD_get_caps (void)
+{
+  return GOMP_OFFLOAD_CAP_SHARED_MEM | GOMP_OFFLOAD_CAP_OPENMP_400;
+}
+
+/* Part of the libgomp plugin interface.  Identify as HSA accelerator.  */
+
+int
+GOMP_OFFLOAD_get_type (void)
+{
+  return OFFLOAD_TARGET_TYPE_HSA;
+}
+
+/* Return the libgomp version number we're compatible with.  There is
+   no requirement for cross-version compatibility.  */
+
+unsigned
+GOMP_OFFLOAD_version (void)
+{
+  return GOMP_VERSION;
+}
+
+/* Flag to decide whether to print to stderr information about what is going
+   on.  Set in init_debug depending on environment variables.  */
+
+static bool debug;
+
+/* Flag to decide if the runtime should suppress a possible fallback to host
+   execution.  */
+
+static bool suppress_host_fallback;
+
+/* Initialize debug and suppress_host_fallback according to the environment.  
*/
+
+static void
+init_enviroment_variables (void)
+{
+  if (getenv ("HSA_DEBUG"))
+debug = true;
+  else
+debug = false;
+
+  if (getenv ("HSA_SUPPRESS_HOST_FALLBACK"))
+suppress_host_fallback = true;
+  else
+suppress_host_fallback = false;
+}
+
+/* Print a logging message with PREFIX to stderr if HSA_DEBUG value
+   is set to true.  */
+
+#define HSA_LOG(prefix, ...) \
+  do \
+  { \
+if (debug) \
+  { \
+   fprintf (stderr, prefix); \
+   fprintf (stderr, __VA_ARGS__); \
+  } \
+  } \
+  while (false);
+
+/* Print a debugging message to stderr.  */
+
+#define HSA_DEBUG(...) HSA_LOG ("HSA debug: ", __VA_ARGS__)
+
+/* Print a warning message to stderr.  */
+
+#define HSA_WARNING(...) HSA_LOG ("HSA warning: ", __VA_ARGS__)
+
+/* Print HSA warning STR with an HSA STATUS code.  */
+
+static void
+hsa_warn (const char *str, hsa_status_t status)
+{
+  if (!debug)
+return;
+
+  const char* hsa_error;
+  hsa_status_string (status, &hsa_error);
+
+  unsigned l = strlen (hsa_error);
+
+  char *err = GOMP_PLUGIN_malloc (sizeof (char) * l);
+  memcpy (err, hsa_error, l - 1);
+  err[l] = '\0';
+
+  fprintf (stderr, "HSA warning: %s (%s)\n", str, err);
+
+  free (err);
+}
+
+/* Report a fatal error STR together with the HSA error corresponding to STATUS
+   and terminate execution of the current process.  */
+
+static void
+hsa_fatal (const char *str, hsa_status_t status)
+{
+  const char* hsa_error;
+  hsa_status_string (status, &hsa_error);
+  GOMP_PLUGIN_fatal ("HSA fatal error: %s (%s)", str, hsa_error);
+}
+
+struct hsa_kernel_description
+{
+  const char *name;
+  unsigned omp_data_size;
+  unsigned kernel_dependencies_count;
+  const char **kernel_dependencies;
+};
+
+/* Data passed by the static initializer of a compilation unit containing BRIG
+   to GOMP_offload_register.  */
+
+struct brig_image_desc
+{
+  hsa_ext_module_t brig_module;
+  const unsigned kernel_count;
+  struct hsa_kernel_description *kernel_infos;
+};
+
+struct agent_info;
+
+/* Information required to identify, finalize and run any given kernel.  */
+
+struct kernel_info
+{
+  /* Name of the kernel, required to locate it within the brig module.  */
+  const char *name;
+  /* Size of memory space for OMP data.  */
+  unsigned omp_data_size;
+  /* The specific agent the kernel has been or will be finalized for and run
+ on.  */
+  struct agent_info *agent;
+  /* The specific module where the kernel takes place.  */
+  struct module_info *module;
+  /* Mutex enforcing that at most one thread ever initializes a kernel for
+ use.  A thread should have locked 

[hsa 10/12] HSAIL BRIG description header file (hopefully not a licensing issue)

2015-11-05 Thread Martin Jambor
Hi,

the following patch adds a BRIG (binary representation of HSAIL)
representation description.  It is within a single header file
describing the binary structures and constants of the format.


Initially, I have created the file by copying out pieces of PDF
documentation but the latest version of the file (describing final
HSAIL 1.0) is actually taken from the HSAIL (dis)assembler developed
by HSA foundation and released by "University of Illinois/NCSA Open
Source License."

The license is "GPL-compatible" according to FSF
(http://www.gnu.org/licenses/license-list.en.html#GPLCompatibleLicenses)
so I believe that means we can put it inside GCC and I hope I also do
not need any special steering committee approval or whatnot.  At the
same time, the license comes with three restrictions that I hope I
have fulfilled by keeping them in the header comment.  Nevertheless,
if anybody knowledgeable can tell me what is the known right thing to
do (or to confirm this is indeed the right thing to do), I'll be very
happy.

Because this file originated outside GCC and we are likely to repace
it with newer versions as they come along, it does not follow GNU
coding standards.  I hope that is reasonable.

Thanks,

Martin


2015-11-05  Martin Jambor  

* hsa-brig-format.h: New file.

diff --git a/gcc/hsa-brig-format.h b/gcc/hsa-brig-format.h
new file mode 100644
index 000..f099fc6
--- /dev/null
+++ b/gcc/hsa-brig-format.h
@@ -0,0 +1,1283 @@
+/* HSAIL and BRIG related macros and definitions.
+   Copyright (c) 2013-2015, Advanced Micro Devices, Inc.
+   Copyright (C) 2013-2015 Free Software Foundation, Inc.
+
+   Majority of contents in this file has originally been distributed under the
+   University of Illinois/NCSA Open Source License.  This license mandates that
+   the following conditions are observed when distributing this file:
+
+ * Redistributions of source code must retain the above copyright notice,
+   this list of conditions and the following disclaimers.
+
+ * Redistributions in binary form must reproduce the above copyright 
notice,
+   this list of conditions and the following disclaimers in the
+   documentation and/or other materials provided with the distribution.
+
+ * Neither the names of the HSA Team, HSA Foundation, University of
+   Illinois at Urbana-Champaign, nor the names of its contributors may be
+   used to endorse or promote products derived from this Software without
+   specific prior written permission.
+
+   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+   IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+   FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+   CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+   LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+   FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+   WITH THE SOFTWARE.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3, or (at your option)
+   any later version.
+
+   GCC is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   .  */
+
+#ifndef HSA_BRIG_FORMAT_H
+#define HSA_BRIG_FORMAT_H
+
+#include "config.h"
+#include "system.h"
+
+typedef uint32_t BrigVersion32_t;
+
+enum BrigVersion {
+
+BRIG_VERSION_HSAIL_MAJOR = 1,
+BRIG_VERSION_HSAIL_MINOR = 0,
+BRIG_VERSION_BRIG_MAJOR  = 1,
+BRIG_VERSION_BRIG_MINOR  = 0
+};
+
+typedef uint8_t BrigAlignment8_t;
+
+typedef uint8_t BrigAllocation8_t;
+
+typedef uint8_t BrigAluModifier8_t;
+
+typedef uint8_t BrigAtomicOperation8_t;
+
+typedef uint32_t BrigCodeOffset32_t;
+
+typedef uint8_t BrigCompareOperation8_t;
+
+typedef uint16_t BrigControlDirective16_t;
+
+typedef uint32_t BrigDataOffset32_t;
+
+typedef BrigDataOffset32_t BrigDataOffsetCodeList32_t;
+
+typedef BrigDataOffset32_t BrigDataOffsetOperandList32_t;
+
+typedef BrigDataOffset32_t BrigDataOffsetString32_t;
+
+typedef uint8_t BrigExecutableModifier8_t;
+
+typedef uint8_t BrigImageChannelOrder8_t;
+
+typedef uint8_t BrigImageChannelType8_t;
+
+typedef uint8_t BrigImageGeometry8_t;
+
+typedef uint8_t BrigImageQuery8_t;
+
+typedef uint16_t BrigKind16_t;
+
+typedef uint8_t BrigLinkage8_t;
+
+typedef uint8_t BrigMachineModel8_t;
+
+typedef uint8_t BrigMemoryModifier8_t;
+
+typedef uint8_t BrigMemoryOrder8_t;
+
+typedef uint8_t BrigMemoryScope8_t;
+
+typedef uint16_t BrigOpcode16_t;
+

[PATCH] [graphite] do not collect cold loops in scops

2015-11-05 Thread Sebastian Pop
   * graphite-scop-detection.c (loop_is_valid_scop): Call
   optimize_loop_nest_for_speed_p.
---
 gcc/graphite-scop-detection.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index ae8497d..f22e9cc 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -956,6 +956,13 @@ scop_detection::loop_is_valid_scop (loop_p loop, sese_l 
scop) const
   if (!scop)
 return false;
 
+  if (!optimize_loop_nest_for_speed_p (loop))
+{
+  DEBUG_PRINT (dp << "[scop-detection-fail] loop_"
+ << loop->num << " is not on a hot path.\n");
+  return false;
+}
+
   if (!can_represent_loop (loop, scop))
 {
   DEBUG_PRINT (dp << "[scop-detection-fail] cannot represent loop_"
-- 
1.9.1



Re: Fix 61441

2015-11-05 Thread Sujoy Saraswati
Hi,

> This is looking pretty close to ready to go in, but the patch also seems
> to be doing several things at once and it might be easier to review if
> split into separate logical pieces.  For example (this is an illustration,
> not necessarily saying it should be exactly this way): (1) Add
> REAL_VALUE_ISSIGNALING_NAN and convert existing logic that already does
> something equivalent (so no semantic change, just cleanup).  (2) Convert
> existing code that checks for all NaNs in cases where
> REAL_VALUE_ISSIGNALING_NAN would be enough.  (3) Treat rint and nearbyint
> the same.  (4) Make real.c operations properly produce quiet NaNs for
> operations with signaling NaN input.  (5) Disable various transformations
> for signaling NaN operands, where folding them loses exceptions.

Thanks, I will break it up into multiple patches as you proposed.

> This patch is also getting big enough that it would be a good idea to
> complete the copyright assignment paperwork, certainly if you plan to make
> more contributions in future, if you're not covered by a corporate
> assignment.

I have a corporate assignment, I will post the patches from my
corporate email id.

>> @@ -1943,7 +1965,17 @@ fold_convert_const_real_from_real (tree type, cons
>>REAL_VALUE_TYPE value;
>>tree t;
>>
>> +  /* Don't perform the operation if flag_signaling_nans is on
>> + and the operand is a NaN.  */
>> +  if (HONOR_SNANS (TYPE_MODE (TREE_TYPE (arg1)))
>> +  && REAL_VALUE_ISSIGNALING_NAN (TREE_REAL_CST (arg1)))
>> +return NULL_TREE;
>> +
>>real_convert (, TYPE_MODE (type), _REAL_CST (arg1));
>> +  /* Make resulting NaN value to be qNaN when flag_signaling_nans
>> + is off.  */
>> +  if (REAL_VALUE_ISSIGNALING_NAN (value))
>> +value.signalling = 0;
>
> Shouldn't real_convert do this rather than the caller needing to do it?

Yes, it should be. I had started by doing this within real_convert but
then saw that there are quite a few callers where I should add the
check for flag_signaling_nans. This was making the patch bigger, so
instead decided to change the caller in this particular case. I will
try to make the change in real_convert now that we are planning to
break the patch.

Regards,
Sujoy

> --
> Joseph S. Myers
> jos...@codesourcery.com


Re: [openacc] tile, independent, default, private and firstprivate support in c/++

2015-11-05 Thread Jakub Jelinek
On Thu, Nov 05, 2015 at 06:10:49PM -0800, Cesar Philippidis wrote:
> I've applied this patch to trunk. It also includes the fortran and
> template changes. Note that there is a new regression in
> gfortran.dg/goacc/combined_loop.f90. Basically, the gimplifier is
> complaining about reduction variables appearing in multiple clauses.
> E.g. 'acc parallel reduction(+:var) copy(var)'. Nathan's upcoming
> gimplifier changes should address that.

If you are relying on the OMP_CLAUSE_MAP_PRIVATE flag that I've added
on gomp-4_1-branch and then removed yesterday, feel free to re-add it,
but of course never set it for OpenMP, just OpenACC constructs
(so for OpenMP keep the gimplifier assertion, for OpenACC set it).

Jakub


[ARM][PATCH, PR 68223] arm_[su]min_cmp pattern fails

2015-11-05 Thread Michael Collison
The patterns arm_smin_cmp and arm_umin_cmp patterns fail if operand 0 
and operand 2 are equal and both are less than operand 1. The solution 
is to remove the two patterns.


2015-11-06  Michael Collison  

PR target/68223
* gcc/config/arm/arm.md (*arm_smin_cmp): Remove pattern.
(*arm_umin_cmp): Likewise.
* gcc.target/arm/mincmp.c: Remove testcase.

--
Michael Collison
Linaro Toolchain Working Group
michael.colli...@linaro.org

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 02e147e..6ba1ec3 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3455,44 +3455,6 @@
(set_attr "type" "multiple,multiple")]
 )
 
-;; t = (s/u)min (x, y)
-;; cc = cmp (t, z)
-;; is the same as
-;; cmp x, z
-;; cmpge(u) y, z
-
-(define_insn_and_split "*arm_smin_cmp"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC
-	 (smin:SI (match_operand:SI 0 "s_register_operand" "r")
-		  (match_operand:SI 1 "s_register_operand" "r"))
-	 (match_operand:SI 2 "s_register_operand" "r")))]
-  "TARGET_32BIT"
-  "#"
-  "&& reload_completed"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_dup 0) (match_dup 2)))
-   (cond_exec (ge:CC (reg:CC CC_REGNUM) (const_int 0))
-	  (set (reg:CC CC_REGNUM)
-		   (compare:CC (match_dup 1) (match_dup 2))))]
-)
-
-(define_insn_and_split "*arm_umin_cmp"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC
-	 (umin:SI (match_operand:SI 0 "s_register_operand" "r")
-		  (match_operand:SI 1 "s_register_operand" "r"))
-	 (match_operand:SI 2 "s_register_operand" "r")))]
-  "TARGET_32BIT"
-  "#"
-  "&& reload_completed"
-  [(set (reg:CC CC_REGNUM)
-	(compare:CC (match_dup 0) (match_dup 2)))
-   (cond_exec (geu:CC (reg:CC CC_REGNUM) (const_int 0))
-	  (set (reg:CC CC_REGNUM)
-		   (compare:CC (match_dup 1) (match_dup 2))))]
-)
-
 (define_expand "umaxsi3"
   [(parallel [
 (set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/testsuite/gcc.target/arm/mincmp.c b/gcc/testsuite/gcc.target/arm/mincmp.c
deleted file mode 100644
index ade3bd9..000
--- a/gcc/testsuite/gcc.target/arm/mincmp.c
+++ /dev/null
@@ -1,20 +0,0 @@
-/* { dg-do compile } */
-/* { dg-options "-O2" } */
-/* { dg-require-effective-target arm32 } */
-
-#define min(x, y) ((x) <= (y)) ? (x) : (y)
-
-unsigned int 
-foo (unsigned int i, unsigned int x, unsigned int y)
-{
-  return i < (min (x, y));
-}
-
-int 
-bar (int i, int x, int y)
-{
-  return i < (min (x, y));
-}
-
-/* { dg-final { scan-assembler "cmpcs" } } */
-/* { dg-final { scan-assembler "cmpge" } } */
-- 
1.9.1



Re: [ping] Fix PR debug/66728

2015-11-05 Thread Mike Stump
On Nov 5, 2015, at 4:32 AM, Richard Biener  wrote:
> No idea on location lists but maybe this means we should just use the
> maximum supported integer mode for CONST_WIDE_INTs?

Ah, yeah, that sounds like a fine idea.  Below is that version.  I snuck in one 
more change, as it was annoying me, and it is a regression from gcc-4.8.  It 
has this effect:

@@ -55,7 +55,7 @@ test:
.long   0x72# DW_AT_type
.byte   0x10
.quad   0   # DW_AT_const_value
-   .quad   0x8000  # (null)
+   .quad   0x8000  # 
.byte   0   # end of children of DIE 0x2d
.uleb128 0x4# (DIE (0x6b) DW_TAG_base_type)
.byte   0x10# DW_AT_byte_size

This version has the added benefit of reducing all wide_ints to be so 
shortened.  We do this by changing get_full_len, which changes the world.

If there are no substantial reasons to not check it in now, I’d like to proceed 
and get it checked in.  People can refine it further in tree if they want.  Any 
objections?


Index: dwarf2out.c
===
--- dwarf2out.c (revision 229720)
+++ dwarf2out.c (working copy)
@@ -368,12 +368,14 @@
 #endif
 
 /* Get the number of HOST_WIDE_INTs needed to represent the precision
-   of the number.  */
+   of the number.  Some constants have a large uniform precision, so
+   we get the precision needed for the actual value of the number.  */
 
 static unsigned int
 get_full_len (const wide_int )
 {
-  return ((op.get_precision () + HOST_BITS_PER_WIDE_INT - 1)
+  int prec = wi::min_precision (op, UNSIGNED);
+  return ((prec + HOST_BITS_PER_WIDE_INT - 1)
  / HOST_BITS_PER_WIDE_INT);
 }
 
@@ -9010,7 +9012,7 @@
{
  dw2_asm_output_data (l, a->dw_attr_val.v.val_wide->elt (i),
   "%s", name);
- name = NULL;
+ name = "";
}
else
  for (i = 0; i < len; ++i)
@@ -9017,7 +9019,7 @@
{
  dw2_asm_output_data (l, a->dw_attr_val.v.val_wide->elt (i),
   "%s", name);
- name = NULL;
+ name = "";
}
  }
  break;
@@ -15593,8 +15595,13 @@
   return true;
 
 case CONST_WIDE_INT:
-  add_AT_wide (die, DW_AT_const_value,
-  std::make_pair (rtl, GET_MODE (rtl)));
+  {
+   wide_int w1 = std::make_pair (rtl, MAX_MODE_INT);
+   unsigned int prec = MIN (wi::min_precision (w1, UNSIGNED),
+(unsigned int)CONST_WIDE_INT_NUNITS (rtl) * 
HOST_BITS_PER_WIDE_INT);
+   wide_int w = wi::zext (w1, prec);
+   add_AT_wide (die, DW_AT_const_value, w);
+  }
   return true;
 
 case CONST_DOUBLE:
Index: rtl.h
===
--- rtl.h   (revision 229720)
+++ rtl.h   (working copy)
@@ -2086,6 +2086,7 @@
 inline unsigned int
 wi::int_traits <rtx_mode_t>::get_precision (const rtx_mode_t &x)
 {
+  gcc_checking_assert (x.second != BLKmode && x.second != VOIDmode);
   return GET_MODE_PRECISION (x.second);
 }
 


[PATCH] Merge from gomp-4_5-branch to trunk

2015-11-05 Thread David Edelsohn
This patch broke bootstrap on AIX.

/nasfarm/edelsohn/src/src/libstdc++-v3/src/c++98/ios_init.cc: In
static member function 'static  bool
std::ios_base::sync_with_stdio(bool)':
/nasfarm/edelsohn/src/src/libstdc++-v3/src/c++98/ios_init.cc:181:59:
internal compiler error: in tree_to_shwi, at tree.c:7297
  new (&buf_cout) stdio_filebuf<char>(stdout, ios_base::out);

   ^

- David


Fix ipa-polymorphic-call-info ICE

2015-11-05 Thread Jan Hubicka
Hi,
this patch fixes a triple thinko in
ipa_polymorphic_call_context::restrict_to_inner_type when dealing with an
offset that is out of the range of the type considered.  In this case the
function should return true only when the type is dynamic (so there may be an
additional type after the known type) or derivations are allowed (so the type
may get bigger).  There is a check that is supposed to make this happen, but
it clears the flags before checking them, which is not a good idea.

There is also a check in contains_type_p that is supposed to shortcut this
scenario, but instead of checking TYPE_SIZE it checks the type itself (ouch).

Bootstrapped/regtested x86_64-linux, will commit it shortly.
Honza
PR ipa/68057
PR ipa/68220
* ipa-polymorphic-call.c
(ipa_polymorphic_call_context::restrict_to_inner_type): Fix ordering
issue when offset is out of range.
(contains_type_p): Fix out of range check, clear dynamic flag.
* g++.dg/lto/pr68057_0.C: New testcase.
* g++.dg/lto/pr68057_1.C: New testcase.
* g++.dg/torture/pr68220.C: New testcase.
Index: ipa-polymorphic-call.c
===
--- ipa-polymorphic-call.c  (revision 229820)
+++ ipa-polymorphic-call.c  (working copy)
@@ -154,6 +154,8 @@ ipa_polymorphic_call_context::restrict_t
   && tree_to_shwi (TYPE_SIZE (outer_type)) >= 0
   && tree_to_shwi (TYPE_SIZE (outer_type)) <= offset)
{
+ bool der = maybe_derived_type; /* clear_outer_type will reset it.  */
+ bool dyn = dynamic;
  clear_outer_type (otr_type);
  type = otr_type;
  cur_offset = 0;
@@ -162,7 +164,7 @@ ipa_polymorphic_call_context::restrict_t
For dynamic types, we really do not have information about
size of the memory location.  It is possible that completely
different type is stored after outer_type.  */
- if (!maybe_derived_type && !dynamic)
+ if (!der && !dyn)
{
 clear_speculation ();
 invalid = true;
@@ -425,8 +427,10 @@ no_useful_type_info:
return true;
}
  else
-   clear_speculation ();
- return true;
+   {
+ clear_speculation ();
+ return true;
+   }
}
  else
{
@@ -459,15 +463,18 @@ contains_type_p (tree outer_type, HOST_W
   if (offset < 0)
 return false;
   if (TYPE_SIZE (outer_type) && TYPE_SIZE (otr_type)
-  && TREE_CODE (outer_type) == INTEGER_CST
-  && TREE_CODE (otr_type) == INTEGER_CST
-  && wi::ltu_p (wi::to_offset (outer_type), (wi::to_offset (otr_type) + 
offset)))
+  && TREE_CODE (TYPE_SIZE (outer_type)) == INTEGER_CST
+  && TREE_CODE (TYPE_SIZE (otr_type)) == INTEGER_CST
+  && wi::ltu_p (wi::to_offset (TYPE_SIZE (outer_type)),
+   (wi::to_offset (TYPE_SIZE (otr_type)) + offset)))
 return false;
 
   context.offset = offset;
   context.outer_type = TYPE_MAIN_VARIANT (outer_type);
   context.maybe_derived_type = false;
-  return context.restrict_to_inner_class (otr_type, consider_placement_new, 
consider_bases);
+  context.dynamic = false;
+  return context.restrict_to_inner_class (otr_type, consider_placement_new,
+ consider_bases);
 }
 
 
Index: testsuite/g++.dg/lto/pr68057_0.C
===
--- testsuite/g++.dg/lto/pr68057_0.C(revision 0)
+++ testsuite/g++.dg/lto/pr68057_0.C(revision 0)
@@ -0,0 +1,23 @@
+// { dg-lto-do compile }
+/* { dg-extra-ld-options { -O2 -Wno-odr -r -nostdlib } } */
+struct SPxPricer;
+struct SoPlex {
+  virtual void setPricer(SPxPricer *);
+};
+struct SPxPricer {
+  virtual void load(SoPlex *);
+};
+struct SLUFactor {
+  SLUFactor();
+  virtual ~SLUFactor();
+};
+struct SPxSolver : SoPlex {
+  SPxPricer pr;
+  SLUFactor slu;
+  SPxSolver();
+};
+struct A : SPxSolver {};
+A a;
+
+void SoPlex::setPricer(SPxPricer *p1) { p1->load(this); }
+
Index: testsuite/g++.dg/lto/pr68057_1.C
===
--- testsuite/g++.dg/lto/pr68057_1.C(revision 0)
+++ testsuite/g++.dg/lto/pr68057_1.C(revision 0)
@@ -0,0 +1,17 @@
+struct SPxPricer;
+struct SoPlex {
+  virtual void setPricer(SPxPricer *);
+};
+struct SPxPricer {
+  virtual void load(SoPlex *);
+};
+struct SLUFactor {
+  SLUFactor();
+  virtual ~SLUFactor();
+};
+struct SPxSolver : SoPlex {
+  SPxPricer pr;
+  SLUFactor slu;
+  SPxSolver();
+};
+SPxSolver::SPxSolver() { setPricer(&pr); }
Index: testsuite/g++.dg/torture/pr68220.C
===
--- testsuite/g++.dg/torture/pr68220.C  (revision 0)
+++ testsuite/g++.dg/torture/pr68220.C  (revision 0)
@@ -0,0 +1,238 @@
+// { dg-do compile }
+namespace mpl {
+template  struct lambda;
+template  struct if_ { typedef T3 

  1   2   >