[PATCH v4 2/3] Add predict_doloop_p target hook

2019-06-12 Thread Kewen.Lin
Hi,

Many thanks for all comments on previous versions.

Comparing with the previous version, I dropped the checks
on costly niter and invalid stmt scanning.  As Richard's
suggestion, keep this as simple as possible, refine it 
if finding any cases which matter later.

Bootstrapped and regression testing passed on powerpc64le.

Is it OK for trunk?  

Thanks,
Kewen


gcc/ChangeLog

2019-06-13  Kewen Lin  

PR middle-end/80791
* target.def (predict_doloop_p): New hook.
* targhooks.h (default_predict_doloop_p): New declaration.
* targhooks.c (default_predict_doloop_p): New function.
* doc/tm.texi.in (TARGET_PREDICT_DOLOOP_P): New hook.
* doc/tm.texi: Regenerate.
* config/rs6000/rs6000.c (rs6000_predict_doloop_p): New function.
(TARGET_PREDICT_DOLOOP_P): New macro.
* tree-ssa-loop-ivopts.c (generic_predict_doloop_p): New function.

diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 91fafc4e766..54bcc1608ae 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -1909,6 +1909,9 @@ static const struct attribute_spec 
rs6000_attribute_table[] =
 #undef TARGET_CAN_USE_DOLOOP_P
 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
 
+#undef TARGET_PREDICT_DOLOOP_P
+#define TARGET_PREDICT_DOLOOP_P rs6000_predict_doloop_p
+
 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
 
@@ -39413,7 +39416,27 @@ rs6000_mangle_decl_assembler_name (tree decl, tree id)
   return id;
 }
 
-
+/* Predict whether the given loop in gimple will be transformed in the RTL
+   doloop_optimize pass.  This is for rs6000 target specific.  */
+
+static bool
+rs6000_predict_doloop_p (struct loop *loop)
+{
+  gcc_assert (loop);
+
+  /* On rs6000, targetm.can_use_doloop_p is actually
+ can_use_doloop_if_innermost.  Just ensure it's innermost.  */
+  if (loop->inner != NULL)
+{
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "Predict doloop failure due to"
+   " no innermost.\n");
+  return false;
+}
+
+  return true;
+}
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-rs6000.h"
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 622e8cf240f..69081ca0700 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11610,6 +11610,14 @@ function version at run-time for a given set of 
function versions.
 body must be generated.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_PREDICT_DOLOOP_P (struct loop *@var{loop})
+Return true if we can predict it is possible to use a low-overhead loop
+for a particular loop.  The parameter @var{loop} is a pointer to the loop.
+This target hook is required only when the target supports low-overhead
+loops, and will help some earlier middle-end passes to make some decisions.
+The default version of this hook returns false.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_CAN_USE_DOLOOP_P (const widest_int 
@var{&iterations}, const widest_int @var{&iterations_max}, unsigned int 
@var{loop_depth}, bool @var{entered_at_top})
 Return true if it is possible to use low-overhead loops (@code{doloop_end}
 and @code{doloop_begin}) for a particular loop.  @var{iterations} gives the
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 17560fce6b7..b4d57b86e2f 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -7944,6 +7944,8 @@ to by @var{ce_info}.
 
 @hook TARGET_GENERATE_VERSION_DISPATCHER_BODY
 
+@hook TARGET_PREDICT_DOLOOP_P
+
 @hook TARGET_CAN_USE_DOLOOP_P
 
 @hook TARGET_INVALID_WITHIN_DOLOOP
diff --git a/gcc/target.def b/gcc/target.def
index 7d52102c815..5063c3c0b94 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -4236,6 +4236,15 @@ DEFHOOK
 rtx, (machine_mode mode, rtx result, rtx val, rtx failval),
  default_speculation_safe_value)
  
+DEFHOOK
+(predict_doloop_p,
+ "Return true if we can predict it is possible to use a low-overhead loop\n\
+for a particular loop.  The parameter @var{loop} is a pointer to the loop.\n\
+This target hook is required only when the target supports low-overhead\n\
+loops, and will help some earlier middle-end passes to make some decisions.\n\
+The default version of this hook returns false.",
+ bool, (struct loop *loop),
+ default_predict_doloop_p)
 
 DEFHOOK
 (can_use_doloop_p,
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index b27111639f4..a83166d81ba 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -643,6 +643,19 @@ default_has_ifunc_p (void)
   return HAVE_GNU_INDIRECT_FUNCTION;
 }
 
+/* True if we can predict this loop is possible to be transformed to a
+   low-overhead loop, otherwise returns false.
+
+   By default, false is returned, as this hook's applicability should be
+   verified for each target.  Target maintainers should re-define the hook
+   if the target can take advantage of it.  */
+
+bool
+default_predict_doloop_p (struct loop *loop ATTRIBUTE_UNUSED)
+{
+  ret

Re: [PATCH] PR fortran/68544 -- A derived type cannot be actual argument

2019-06-12 Thread Paul Richard Thomas
Hi Steve,

That's good to go - thanks

Paul

On Wed, 12 Jun 2019 at 23:44, Steve Kargl
 wrote:
>
> The attach patch has been sitting in my tree for a year.
> It has been tested and updated as others have changed
> the gfortran code.  The patch has been compiled and
> regression tested on x86_64-*-freebsd.  OK to commit?
>
> Either testcase should provide sufficient information
> about the problem that this patch fixes.
>
> 2019-06-12  Steven G. Kargl  
>
> PR fortran/68544
> * resolve.c (is_dt_name): New function to compare symbol name against
> list of derived types.
> (resolve_actual_arglist): Use it to find wrong code.
>
> 2019-06-12  Steven G. Kargl  
>
> PR fortran/68544
> * gfortran.dg/pr68544.f90: New test.
> * gfortran.dg/pr85687.f90: Modify test for new error message.
>
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [PATCH] PR fortran/88810 -- Code clean up

2019-06-12 Thread Paul Richard Thomas
Hi Steve,

As the original author of the confusing code, I want to thank you for
de-confusing it!

OK

Paul

On Wed, 12 Jun 2019 at 20:01, Steve Kargl
 wrote:
>
> This PR flags a section of confusing but correct code.
> The attach patch rewrites the code to hopefully provide
> some clarity.  It has lived my tree for around 6 months,
> so has sufferred through numerous regression tests.
> OK to commit?
>
> 2019-06-12  Steven G. Kargl  
>
> PR fortran/88810
> * dependency.c (gfc_dep_resolver): Re-arrange code to make the logic
> a bit more transparent.  Fix 2 nearby formatting issues.
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [PATCH] PR fortran/89344 -- INTENT(IN) CLASS(*) cannot be assigned to

2019-06-12 Thread Paul Richard Thomas
Hi Steve,

OK - thanks

Paul

On Wed, 12 Jun 2019 at 19:42, Steve Kargl
 wrote:
>
> The attach patch has lived in my tree for 4 months.
> It's time to submit it.  Passed regression testing
> for a long time.
>
> An INTENT(in) entity that has CLASS(*) dummy argument
> should not use SELECT TYPE to then try to assign to the
> entity.  OK to commit?
>
> 2019-06-12  Steven G. Kargl  
>
> PR fortran/89344
> * expr.c (gfc_check_vardef_context): Check for INTENT(IN) variable
> in SELECT TYPE construct.
>
> 2019-06-12  Steven G. Kargl  
>
> PR fortran/89344
> * gfortran.dg/pr89344.f90: New test.
>
> --
> Steve



-- 
"If you can't explain it simply, you don't understand it well enough"
- Albert Einstein


Re: [AARCH64] Fix typo in comment

2019-06-12 Thread Kugan Vivekanandarajah
Hi Kyrill,

Thanks for the comments. Committed as you suggested.

Thanks,
Kugan

On Wed, 12 Jun 2019 at 18:07, Kyrill Tkachov
 wrote:
>
> Hi Kugan,
>
> On 6/12/19 4:59 AM, Kugan Vivekanandarajah wrote:
> > AArch64 comment for ADDSUB iterator is a typo or copy-and-paste error.
> > Attached patch fixes this. I believe this falls under obvious
> > category. I will commit it after 48hrs unless comments should be
> > better worded.
> >
> > Thanks,
> > Kugan
> >
> >
> > gcc/ChangeLog:
> >
> > 2019-06-12  Kugan Vivekanandarajah 
> >
> > * config/aarch64/iterators.md (ADDSUB): Fix typo in comment.
>
> diff --git a/gcc/config/aarch64/iterators.md
> b/gcc/config/aarch64/iterators.md
> index 2179e6f..49c8146 100644
> --- a/gcc/config/aarch64/iterators.md
> +++ b/gcc/config/aarch64/iterators.md
> @@ -1215,7 +1215,7 @@
>   ;; Signed and unsigned max operations.
>   (define_code_iterator USMAX [smax umax])
>
> -;; Code iterator for variants of vector max and min.
>
> +;; Code iterator for variants of vector plus and minus.
>
>
> I'd remove the "variants" and have it "Code iterator for plus and minus"
>
> I do agree such a change is obvious.
>
> Thanks,
>
> Kyrill
>
>   (define_code_iterator ADDSUB [plus minus])
>
>   ;; Code iterator for variants of vector saturating binary ops.
>


C++ PATCH to add test for c++/87410

2019-06-12 Thread Marek Polacek
This one was fixed by r259717, but that commit didn't include any tests.  So
it's reasonable to add something that exercises that code path.

Tested on x86_64-linux, applying to trunk.

2019-06-12  Marek Polacek  

PR c++/87410
* g++.dg/cpp1y/pr87410.C: New test.

diff --git gcc/testsuite/g++.dg/cpp1y/pr87410.C 
gcc/testsuite/g++.dg/cpp1y/pr87410.C
new file mode 100644
index 000..5a691e40b0a
--- /dev/null
+++ gcc/testsuite/g++.dg/cpp1y/pr87410.C
@@ -0,0 +1,19 @@
+// PR c++/87410
+// { dg-do compile { target c++14 } }
+
+template  using b = const char[a];
+template 
+constexpr auto g(b &, b &, f) {}
+template  auto h(b &) {
+  auto i = j(static_cast(nullptr));
+  return **i;
+}
+class k {
+  using l = k;
+  const int &m() const;
+  friend constexpr auto j(l **n) -> decltype(n) {
+g("", "", static_cast(&k::m));
+return n;
+  }
+};
+k o = h("");


[PATCH] Add --disable-tm-clone-registry libgcc configure option.

2019-06-12 Thread ilia . diachkov

Hello,

This patch adds libgcc configuration option to disable TM clone 
registry. This option helps to reduce code size for embedded targets 
which do not need transactional memory support.


Tested on x86_64 and riscv64-unknown-elf-gcc, the last is with the 
option enabled.


If the change is Ok with you, please commit it since I have no write 
access to gcc repository.


Best regards,
Ilia.

libgcc/ChangeLog:

* Makefile.in: Add @use_tm_clone_registry@.
* configure: Regenerate.
* configure.ac: Add --disable-tm-clone-registry option.

gcc/ChangeLog:

* doc/install.texi: Document --disable-tm-clone-registry.

diff --git a/gcc/doc/install.texi b/gcc/doc/install.texi
index 29d0470..1a0e8c7 100644
--- a/gcc/doc/install.texi
+++ b/gcc/doc/install.texi
@@ -1284,6 +1284,11 @@ assumptions made by the configure test are incorrect.
 Specify that the target does not support TLS.
 This is an alias for @option{--enable-tls=no}.
 
+@item --disable-tm-clone-registry
+Disable TM clone registry in libgcc. It is enabled in libgcc by default.
+This option helps to reduce code size for embedded targets which do
+not use transactional memory.
+
 @item --with-cpu=@var{cpu}
 @itemx --with-cpu-32=@var{cpu}
 @itemx --with-cpu-64=@var{cpu}
diff --git a/libgcc/Makefile.in b/libgcc/Makefile.in
index fb77881..189f9ff 100644
--- a/libgcc/Makefile.in
+++ b/libgcc/Makefile.in
@@ -259,6 +259,8 @@ PICFLAG = @PICFLAG@
 
 CET_FLAGS = @CET_FLAGS@
 
+USE_TM_CLONE_REGISTRY = @use_tm_clone_registry@
+
 # Defined in libgcc2.c, included only in the static library.
 LIB2FUNCS_ST = _eprintf __gcc_bcmp
 
@@ -299,7 +301,7 @@ CRTSTUFF_CFLAGS = -O2 $(GCC_CFLAGS) $(INCLUDES) $(MULTILIB_CFLAGS) -g0 \
   $(NO_PIE_CFLAGS) -finhibit-size-directive -fno-inline -fno-exceptions \
   -fno-zero-initialized-in-bss -fno-toplevel-reorder -fno-tree-vectorize \
   -fbuilding-libgcc -fno-stack-protector $(FORCE_EXPLICIT_EH_REGISTRY) \
-  $(INHIBIT_LIBC_CFLAGS)
+  $(INHIBIT_LIBC_CFLAGS) $(USE_TM_CLONE_REGISTRY)
 
 # Extra flags to use when compiling crt{begin,end}.o.
 CRTSTUFF_T_CFLAGS =
diff --git a/libgcc/configure.ac b/libgcc/configure.ac
index 5f11455..b1b90d2 100644
--- a/libgcc/configure.ac
+++ b/libgcc/configure.ac
@@ -261,6 +261,16 @@ fi
 ])
 AC_SUBST([force_explicit_eh_registry])
 
+AC_ARG_ENABLE([tm-clone-registry],
+[  --disable-tm-clone-registrydisable TM clone registry],
+[
+use_tm_clone_registry=
+if test "$enable_tm_clone_registry" = no; then
+  use_tm_clone_registry=-DUSE_TM_CLONE_REGISTRY=0
+fi
+])
+AC_SUBST([use_tm_clone_registry])
+
 AC_LIB_PROG_LD_GNU
 
 AC_MSG_CHECKING([for thread model used by GCC])


[PATCH] PR fortran/68544 -- A derived type cannot be actual argument

2019-06-12 Thread Steve Kargl
The attach patch has been sitting in my tree for a year.
It has been tested and updated as others have changed 
the gfortran code.  The patch has been compiled and
regression tested on x86_64-*-freebsd.  OK to commit?

Either testcase should provide sufficient information
about the problem that this patch fixes.

2019-06-12  Steven G. Kargl  

PR fortran/68544
* resolve.c (is_dt_name): New function to compare symbol name against
list of derived types.
(resolve_actual_arglist): Use it to find wrong code.

2019-06-12  Steven G. Kargl  

PR fortran/68544
* gfortran.dg/pr68544.f90: New test.
* gfortran.dg/pr85687.f90: Modify test for new error message.


-- 
Steve
Index: gcc/fortran/resolve.c
===
--- gcc/fortran/resolve.c	(revision 272219)
+++ gcc/fortran/resolve.c	(working copy)
@@ -1862,6 +1862,25 @@ resolve_procedure_expression (gfc_expr* expr)
 }
 
 
+/* Check that name is not a derived type.  */
+ 
+static bool
+is_dt_name (const char *name)
+{
+  gfc_symbol *dt_list, *dt_first;
+
+  dt_list = dt_first = gfc_derived_types;
+  for (; dt_list; dt_list = dt_list->dt_next)
+{
+  if (strcmp(dt_list->name, name) == 0)
+	return true;
+  if (dt_first == dt_list->dt_next)
+	break;
+}
+  return false;
+}
+
+
 /* Resolve an actual argument list.  Most of the time, this is just
resolving the expressions in the list.
The exception is that we sometimes have to decide whether arguments
@@ -1923,6 +1942,13 @@ resolve_actual_arglist (gfc_actual_arglist *arg, proce
 
   sym = e->symtree->n.sym;
 
+  if (sym->attr.flavor == FL_PROCEDURE && is_dt_name (sym->name))
+	{
+	  gfc_error ("Derived type %qs is used as an actual "
+		 "argument at %L", sym->name, &e->where);
+	  goto cleanup;
+	}
+
   if (sym->attr.flavor == FL_PROCEDURE
 	  || sym->attr.intrinsic
 	  || sym->attr.external)
Index: gcc/testsuite/gfortran.dg/pr68544.f90
===
--- gcc/testsuite/gfortran.dg/pr68544.f90	(nonexistent)
+++ gcc/testsuite/gfortran.dg/pr68544.f90	(working copy)
@@ -0,0 +1,13 @@
+! { dg-do compile }
+! PF fortran/68544
+program p
+   real x
+   type t
+   end type
+   x = f(t) ! { dg-error "used as an actual argument" }
+end
+subroutine b
+   type t
+   end type
+   print *, shape(t)! { dg-error "used as an actual argument" }
+end
Index: gcc/testsuite/gfortran.dg/pr85687.f90
===
--- gcc/testsuite/gfortran.dg/pr85687.f90	(revision 272219)
+++ gcc/testsuite/gfortran.dg/pr85687.f90	(working copy)
@@ -4,5 +4,5 @@
 program p
type t
end type
-   print *, rank(t)  ! { dg-error "must be a data object" }
+   print *, rank(t)  ! { dg-error "used as an actual argument" }
 end


[COMMITTED] Remove leftover prototype

2019-06-12 Thread Steve Kargl
Revision 262909 remvoed the gfc_free_dt_list function, but
failed to remove the prototype in gfortran.h. I have committed
the following patch.

Index: gcc/fortran/ChangeLog
===
--- gcc/fortran/ChangeLog   (revision 272219)
+++ gcc/fortran/ChangeLog   (working copy)
@@ -1,5 +1,9 @@
 2019-06-12  Steven G. Kargl  
 
+   * gfortran.h (gfc_free_dt_list): Remove prototype.
+
+2019-06-12  Steven G. Kargl  
+
PR fortran/90002
* array.c (gfc_free_array_spec): When freeing an array-spec, avoid
an ICE for assumed-shape coarrays 
Index: gcc/fortran/gfortran.h
===
--- gcc/fortran/gfortran.h  (revision 272219)
+++ gcc/fortran/gfortran.h  (working copy)
@@ -3117,8 +3117,6 @@ void gfc_traverse_user_op (gfc_namespace *, void (*)(g
 void gfc_save_all (gfc_namespace *);
 
 void gfc_enforce_clean_symbol_state (void);
-void gfc_free_dt_list (void);
-
 
 gfc_gsymbol *gfc_get_gsymbol (const char *, bool bind_c);
 gfc_gsymbol *gfc_find_gsymbol (gfc_gsymbol *, const char *);


-- 
Steve


Re: [C++ Patch] Further grokdeclarator locations work

2019-06-12 Thread Jason Merrill

On 6/12/19 5:36 PM, Paolo Carlini wrote:

Hi,

On 12/06/19 22:24, Jason Merrill wrote:

On 6/6/19 9:00 AM, Paolo Carlini wrote:

Hi,

only minor functional changes here - more precise locations for two 
error messages  - but I think it's a step in the right direction: it 
moves the declaration of id_loc way up, near typespec_loc and as such 
id_loc is immediately used. Then its value is updated upon the loop 
over declarator in the middle of the function and used again in the 
final part of the function. That also "frees" the simple name loc for 
other local uses, allows to simplify those checks changed rather 
recently over (name && declarator). and (unqualified_id && 
declarator). Tested x86_64-linux.


Mostly OK, except:


   if (declspecs->multiple_types_p)
 {
-  error ("two or more data types in declaration of %qs", name);
+  error_at (id_loc, "two or more data types in declaration of 
%qs", name);

   return error_mark_node;
 }


declspecs->locations[ds_type_spec]?


   if (declspecs->conflicting_specifiers_p)
 {
-  error ("conflicting specifiers in declaration of %qs", name);
+  error_at (id_loc, "conflicting specifiers in declaration of 
%qs", name);

   return error_mark_node;
 }


ds_storage_class/ds_typedef?


Ok.

Ideally, it would be nice to have available locations for the two 
leftmost data types and, for the second error, the locations of 
conflicting storage classes (eg, static and register). Other compilers 
then point to the second one, where the issue shows up. I think we 
already briefly discussed that kind of logic in the past, but in those 
cases we had available all the locations we needed and we ended up using 
gcc_rich_location and add_range, etc..


Anyway, we can certainly use typespec_loc and the minimum of 
ds_storage_class and ds_typedef, respectively, like in the below, which 
passes testing.


Thanks! Paolo.


OK.

Jason



Re: [PATCH] include MEM_REF type in tree dumps (PR 90676)

2019-06-12 Thread Martin Sebor

On 6/12/19 3:33 PM, Rainer Orth wrote:

Richard Biener  writes:


On Mon, Jun 10, 2019 at 10:37 PM Martin Sebor  wrote:


On 6/10/19 1:29 PM, Jakub Jelinek wrote:

On Mon, Jun 10, 2019 at 01:23:28PM -0600, Martin Sebor wrote:

+  else if (integer_zerop (TREE_OPERAND (node, 1))
+   /* Dump the types of INTEGER_CSTs explicitly, for we can't
+  infer them and MEM_ATTR caching will share MEM_REFs
+  with differently-typed op0s.  */
+   && TREE_CODE (TREE_OPERAND (node, 0)) != INTEGER_CST
+   /* Released SSA_NAMES have no TREE_TYPE.  */
+   && TREE_TYPE (TREE_OPERAND (node, 0)) != NULL_TREE
+   /* Same pointer types, but ignoring POINTER_TYPE vs.
+  REFERENCE_TYPE.  */
+   && (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 0)))
+   == TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1
+   && (TYPE_MODE (TREE_TYPE (TREE_OPERAND (node, 0)))
+   == TYPE_MODE (TREE_TYPE (TREE_OPERAND (node, 1
+   && (TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (node, 0)))
+   == TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (node, 1
+   /* Same value types ignoring qualifiers.  */
+   && (TYPE_MAIN_VARIANT (TREE_TYPE (node))
+   == TYPE_MAIN_VARIANT
+   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1)


You should be using types_compatible_p rather than type equality, that is
all GIMPLE cares about.


The code above predates my change, I just factored it out into
a separate function; it does make the diff bigger.  The code
I introduced only adds the <...> if the size of the access
differs from the size of the operand. I used type sizes rather
than testing their compatibility to minimize the number of tests
that might need updating.

This is the salient bit:

+  pp_string (pp, "MEM");
+
+  tree nodetype = TREE_TYPE (node);
+  tree op0 = TREE_OPERAND (node, 0);
+  tree op1 = TREE_OPERAND (node, 1);
+  tree op1type = TYPE_MAIN_VARIANT (TREE_TYPE (op1));
+
+  if (!tree_int_cst_equal (TYPE_SIZE (nodetype),
+  TYPE_SIZE (TREE_TYPE (op1type
+   {
+ pp_string (pp, " <");
+ /* If the size of the type of the operand is not the same
+as the size of the MEM_REF expression include the type
+of the latter similar to the TDF_GIMPLE output to make
+it clear how many bytes of memory are being accessed.  */
+ dump_generic_node (pp, nodetype, spc, flags | TDF_SLIM, false);
+ pp_string (pp, "> ");
+   }


You need to guard against non-constant TYPE_SIZE here (for both
involved types) so I suggest you use operand_equal_p (..., 0).  If you
do that you need to guard against NULL_TREE TYPE_SIZE
(tree_int_cst_equal handled that as unequal).

OK with such change.
Richard.


The testsuite part cannot have been tested very thoroughly: for one,
this snippet


Please try r272218.

Martin



diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
@@ -59,4 +59,5 @@ void foo(int prec,
  bar (&info);
  }
  
-/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct printf_info \\*\\)&info \\+ \[0-9\]+B\\\] = {}" 1 "dse1" } } */

+/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct printf_info \\*\\)&info \\+ 
\[0-9\]+B\\\] = {}" 1 "dse1" { target { ! store_merge } } } }
+   { dg-final { scan-tree-dump-times "MEM  \\\[\\(struct printf_info \\*\\)&info \\+ 
\[0-9\]+B\\\] = {}" 1 "dse1" { target { store_merge } } } } */


breaks gcc.sum creation with dg-extract-result.py (the default)
completely: gcc.log shows

spawn -ignore SIGHUP /var/gcc/regression/trunk/11-gcc/build/gcc/xgcc 
-B/var/gcc/regression/trunk/11-gcc/build/gcc/ 
/vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c 
-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers 
-fdiagnostics-color=never -O2 -fdump-tree-dse1 -S -o ssa-dse-24.s^M
PASS: gcc.dg/tree-ssa/ssa-dse-24.c (test for excess errors)
ERROR: (DejaGnu) proc "4" does not exist.
The error code is NONE
The info on the error is:
invalid command name "4"
 while executing
"::tcl_unknown 4"
 ("uplevel" body line 1)
 invoked from within
"uplevel 1 ::tcl_unknown $args"

due to the unquoted [4].

Even with that fixed, I see many failures:

+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++14  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++17  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++98  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/ssa-dse-1.C   scan-tree-dump-times dse1 "MEM  
[(struct FixBuf *)& + [0-9]+B] = {}" 1

on 32 and 64-bit i386-pc-solaris2.11 (and i686-pc-linux-gnu),

+FAIL: g++.dg/tree-ssa/pr19807.C  -std=gnu++14  scan-tree-dump-ti

Re: [C++ Patch] Further grokdeclarator locations work

2019-06-12 Thread Paolo Carlini

Hi,

On 12/06/19 22:24, Jason Merrill wrote:

On 6/6/19 9:00 AM, Paolo Carlini wrote:

Hi,

only minor functional changes here - more precise locations for two 
error messages  - but I think it's a step in the right direction: it 
moves the declaration of id_loc way up, near typespec_loc and as such 
id_loc is immediately used. Then its value is updated upon the loop 
over declarator in the middle of the function and used again in the 
final part of the function. That also "frees" the simple name loc for 
other local uses, allows to simplify those checks changed rather 
recently over (name && declarator). and (unqualified_id && 
declarator). Tested x86_64-linux.


Mostly OK, except:


   if (declspecs->multiple_types_p)
 {
-  error ("two or more data types in declaration of %qs", name);
+  error_at (id_loc, "two or more data types in declaration of 
%qs", name);

   return error_mark_node;
 }


declspecs->locations[ds_type_spec]?


   if (declspecs->conflicting_specifiers_p)
 {
-  error ("conflicting specifiers in declaration of %qs", name);
+  error_at (id_loc, "conflicting specifiers in declaration of 
%qs", name);

   return error_mark_node;
 }


ds_storage_class/ds_typedef?


Ok.

Ideally, it would be nice to have available locations for the two 
leftmost data types and, for the second error, the locations of 
conflicting storage classes (eg, static and register). Other compilers 
then point to the second one, where the issue shows up. I think we 
already briefly discussed that kind of logic in the past, but in those 
cases we had available all the locations we needed and we ended up using 
gcc_rich_location and add_range, etc..


Anyway, we can certainly use typespec_loc and the minimum of 
ds_storage_class and ds_typedef, respectively, like in the below, which 
passes testing.


Thanks! Paolo.

///


Index: cp/decl.c
===
--- cp/decl.c   (revision 272213)
+++ cp/decl.c   (working copy)
@@ -10456,6 +10456,8 @@ grokdeclarator (const cp_declarator *declarator,
   if (typespec_loc == UNKNOWN_LOCATION)
 typespec_loc = input_location;
 
+  location_t id_loc = declarator ? declarator->id_loc : input_location;
+
   /* Look inside a declarator for the name being declared
  and get it as a string, for an error message.  */
   for (id_declarator = declarator;
@@ -10620,8 +10622,7 @@ grokdeclarator (const cp_declarator *declarator,
  D1 ( parameter-declaration-clause) ...  */
   if (funcdef_flag && innermost_code != cdk_function)
 {
-  error_at (declarator->id_loc,
-   "function definition does not declare parameters");
+  error_at (id_loc, "function definition does not declare parameters");
   return error_mark_node;
 }
 
@@ -10629,8 +10630,7 @@ grokdeclarator (const cp_declarator *declarator,
   && innermost_code != cdk_function
   && ! (ctype && !declspecs->any_specifiers_p))
 {
-  error_at (declarator->id_loc,
-   "declaration of %qD as non-function", dname);
+  error_at (id_loc, "declaration of %qD as non-function", dname);
   return error_mark_node;
 }
 
@@ -10639,8 +10639,7 @@ grokdeclarator (const cp_declarator *declarator,
   if (UDLIT_OPER_P (dname)
  && innermost_code != cdk_function)
{
- error_at (declarator->id_loc,
-   "declaration of %qD as non-function", dname);
+ error_at (id_loc, "declaration of %qD as non-function", dname);
  return error_mark_node;
}
 
@@ -10648,14 +10647,12 @@ grokdeclarator (const cp_declarator *declarator,
{
  if (typedef_p)
{
- error_at (declarator->id_loc,
-   "declaration of %qD as %", dname);
+ error_at (id_loc, "declaration of %qD as %", dname);
  return error_mark_node;
}
  else if (decl_context == PARM || decl_context == CATCHPARM)
{
- error_at (declarator->id_loc,
-   "declaration of %qD as parameter", dname);
+ error_at (id_loc, "declaration of %qD as parameter", dname);
  return error_mark_node;
}
}
@@ -10705,13 +10702,16 @@ grokdeclarator (const cp_declarator *declarator,
  issue an error message.  */
   if (declspecs->multiple_types_p)
 {
-  error ("two or more data types in declaration of %qs", name);
+  error_at (typespec_loc,
+   "two or more data types in declaration of %qs", name);
   return error_mark_node;
 }
 
   if (declspecs->conflicting_specifiers_p)
 {
-  error ("conflicting specifiers in declaration of %qs", name);
+  error_at (min_location (declspecs->locations[ds_typedef],
+ declspecs->locations[ds_storage_class]),
+   "conflicting specifiers in declaration of %qs", name);
   return error_mark_node;
 

Re: C++ PATCH for c++/90825 - endless recursion when evaluating sizeof

2019-06-12 Thread Jason Merrill

On 6/11/19 3:59 PM, Marek Polacek wrote:

On Tue, Jun 11, 2019 at 03:05:26PM -0400, Jason Merrill wrote:

On 6/11/19 2:28 PM, Marek Polacek wrote:

On Tue, Jun 11, 2019 at 08:37:27AM -0400, Jason Merrill wrote:

On 6/11/19 7:47 AM, Jakub Jelinek wrote:

On Mon, Jun 10, 2019 at 09:59:46PM -0400, Marek Polacek wrote:

This test segvs since r269078, this hunk in particular:

@@ -4581,8 +4713,9 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
  break;

case SIZEOF_EXPR:
-  r = fold_sizeof_expr (t);
-  VERIFY_CONSTANT (r);
+  r = cxx_eval_constant_expression (ctx, fold_sizeof_expr (t), lval,
+   non_constant_p, overflow_p,
+   jump_target);
  break;

In a template, fold_sizeof_expr will just create a new SIZEOF_EXPR, that is the
same, but not identical; see cxx_sizeof_expr.  Then cxx_eval_constant_expression


Not always, if it calls cxx_sizeof_expr, it will, but if it calls
cxx_sizeof_or_alignof_type it will only if the type is dependent or VLA.

So, I'd think you should call cxx_eval_constant_expression if TREE_CODE (r)
!= SIZEOF_EXPR, otherwise probably *non_constant_p = true; is in order,
maybe together with gcc_assert (ctx->quiet); ?  I'd hope that if we really
require a constant expression we evaluate it in !processing_template_decl
contexts.


Ok, I had been meaning to add the *non_constant_p bit but never did.  :(


Makes sense.  Also, cxx_sizeof_expr should probably only return a
SIZEOF_EXPR if the operand is instantiation-dependent.


That results in

FAIL: g++.dg/template/incomplete6.C  -std=c++98 (internal compiler error)
FAIL: g++.dg/template/incomplete6.C  -std=c++98 (test for excess errors)
FAIL: g++.dg/template/overload13.C  -std=c++98 (internal compiler error)
FAIL: g++.dg/template/overload13.C  -std=c++98 (test for excess errors)

because we trigger an assert in value_dependent_expression_p:

  /* If there are no operands, it must be an expression such
 as "int()". This should not happen for aggregate types
 because it would form non-constant expressions.  */
  gcc_assert (cxx_dialect >= cxx11
  || INTEGRAL_OR_ENUMERATION_TYPE_P (type));

  return false;

and in this case we have T() where T is a class, and it's in C++98.

It's not needed to fix this PR so perhaps the following could go in,
but is there anything I should do about that?


instantiation_dependent_uneval_expression_p shouldn't have that problem.


Ah, nice.


diff --git gcc/cp/constexpr.c gcc/cp/constexpr.c
index a2f29694462..443e1c7899f 100644
--- gcc/cp/constexpr.c
+++ gcc/cp/constexpr.c
@@ -4808,9 +4808,16 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
 break;
   case SIZEOF_EXPR:
-  r = cxx_eval_constant_expression (ctx, fold_sizeof_expr (t), lval,
-   non_constant_p, overflow_p,
-   jump_target);
+  r = fold_sizeof_expr (t);
+  /* In a template, fold_sizeof_expr may merely create a new SIZEOF_EXPR,
+which could lead to an infinite recursion.  */
+  if (TREE_CODE (r) != SIZEOF_EXPR)
+   r = cxx_eval_constant_expression (ctx, r, lval,
+ non_constant_p, overflow_p,
+ jump_target);
+  else
+   *non_constant_p = true;


Let's also add the assert Jakub suggested.


Done.

Luckily, this also fixed c++/90832 so I'm adding a test for that, too.

Bootstrapped/regtested on x86_64-linux, ok for trunk?


OK.

Jason


Re: [PATCH] include MEM_REF type in tree dumps (PR 90676)

2019-06-12 Thread Rainer Orth
Richard Biener  writes:

> On Mon, Jun 10, 2019 at 10:37 PM Martin Sebor  wrote:
>>
>> On 6/10/19 1:29 PM, Jakub Jelinek wrote:
>> > On Mon, Jun 10, 2019 at 01:23:28PM -0600, Martin Sebor wrote:
>> >> +  else if (integer_zerop (TREE_OPERAND (node, 1))
>> >> +   /* Dump the types of INTEGER_CSTs explicitly, for we can't
>> >> +  infer them and MEM_ATTR caching will share MEM_REFs
>> >> +  with differently-typed op0s.  */
>> >> +   && TREE_CODE (TREE_OPERAND (node, 0)) != INTEGER_CST
>> >> +   /* Released SSA_NAMES have no TREE_TYPE.  */
>> >> +   && TREE_TYPE (TREE_OPERAND (node, 0)) != NULL_TREE
>> >> +   /* Same pointer types, but ignoring POINTER_TYPE vs.
>> >> +  REFERENCE_TYPE.  */
>> >> +   && (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 0)))
>> >> +   == TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1
>> >> +   && (TYPE_MODE (TREE_TYPE (TREE_OPERAND (node, 0)))
>> >> +   == TYPE_MODE (TREE_TYPE (TREE_OPERAND (node, 1
>> >> +   && (TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (node, 0)))
>> >> +   == TYPE_REF_CAN_ALIAS_ALL (TREE_TYPE (TREE_OPERAND (node, 
>> >> 1
>> >> +   /* Same value types ignoring qualifiers.  */
>> >> +   && (TYPE_MAIN_VARIANT (TREE_TYPE (node))
>> >> +   == TYPE_MAIN_VARIANT
>> >> +   (TREE_TYPE (TREE_TYPE (TREE_OPERAND (node, 1)
>> >
>> > You should be using types_compatible_p rather than type equality, that is
>> > all GIMPLE cares about.
>>
>> The code above predates my change, I just factored it out into
>> a separate function; it does make the diff bigger.  The code
>> I introduced only adds the <...> if the size of the access
>> differs from the size of the operand. I used type sizes rather
>> than testing their compatibility to minimize the number of tests
>> that might need updating.
>>
>> This is the salient bit:
>>
>> +  pp_string (pp, "MEM");
>> +
>> +  tree nodetype = TREE_TYPE (node);
>> +  tree op0 = TREE_OPERAND (node, 0);
>> +  tree op1 = TREE_OPERAND (node, 1);
>> +  tree op1type = TYPE_MAIN_VARIANT (TREE_TYPE (op1));
>> +
>> +  if (!tree_int_cst_equal (TYPE_SIZE (nodetype),
>> +  TYPE_SIZE (TREE_TYPE (op1type
>> +   {
>> + pp_string (pp, " <");
>> + /* If the size of the type of the operand is not the same
>> +as the size of the MEM_REF expression include the type
>> +of the latter similar to the TDF_GIMPLE output to make
>> +it clear how many bytes of memory are being accessed.  */
>> + dump_generic_node (pp, nodetype, spc, flags | TDF_SLIM, false);
>> + pp_string (pp, "> ");
>> +   }
>
> You need to guard against non-constant TYPE_SIZE here (for both
> involved types) so I suggest you use operand_equal_p (..., 0).  If you
> do that you need to guard against NULL_TREE TYPE_SIZE
> (tree_int_cst_equal handled that as unequal).
>
> OK with such change.
> Richard.

The testsuite part cannot have been tested very thoroughly: for one,
this snippet

diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c 
b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
--- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c
@@ -59,4 +59,5 @@ void foo(int prec,
 bar (&info);
 }
 
-/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct printf_info \\*\\)&info 
\\+ \[0-9\]+B\\\] = {}" 1 "dse1" } } */
+/* { dg-final { scan-tree-dump-times "MEM\\\[\\(struct printf_info \\*\\)&info 
\\+ \[0-9\]+B\\\] = {}" 1 "dse1" { target { ! store_merge } } } }
+   { dg-final { scan-tree-dump-times "MEM  \\\[\\(struct printf_info 
\\*\\)&info \\+ \[0-9\]+B\\\] = {}" 1 "dse1" { target { store_merge } } } } */


breaks gcc.sum creation with dg-extract-result.py (the default)
completely: gcc.log shows

spawn -ignore SIGHUP /var/gcc/regression/trunk/11-gcc/build/gcc/xgcc 
-B/var/gcc/regression/trunk/11-gcc/build/gcc/ 
/vol/gcc/src/hg/trunk/local/gcc/testsuite/gcc.dg/tree-ssa/ssa-dse-24.c 
-fno-diagnostics-show-caret -fno-diagnostics-show-line-numbers 
-fdiagnostics-color=never -O2 -fdump-tree-dse1 -S -o ssa-dse-24.s^M
PASS: gcc.dg/tree-ssa/ssa-dse-24.c (test for excess errors)
ERROR: (DejaGnu) proc "4" does not exist.
The error code is NONE
The info on the error is:
invalid command name "4"
while executing
"::tcl_unknown 4"
("uplevel" body line 1)
invoked from within
"uplevel 1 ::tcl_unknown $args"

due to the unquoted [4].

Even with that fixed, I see many failures:

+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++14  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++17  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/pr31146.C  -std=gnu++98  scan-tree-dump forwprop1 
"MEM[.*&i][j.*] =.* 1;"
+FAIL: g++.dg/tree-ssa/ssa-dse-1.C   scan-tree-dump-times dse1 "MEM 
 [(struct FixBuf *)& \

Re: [PATCH][C++] Fix PR90801

2019-06-12 Thread Jason Merrill

On 6/12/19 7:07 AM, Richard Biener wrote:

On Tue, 11 Jun 2019, Jason Merrill wrote:


On Tue, Jun 11, 2019, 6:45 AM Richard Biener  wrote:



The following fixes the documented(!) quadraticness in
split_nonconstant_init_1 by simply marking to be removed
constructor elements and doing that in a second run over
the constructor.

More micro-optimizing would be possible by recording the
first removed element index and special-casing removing
of a single element.  If you think that's worth the extra
complexity I can work on that (hopefully the case we
increase num_split_elts but not actually split is a bug...).

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

OK if that passes?



Ok, thanks.


Installed.

Below is the simplest variant re-adding optimized handling
of single-element removal.  Note the second hunk which
moves the num_split_elts increment so it doesn't happen
when split_nonconstant_init_1 returns false.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?


OK.

Jason



Re: Add value_range_base::contains_p

2019-06-12 Thread Aldy Hernandez

On 6/12/19 5:33 AM, Richard Biener wrote:

On Wed, Jun 12, 2019 at 1:57 AM Martin Sebor  wrote:


On 6/11/19 3:02 PM, Aldy Hernandez wrote:



On 6/11/19 12:52 PM, Martin Sebor wrote:

On 6/11/19 10:26 AM, Aldy Hernandez wrote:



On 6/11/19 12:17 PM, Martin Sebor wrote:

On 6/11/19 9:05 AM, Aldy Hernandez wrote:



On 6/11/19 9:45 AM, Richard Biener wrote:

On Tue, Jun 11, 2019 at 12:40 PM Aldy Hernandez 
wrote:


This patch cleans up the various contains, may_contain, and
value_inside_range variants we have throughout, in favor of one--
contains_p.  There should be no changes in functionality.

I have added a note to range_includes_zero_p, perhaps as a personal
question than anything else.  This function was/is returning true
for
UNDEFINED.  From a semantic sense, that doesn't make sense.
UNDEFINED
is really the empty set.  Is the functionality wrong, or should
we call
this function something else?  Either way, I'm fine removing the
comment
but I'm genuinely curious.


So this affects for example this hunk:

-  if (vr && !range_includes_p (vr, 1))
+  if (vr && (!vr->contains_p (build_one_cst (TREE_TYPE (name)))
+&& !vr->undefined_p ()))
  {

I think it's arbitrary how we compute X in UNDEFINED and I'm fine
with changing the affected predicates to return false.  This means
not testing for !vr->undefined_p here.


Excellent.



Note I very much dislike the build_one_cst () call here so please
provide an overload hiding this.


Good idea.  I love it.



Not sure why you keep range_includes_zero_p.


I wasn't sure if there was some subtle reason why we were including
undefined.

OK pending tests?


Should the second overload:

+  bool contains_p (tree) const;
+  bool contains_p (int) const;

take something like HOST_WIDE_INT or even one of those poly_ints
like build_int_cst does?  (With the former, contains_p (0) will
likely be ambiguous since 0 is int and HOST_WIDE_INT is long).


We have a type, so there should be no confusion:

+  return contains_p (build_int_cst (type (), val));

(UNDEFINED and VARYING don't have a type, so they are special cased
prior).


I didn't mean the overloads are confusing, just that there the one
that takes an int doesn't make it possible to test whether a value
outside the range of an int is in the range.  For example, in
the call

contains_p (SIZE_MAX)

the argument will get sliced (and trigger a -Woverflow).  One will
need to go back to the more verbose way of calling it.


The int version is not really meant to pass anything but simple
constants.  For anything fancy, one should really be using the tree
version.  But I can certainly change the argument to HOST_WIDE_INT if
preferred.



Changing the argument type to HOST_WIDE_INT would avoid the slicing
and warning but then make contains_p (0) ambiguous because 0 isn't
a perfect match for either void* or long (so it requires a conversion).


Just a plain 0 will match the int version, instead of the tree version,
right?  Nobody should be passing NULL to the tree version, so that seems
like a non-issue.


Right, NULL isn't a problem, but I would expect any integer to work
(I thought that's what Richard was asking for)  So my suggestion was
to have contains_p() a poly_int64 and provide just as robust an API
as build_int_cst.  The argument ends up converted to the poly_int64
anyway when it's passed to the latter.  I.e., why not define
contains_p simply like this:

bool
value_range_base::contains_p (poly_int64 val) const
{
  if (varying_p ())
return true;
  if (undefined_p ())
return false;

  return contains_p (build_int_cst (type (), val));
}


I agree that plain 'int' is bad.  Given we currently store
INTEGER_CSTs only (and not POLY_INT_CSTs) a
widest_int argument should be fine.  Note widest
because when interpreted signed all unsigned values
fit.

The existing contains_p check is also a bit fishy
for the cases TREE_TYPE of the value has a
value-range not covered in the value-ranges type...
I guess it could do

if (!int_fits_type_p (val, this->type ())
  return false;

but that changes existing behavior which happily
throws different sign constants into tree_int_cst_lt
for example...  Do we want to allow non-INTEGER_CST
val arguments at all?  That is, you make contains_p
return a bool and say "yes" when we couldn't really
decide.  This is why previously it was named
may_contain_p (and we didn't have must_contain_p).
I think making the result explicitely clear is a must
since contains_p reads like !contains_p means
it does _not_ contain.  So, either change back the
name (and provide the must variant)
or make it return the tri-state from value_inside_range.


The only reason I added the int overload was because you didn't like the 
build_one_cst call.  However, if we make it HOST_WIDE_INT or even 
widest_int, it will cause a conflict resolution with the tree version. 
I think it's best we only provide a tree version, and make due with the 
on

Re: Ping: [PATCH] PR88395 Fix Nullptr when compiling with -fconcepts

2019-06-12 Thread Jason Merrill

On 6/5/19 10:17 AM, Richard Sandiford wrote:

Thanks for the patch and sorry that there was no response.
I've added the C++ maintainers to cc:


Thanks.  In the future, mentioning C++ in the subject line will help me 
see it.



On 2019-04-08 7:20 p.m., Nicholas Krause wrote:

This fixes the caller in tsubst_requires_expr to
tsubst_constraint_variables to wrap their respective
trees in PARM_CONSTR_PARMS. This is to get the correct
parmeter constraints from the tree before calling
tsubst_constraint_variables like other callers
in constraint.cc and to fix the bug id, 88395 on
the gcc bugzilla. OK for merge?

Signed-off-by: Nicholas Krause 
---
  gcc/cp/constraint.cc | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 9884eb0db50..a78d0a9a49b 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -1882,7 +1882,7 @@ tsubst_requires_expr (tree t, tree args,
tree parms = TREE_OPERAND (t, 0);
if (parms)
  {
-  parms = tsubst_constraint_variables (parms, args, complain, in_decl);
+  parms = tsubst_constraint_variables (PARM_CONSTR_PARMS (parms), args, 
complain, in_decl);


This change makes the testcase crash immediately because "parms" is a 
PARM_DECL, not a PARM_CONSTR.


As I just wrote on the PR, the problem in this testcase is that there's 
a recursive dependency of concepts: operator+= requires Concept, but 
checking Concept involves calling +=.  And so the compiler recurses 
infinitely and runs out of stack.  It would be good to catch this and 
give a more helpful error, but I don't think it's valid C++.


Jason


Re: [C++ Patch] Further grokdeclarator locations work

2019-06-12 Thread Jason Merrill

On 6/6/19 9:00 AM, Paolo Carlini wrote:

Hi,

only minor functional changes here - more precise locations for two 
error messages  - but I think it's a step in the right direction: it 
moves the declaration of id_loc way up, near typespec_loc and as such 
id_loc is immediately used. Then its value is updated upon the loop over 
declarator in the middle of the function and used again in the final 
part of the function. That also "frees" the simple name loc for other 
local uses, allows to simplify those checks changed rather recently over 
(name && declarator). and (unqualified_id && declarator). Tested 
x86_64-linux.


Mostly OK, except:


   if (declspecs->multiple_types_p)
 {
-  error ("two or more data types in declaration of %qs", name);
+  error_at (id_loc, "two or more data types in declaration of %qs", name);
   return error_mark_node;
 }


declspecs->locations[ds_type_spec]?


   if (declspecs->conflicting_specifiers_p)
 {
-  error ("conflicting specifiers in declaration of %qs", name);
+  error_at (id_loc, "conflicting specifiers in declaration of %qs", name);
   return error_mark_node;
 }


ds_storage_class/ds_typedef?

Jason


[Fortran,wwwdocs] faq.html - updates to www.fortran.org

2019-06-12 Thread Gerald Pfeifer
This is related to changes I made to readings.html one-and-a-half
days ago.  Essentially fortran.org underwent quite some changes,
breaking links left and right, and frankly looking a little (too)
commercial.

In any case I could not find what looks like a proper replacement
for this link, so I had to take it out.  If one of you wants to
recommend a good replacement, happy to add that again.

Committed for now.

Gerald

Index: faq.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/faq.html,v
retrieving revision 1.231
diff -u -r1.231 faq.html
--- faq.html30 Sep 2018 14:38:46 -  1.231
+++ faq.html12 Jun 2019 20:20:23 -
@@ -12,10 +12,9 @@
 
 This FAQ tries to answer specific questions concerning GCC. For
 general information regarding C, C++, and Fortran respectively, please check
-the http://c-faq.com/";>comp.lang.c FAQ,
-https://isocpp.org/faq/";>C++ FAQ,
-and the http://www.fortran.com/fortran/info.html";>Fortran
-Information page.
+the http://c-faq.com/";>comp.lang.c FAQ and the
+https://isocpp.org/faq/";>C++ FAQ.
+
 
 Other GCC-related FAQs: 
https://gcc.gnu.org/onlinedocs/libstdc++/faq.html";>


[PATCH] Fix incorrect __cpp_lib_parallel_algorithm macro definitions

2019-06-12 Thread Jonathan Wakely

This macro is defined in several palces, and we only fixed the
incorrect value in some of those places.

* include/std/algorithm (__cpp_lib_parallel_algorithm): Fix value.
* include/std/memory (__cpp_lib_parallel_algorithm): Likewise.
* include/std/numeric (__cpp_lib_parallel_algorithm): Likewise.
* testsuite/25_algorithms/pstl/feature_test.cc: New test.

Tested x86_64-linux, committed to trunk.
I'll backport this to gcc-9-branch too.

commit d51263b41ffa37d0650dd88162093e57fc03c41d
Author: redi 
Date:   Wed Jun 12 20:16:03 2019 +

Fix incorrect __cpp_lib_parallel_algorithm macro definitions

* include/std/algorithm (__cpp_lib_parallel_algorithm): Fix value.
* include/std/memory (__cpp_lib_parallel_algorithm): Likewise.
* include/std/numeric (__cpp_lib_parallel_algorithm): Likewise.
* testsuite/25_algorithms/pstl/feature_test.cc: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272216 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/include/std/algorithm 
b/libstdc++-v3/include/std/algorithm
index 18182965fb1..124342c63f0 100644
--- a/libstdc++-v3/include/std/algorithm
+++ b/libstdc++-v3/include/std/algorithm
@@ -73,7 +73,7 @@
 #  endif
 
 // Feature test macro for parallel algorithms
-# define __cpp_lib_parallel_algorithm 201703L
+# define __cpp_lib_parallel_algorithm 201603L
 #endif // C++17
 
 #ifdef _GLIBCXX_PARALLEL
diff --git a/libstdc++-v3/include/std/memory b/libstdc++-v3/include/std/memory
index 1fb4a5f956c..3660f4d86de 100644
--- a/libstdc++-v3/include/std/memory
+++ b/libstdc++-v3/include/std/memory
@@ -407,7 +407,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
 # endif
 
 // Feature test macro for parallel algorithms
-# define __cpp_lib_parallel_algorithm 201703L
+# define __cpp_lib_parallel_algorithm 201603L
 #endif // C++17
 
 #endif /* _GLIBCXX_MEMORY */
diff --git a/libstdc++-v3/include/std/numeric b/libstdc++-v3/include/std/numeric
index def1debf754..01e3ae8e35d 100644
--- a/libstdc++-v3/include/std/numeric
+++ b/libstdc++-v3/include/std/numeric
@@ -216,7 +216,7 @@ _GLIBCXX_END_NAMESPACE_VERSION
 # endif
 
 // Feature test macro for parallel algorithms
-# define __cpp_lib_parallel_algorithm 201703L
+# define __cpp_lib_parallel_algorithm 201603L
 #endif // C++17
 
 #endif /* _GLIBCXX_NUMERIC */
diff --git a/libstdc++-v3/testsuite/25_algorithms/pstl/feature_test.cc 
b/libstdc++-v3/testsuite/25_algorithms/pstl/feature_test.cc
new file mode 100644
index 000..e2ae11b5438
--- /dev/null
+++ b/libstdc++-v3/testsuite/25_algorithms/pstl/feature_test.cc
@@ -0,0 +1,50 @@
+// Copyright (C) 2019 Free Software Foundation, Inc.
+//
+// This file is part of the GNU ISO C++ Library.  This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+
+// You should have received a copy of the GNU General Public License along
+// with this library; see the file COPYING3.  If not see
+// .
+
+// { dg-options "-std=gnu++17" }
+// { dg-do preprocess { target c++17 } }
+
+#include 
+#ifndef __cpp_lib_parallel_algorithm
+# error "Feature-test macro for parallel algorithms missing"
+#elif __cpp_lib_parallel_algorithm != 201603L
+# error "Feature-test macro for parallel algorithms has wrong value in 
"
+#endif
+
+#include 
+#if __cpp_lib_parallel_algorithm != 201603L
+# error "Feature-test macro for parallel algorithms has wrong value in 
"
+#endif
+
+#include 
+#if __cpp_lib_parallel_algorithm != 201603L
+# error "Feature-test macro for parallel algorithms has wrong value in 
"
+#endif
+
+// The N4810 draft does not require the macro to be defined in .
+#include 
+#if __cpp_lib_parallel_algorithm != 201603L
+# error "Feature-test macro for parallel algorithms has wrong value in 
"
+#endif
+
+// The N4810 draft does not require the macro to be defined in .
+// Include this last, because it will trigger the inclusion of TBB headers,
+// which then include , so we need to have already checked .
+#include 
+#if __cpp_lib_parallel_algorithm != 201603L
+# error "Feature-test macro for parallel algorithms has wrong value in 
"
+#endif


Re: C++ PATCH for c++/66999 - 'this' captured by reference

2019-06-12 Thread Jason Merrill

On 6/8/19 8:51 PM, Marek Polacek wrote:

This patch improves a diagnostic when parsing a lambda introducer; in 
particular,
we should handle '&this' better than we currently do.  clang/icc specifically
point out that 'this' cannot be captured by reference, let's do the same.

Bootstrapped/regtested on x86_64-linux, ok for trunk?


OK.

Jason



Re: [C++ PATCH] Add test for c++/67533 (or not?)

2019-06-12 Thread Jason Merrill

On 6/9/19 7:37 PM, Marek Polacek wrote:

This testcase used to ICE but was fixed by r259067.  We now issue

67533.C:5:26: error: conversion from ‘void’ to non-scalar type ‘Tls’ requested
 5 | thread_local Tls mytls = mytls; // { dg-error "" }
   |  ^

whereas clang++ and icc compile it.  However, the test uses a reserved 
identifier
([lex.name]/3), and I think the error is fine.



Do you think it's worth adding this test?  I thought so, just to check we don't
ICE, but who uses a mangled name as an identifier?!


OK.

I suppose get_tls_wrapper_fn could check whether a previous declaration 
has the right type, to give a more helpful diagnostic.


The original testcase makes me wonder if we should be marking 
compiler-generated  functions as no_instrument_function.


Jason


Re: [PATCH v6 00/10] New backend for the TI PRU processor

2019-06-12 Thread Dimitar Dimitrov
On вторник, 11 юни 2019 г. 14:04:41 EEST Jeff Law wrote:
> On 6/9/19 2:01 PM, Dimitar Dimitrov wrote:
> > This is the latest patch set for adding TI PRU I/O processor backend to
> > GCC. Comments from all previous series have been addressed [1], [2], [3],
> > [4], [5].> 
> > Test results can be downloaded from here:
> >http://dinux.eu/gnupru/testresults/20190607-c16eb7019be/
> > 
> > Changes since patch series v5 [5] are minimal:
> > 
> > - A few whitespace and comment fixes.
> > - doloop_end_internal length expression fix.
> > - Define TARGET_HARD_REGNO_CALL_PART_CLOBBERED hook for PRU.
> > - Use uniform naming for the PRU register number constants.
> > 
> > [1] http://gcc.gnu.org/ml/gcc-patches/2018-06/msg00775.html
> > [2] http://gcc.gnu.org/ml/gcc-patches/2018-07/msg01779.html
> > [3] http://gcc.gnu.org/ml/gcc-patches/2018-08/msg00927.html
> > [4] http://gcc.gnu.org/ml/gcc-patches/2018-09/msg00392.html
> > [5] http://gcc.gnu.org/ml/gcc-patches/2018-10/msg00979.html
> > 
> > Regards,
> > Dimitar
> > 
> > Dimitar Dimitrov (10):
> >   Initial TI PRU GCC port
> >   Initial TI PRU libgcc port
> >   testsuite: Add PRU tests
> >   testsuite: Add check for overflowed IMEM region to testsuite
> >   testsuite: Add check for unsupported TI ABI PRU features to testsuite
> >   testsuite: Remove PRU from test cases requiring hosted environment
> >   testsuite: Define PRU stack usage
> >   testsuite: Mark that PRU has one-cycle jumps
> >   testsuite: Mark that PRU uses all function pointer bits
> >   testsuite: Mark testsuite that PRU has different calling convention
> 
> [ ... ]
> My recollection was this was all ready to go, so I'm going to ack with
> the only pre-commit requirement being to fix the issue Andreas pointed
> out with the 20101011-1.c test change.
> 
> Can you please fill out the write-after-approval form (list me as your
> sponsor)
> 
> https://sourceware.org/cgi-bin/pdw/ps_form.cgi
> 
> Jeff
I committed all the patches to trunk. The 20101011-1.c issue was fixed.

Thanks,
Dimitar



[PATCH] List myself as write-after-approval

2019-06-12 Thread Dimitar Dimitrov
Committed as obvious.

ChangeLog:

2019-06-12  Dimitar Dimitrov  

* MAINTAINERS (Write After Approval): Add myself.

Signed-off-by: Dimitar Dimitrov 
---
 MAINTAINERS | 1 +
 1 file changed, 1 insertion(+)

diff --git a/MAINTAINERS b/MAINTAINERS
index 14fa95ed38b..b8d703c535d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -365,6 +365,7 @@ Bud Davis   

 Chris Demetriou
 Sameera Deshpande  
 Wilco Dijkstra 
+Dimitar Dimitrov   
 Benoit Dupont de Dinechin  

 Jason Eckhardt 
 Bernd Edlinger 
-- 
2.11.0



Re: C++ PATCH for c++/90736 - bogus error with alignof

2019-06-12 Thread Jason Merrill

On 6/9/19 8:45 PM, Marek Polacek wrote:

The problem here is that we're getting "requested alignment is not an
integer constant" since r261971, because of this part of the patch:

@ -4676,7 +4685,11 @@ cxx_eval_constant_expression (const constexpr_ctx *ctx, 
tree t,
  conversion.  */
   return fold (t);

-   if (tcode == UNARY_PLUS_EXPR)
+   /* Handle an array's bounds having been deduced after we built
+  the wrapping expression.  */
+   if (same_type_ignoring_tlq_and_bounds_p (type, TREE_TYPE (op)))
+ r = op;
+   else if (tcode == UNARY_PLUS_EXPR)
   r = fold_convert (TREE_TYPE (t), op);
 else
   r = fold_build1 (tcode, type, op);

where type was "int" and TREE_TYPE (op) was "const int", so we just used OP
which was an INTEGER_CST of type "const int".  That resulted in a NOP_EXPR
causing the error above.  Previously, we'd fold_build, returning an INTEGER_CST
with type "int".

It seems weird to me to have an INTEGER_CST with a cv-qual type, so the
following patch handles that case in adjust_temp_type.

Bootstrapped/regtested on x86_64-linux, ok for trunk and 9?


OK.

Jason



Re: [C++ Patch] More grokdeclarator locations fixes

2019-06-12 Thread Jason Merrill

On 6/10/19 6:36 AM, Paolo Carlini wrote:

Hi,

this one requires my last patch, uses id_loc in a few additional places 
in the last part of the function.


Most of the changes should be straightforward, I only want to mention 
the "typedef name may not be a nested-name-specifier" change, where, not 
using input_location means that for a test like 
g++.old-deja/g++.jason/crash10.C:


struct A {
   enum foo { bar };
};

typedef A::foo A::foo;

the location is under the 'A' of the second 'A::foo'. The EDG front-end 
does exactly the same; clang points to the 'f' but then has the wavy 
thing reaching back to 'A' (a while ago I already mentioned that we 
don't have that option). Tested x86_64-linux.


OK.

Jason



Re: [PATCH, OpenACC] Add support for gang local storage allocation in shared memory

2019-06-12 Thread Julian Brown
On Wed, 12 Jun 2019 13:57:22 +0200
Thomas Schwinge  wrote:

> Hi!
> 
> First, thanks for picking this up, and improving the patch you
> inherited.

Thanks for review!

> I understand right that this will address some aspects of PR90115
> "OpenACC: predetermined private levels for variables declared in
> blocks" (so please mention that one in the ChangeLog updates, and
> commit log), but it doesn't address all of these aspects (and see
> also Cesar's list in
> ),
> and also not yet PR90114 "Predetermined private levels for variables
> declared in OpenACC accelerator routines"?

There's two possible reasons for placing gang-private variables in
shared memory: correct implementation of OpenACC semantics, or
optimisation, since shared memory is faster than local memory (on NVidia
devices). Handling of private variables is intimately tied with the
execution model for gangs/workers/vectors implemented by a particular
target: for PTX, that's handled in the backend using a
broadcasting/neutering scheme.

That is sufficient for code that e.g. sets a variable in worker-single
mode and expects to use the value in worker-partitioned mode. The
difficulty (semantics-wise) comes when the user wants to do something
like an atomic operation in worker-partitioned mode and expects a
worker-single variable to be shared across each partitioned worker.
Forcing use of shared memory for such variables makes that work
properly.

It is *not* sufficient for the next level down, though -- expecting to
perform atomic operations in vector-partitioned mode on a variable
that is declared in vector-single mode, i.e. so that it is supposed to
be shared across all vector elements. AFAIK, that's not
straightforward, and we haven't attempted to implement it.

I think the original motivation for this patch was optimisation, though
-- typical code won't try to use atomics in this way. Cesar's list of
caveats that you linked to seems to support that notion.

> On Fri, 7 Jun 2019 15:08:37 +0100, Julian Brown
>  wrote:
> > --- a/gcc/config/nvptx/nvptx.c
> > +++ b/gcc/config/nvptx/nvptx.c  
> 
> > @@ -5237,6 +5248,10 @@ nvptx_file_end (void)
> >  write_shared_buffer (asm_out_file, vector_red_sym,
> >  vector_red_align, vector_red_size);
> >  
> > +  if (gangprivate_shared_size)
> > +write_shared_buffer (asm_out_file, gangprivate_shared_sym,
> > +gangprivate_shared_align,
> > gangprivate_shared_size);  
> 
> Curious, what is the reason that we maintain this
> '__gangprivate_shared' variable on a per-file basis instead of on a
> per-function basis (with names '__gangprivate_shared_[function]', or
> similar), which should make it more obvious where each block of
> '.shared' memory belongs to?

I can't comment on that, I'm afraid that was a part of the patch that I
inherited and didn't alter much...

> > --- a/gcc/doc/tm.texi
> > +++ b/gcc/doc/tm.texi  
> 
> > +@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_ACCEL_VAR (tree
> > @var{var}) +This hook, if defined, is used by accelerator target
> > back-ends to expand +specially handled kinds of VAR_DECL
> > expressions.  A particular use is to +place variables with specific
> > attributes inside special accelarator +memories.  A return value of
> > NULL indicates that the target does not +handle this VAR_DECL, and
> > normal RTL expanding is resumed. +@end deftypefn  
> 
> I guess I'm not terribly happy with the 'goacc.expand_accel_var' name.
> Using different "memories" for specially tagged DECLs seems to be a
> pretty generic concept (address spaces?), and...

This is partly another NVPTX weirdness -- the target uses address
spaces, but only within the backend, and without using the generic
middle-end address space machinery. The other reason for using an
attribute instead of assigning an address space is that the former can
be detected by the target compiler, but will be ignored by the host
compiler. Forcing use of an address space this early would mean that
the same non-standard address space would have to make sense for both
host and offloaded code.

For AMD GCN, we do use the generic address space support, and I found
that I could re-use the "oacc gangprivate" attribute -- but not the
expand_accel_var hook (expand time is too late for that target).
Instead, another new hook "TARGET_GOACC_ADJUST_GANGPRIVATE_DECL" is
called from omp-offload.c:execute_oacc_device_lower for variables that
have the "oacc gangprivate" attribute set. Those bits haven't been
posted upstream yet, though.

> > --- a/gcc/expr.c
> > +++ b/gcc/expr.c
> > @@ -9974,8 +9974,19 @@ expand_expr_real_1 (tree exp, rtx target,
> > machine_mode tmode, exp = SSA_NAME_VAR (ssa_name);
> >goto expand_decl_rtl;
> >  
> > -case PARM_DECL:
> >  case VAR_DECL:
> > +  /* Allow accel compiler to handle specific cases of
> > variables,
> > +specifically those tagged with the "oacc gangprivate"
>

[PATCH] PR fortran/88810 -- Code clean up

2019-06-12 Thread Steve Kargl
This PR flags a section of confusing but correct code.
The attach patch rewrites the code to hopefully provide
some clarity.  It has lived my tree for around 6 months,
so has sufferred through numerous regression tests.
OK to commit?

2019-06-12  Steven G. Kargl  

PR fortran/88810
* dependency.c (gfc_dep_resolver): Re-arrange code to make the logic
a bit more transparent.  Fix 2 nearby formatting issues.
 
-- 
Steve
Index: gcc/fortran/dependency.c
===
--- gcc/fortran/dependency.c	(revision 268276)
+++ gcc/fortran/dependency.c	(working copy)
@@ -2141,7 +2141,7 @@ gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_re
 
 	  /* Index for the reverse array.  */
 	  m = -1;
-	  for (n=0; n < lref->u.ar.dimen; n++)
+	  for (n = 0; n < lref->u.ar.dimen; n++)
 	{
 	  /* Handle dependency when either of array reference is vector
 		 subscript. There is no dependency if the vector indices
@@ -2163,7 +2163,8 @@ gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_re
 
 	  if (lref->u.ar.dimen_type[n] == DIMEN_RANGE
 		  && rref->u.ar.dimen_type[n] == DIMEN_RANGE)
-		this_dep = check_section_vs_section (&lref->u.ar, &rref->u.ar, n);
+		this_dep = check_section_vs_section (&lref->u.ar,
+		 &rref->u.ar, n);
 	  else if (lref->u.ar.dimen_type[n] == DIMEN_ELEMENT
 		   && rref->u.ar.dimen_type[n] == DIMEN_RANGE)
 		this_dep = gfc_check_element_vs_section (lref, rref, n);
@@ -2196,35 +2197,38 @@ gfc_dep_resolver (gfc_ref *lref, gfc_ref *rref, gfc_re
 	  if (rref->u.ar.dimen_type[n] == DIMEN_RANGE
 		&& lref->u.ar.dimen_type[n] == DIMEN_RANGE)
 		{
-		  /* Set reverse if backward dependence and not inhibited.  */
-		  if (reverse && reverse[m] == GFC_ENABLE_REVERSE)
-		reverse[m] = (this_dep == GFC_DEP_BACKWARD) ?
-			 GFC_REVERSE_SET : reverse[m];
+		  if (reverse)
+		{
+		  /* Reverse if backward dependence and not inhibited.  */
+		  if (reverse[m] == GFC_ENABLE_REVERSE
+			  && this_dep == GFC_DEP_BACKWARD)
+			reverse[m] = GFC_REVERSE_SET;
 
-		  /* Set forward if forward dependence and not inhibited.  */
-		  if (reverse && reverse[m] == GFC_ENABLE_REVERSE)
-		reverse[m] = (this_dep == GFC_DEP_FORWARD) ?
-			 GFC_FORWARD_SET : reverse[m];
+		  /* Forward if forward dependence and not inhibited.  */
+		  if (reverse[m] == GFC_ENABLE_REVERSE
+			  && this_dep == GFC_DEP_FORWARD)
+			reverse[m] = GFC_FORWARD_SET;
 
-		  /* Flag up overlap if dependence not compatible with
-		 the overall state of the expression.  */
-		  if (reverse && reverse[m] == GFC_REVERSE_SET
-		&& this_dep == GFC_DEP_FORWARD)
-		{
-	  reverse[m] = GFC_INHIBIT_REVERSE;
-		  this_dep = GFC_DEP_OVERLAP;
+		  /* Flag up overlap if dependence not compatible with
+			 the overall state of the expression.  */
+		  if (reverse[m] == GFC_REVERSE_SET
+			  && this_dep == GFC_DEP_FORWARD)
+			{
+			  reverse[m] = GFC_INHIBIT_REVERSE;
+			  this_dep = GFC_DEP_OVERLAP;
+			}
+		  else if (reverse[m] == GFC_FORWARD_SET
+			   && this_dep == GFC_DEP_BACKWARD)
+			{
+			  reverse[m] = GFC_INHIBIT_REVERSE;
+			  this_dep = GFC_DEP_OVERLAP;
+			}
 		}
-		  else if (reverse && reverse[m] == GFC_FORWARD_SET
-		&& this_dep == GFC_DEP_BACKWARD)
-		{
-	  reverse[m] = GFC_INHIBIT_REVERSE;
-		  this_dep = GFC_DEP_OVERLAP;
-		}
 
 		  /* If no intention of reversing or reversing is explicitly
 		 inhibited, convert backward dependence to overlap.  */
-		  if ((reverse == NULL && this_dep == GFC_DEP_BACKWARD)
-		  || (reverse != NULL && reverse[m] == GFC_INHIBIT_REVERSE))
+		  if ((!reverse && this_dep == GFC_DEP_BACKWARD)
+		  || (reverse && reverse[m] == GFC_INHIBIT_REVERSE))
 		this_dep = GFC_DEP_OVERLAP;
 		}
 


[PATCH] PR fortran/89344 -- INTENT(IN) CLASS(*) cannot be assigned to

2019-06-12 Thread Steve Kargl
The attach patch has lived in my tree for 4 months.
It's time to submit it.  Passed regression testing
for a long time.

An INTENT(in) entity that has CLASS(*) dummy argument
should not use SELECT TYPE to then try to assign to the
entity.  OK to commit?

2019-06-12  Steven G. Kargl  

PR fortran/89344
* expr.c (gfc_check_vardef_context): Check for INTENT(IN) variable
in SELECT TYPE construct.

2019-06-12  Steven G. Kargl  

PR fortran/89344
* gfortran.dg/pr89344.f90: New test.

-- 
Steve
Index: gcc/fortran/expr.c
===
--- gcc/fortran/expr.c	(revision 270774)
+++ gcc/fortran/expr.c	(working copy)
@@ -6086,7 +6095,12 @@ gfc_check_vardef_context (gfc_expr* e, bool pointer, b
 	check_intentin = false;
 	}
 }
-  if (check_intentin && sym->attr.intent == INTENT_IN)
+
+  if (check_intentin
+  && (sym->attr.intent == INTENT_IN
+	  || (sym->attr.select_type_temporary && sym->assoc
+	  && sym->assoc->target && sym->assoc->target->symtree
+	  && sym->assoc->target->symtree->n.sym->attr.intent == INTENT_IN)))
 {
   if (pointer && is_pointer)
 	{
@@ -6098,10 +6112,12 @@ gfc_check_vardef_context (gfc_expr* e, bool pointer, b
 	}
   if (!pointer && !is_pointer && !sym->attr.pointer)
 	{
+	  const char *name = sym->attr.select_type_temporary
+			   ? sym->assoc->target->symtree->name : sym->name;
 	  if (context)
 	gfc_error ("Dummy argument %qs with INTENT(IN) in variable"
 		   " definition context (%s) at %L",
-		   sym->name, context, &e->where);
+		   name, context, &e->where);
 	  return false;
 	}
 }
Index: /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/pr89344.f90
===
--- /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/pr89344.f90	(nonexistent)
+++ /safe/sgk/gcc/gccx/gcc/testsuite/gfortran.dg/pr89344.f90	(working copy)
@@ -0,0 +1,15 @@
+! { dg-do compile }
+program demo_setval
+   call setval(value)
+   write(*,*)'VALUE=',value
+   contains
+  subroutine setval(value)
+ class(*),intent(in) :: value
+ select type(value)
+type is (integer)
+   value = 10 ! { dg-error "in variable definition context" }
+type is (real)
+   value = 10.20  ! { dg-error "in variable definition context" }
+ end select
+  end subroutine setval
+end program demo_setval


Re: [PATCH] PR fortran/90002 -- Free array-spec for assumed-shaped coarray

2019-06-12 Thread Steve Kargl
Thanks.

Committed as revision  272201.

-- 
steve


On Wed, Jun 12, 2019 at 06:14:11AM +0100, Paul Richard Thomas wrote:
> OK, Steve
> 
> Thanks
> 
> Paul
> 
> On Wed, 12 Jun 2019 at 02:04, Steve Kargl
>  wrote:
> >
> > The attached patch fixes an ICE when freeing an array-spec
> > that involves an assumed-shaped coarray (see testcase).  The
> > patch regression tests cleanly.  I don't use coarrays, so
> > cannot say whether the testcase is valid Fortran.  I simply
> > fixed the ICE.  OK to commit?
> >
> > 2019-06-11  Steven G. Kargl  
> >
> > PR fortran/90002
> > * array.c (gfc_free_array_spec): When freeing an array-spec, avoid
> > an ICE for assumed-shape coarrays
> >
> > 2019-06-11  Steven G. Kargl  
> >
> > PR fortran/90002
> > * gfortran.dg/pr90002.f90: New test.
> >
> > --
> > Steve
> 
> 
> 
> -- 
> "If you can't explain it simply, you don't understand it well enough"
> - Albert Einstein

-- 
Steve
20170425 https://www.youtube.com/watch?v=VWUpyCsUKR4
20161221 https://www.youtube.com/watch?v=IbCHE-hONow


[PATH] Patch to fix -finstrument-functions-exclude-function-list handling of namespaces and escaped commas

2019-06-12 Thread Oliver Browne
Patch fixes following PRs:
c++/90816 - -finstrument-functions-exclude-function-list improperly
handles namespace/class definitions
c++/90809 - -finstrument-functions-exclude-function-list mishandles
comma escaping

Fixes as follows:
At flag_instrument_functions_exclude_p [gimplify.c]
Using lang_hooks.decl_printable_name (fndecl, 1) to get namespace /
class information as part of printable name to allow for
inclusion of namespace / class specification when passing symbols to
-finstrument-functions-exclude-function-list. Was
previously lang_hooks.decl_printable_name (fndecl, 0).

At add_comma_separated_to_vector [opts.c]
Added writing of a null character to w after primary loop finishes, to
account for offset between r and w when r reaches end of
passed string.

from Oliver Browne 
PR c++/90816
PR c++/90809
 * gimplify.c (flag_instrument_functions_exclude_p): include namespace
   information as part of decl name
 * opts.c (add_comma_separated_to_vector): add null character to correct
   position in last token added to token vector
Index: gimplify.c
===
--- gimplify.c 2019-06-12 19:07:26.872077000 +0100
+++ gimplify.c 2019-06-12 18:55:10.609255000 +0100
@@ -13987,11 +13987,17 @@ flag_instrument_functions_exclude_p (tre
 {
   const char *name;
-  int i;
+  unsigned int i;
   char *s;

-  name = lang_hooks.decl_printable_name (fndecl, 0);
-  FOR_EACH_VEC_ELT (*v, i, s)
+  name = lang_hooks.decl_printable_name (fndecl, 1);
+   for(i = 0; i < v->length(); i++){
+ s = (*v)[i];
+ if(strstr(name, s) != NULL){
+   return(true);
+ }
+   }
+/*  FOR_EACH_VEC_ELT (*v, i, s)
  if (strstr (name, s) != NULL)
-   return true;
+ return true;*/
 }

@@ -14278,3 +14284,3 @@ gimplify_hasher::equal (const elt_t *p1,

   return true;
-}
\ No newline at end of file
+}
Index: opts.c
===
--- opts.c 2019-06-12 19:10:04.354612000 +0100
+++ opts.c 2019-06-12 18:53:43.675852000 +0100
@@ -263,7 +263,8 @@ add_comma_separated_to_vector (void **pv
  *w++ = *r++;
 }
-  if (*token_start != '\0')
+  *w = '\0';
+  if (*token_start != '\0'){
 v->safe_push (token_start);
-
+  }
   *pvec = v;
 }
@@ -3151,3 +3152,3 @@ option_name (diagnostic_context *context
   else
 return NULL;
-}
\ No newline at end of file
+}


Re: *ping* Re: [PATCH] PR fortran/89103 - Allow blank format items in format strings

2019-06-12 Thread Steve Kargl
On Tue, Jun 11, 2019 at 11:50:40AM +0200, Jakub Jelinek wrote:
> On Tue, Jun 11, 2019 at 10:30:59AM +0100, Mark Eggleston wrote:
> >     Jim MacArthur 
> >     Mark Eggleston 
> 
> Two spaces before < instead of one.
> 
> This is not a patch review, just comments:

Mark, do you plan to address any of Jakub's comments.
Do note, I just 'OK' Jakub's patch that uses G_()
forms for the strings.

Also, do you have plans to contribute additional
patches (either for -fdec* extensions or preferrably
to help with bug fixes and new features)?  It may be
advantageous for you to get a commit bit.

-- 
Steve


Re: [PATCH] check_format fixes

2019-06-12 Thread Steve Kargl
On Wed, Jun 12, 2019 at 09:24:39AM +0200, Jakub Jelinek wrote:
> On Tue, Jun 11, 2019 at 12:49:32PM +0200, Jakub Jelinek wrote:
> > Tested on x86_64-linux with check-gfortran, ok for trunk if full 
> > bootstrap/regtest
> > passes?
> > 
> > 2019-06-11  Jakub Jelinek  
> > 
> > * io.c (check_format): Use G_(...) instead of _(...) for error values,
> > append " in format string at %L" to all strings but unexpected_element,
> > use error as gfc_error formating string instead of
> > "%s in format string at %L".  Formatting fixes.
> 
> FYI, bootstrapped/regtested successfully on x86_64-linux and i686-linux.
> 

OK.

-- 
Steve


[PATCH][openacc] Disable pass_thread_jumps for IFN_UNIQUE

2019-06-12 Thread Tom de Vries
Hi,

If we compile the openacc testcase with -fopenacc -O2, we run into a SIGSEGV
or assert.  The root cause for this is that pass_thread_jumps breaks the
invariant that OACC_FORK and OACC_JOIN mark the start and end of a
single-entry-single-exit region.

Fix this by bailing out when encountering an IFN_UNIQUE in
thread_jumps::profitable_jump_thread_path.

Bootstrapped and reg-tested on x86_64.
Build and reg-tested libgomp on x86_64 with nvptx accelerator.

OK for trunk?

Thanks,
- Tom

[openacc] Disable pass_thread_jumps for IFN_UNIQUE

2019-06-12  Tom de Vries  

PR tree-optimization/90009
* tree-ssa-threadbackward.c (thread_jumps::profitable_jump_thread_path):
Return NULL if bb contains IFN_UNIQUE.

* testsuite/libgomp.oacc-c-c++-common/pr90009.c: New test.

---
 gcc/tree-ssa-threadbackward.c  |  5 
 .../testsuite/libgomp.oacc-c-c++-common/pr90009.c  | 34 ++
 2 files changed, 39 insertions(+)

diff --git a/gcc/tree-ssa-threadbackward.c b/gcc/tree-ssa-threadbackward.c
index 81dc05dc831..1ff870ad00b 100644
--- a/gcc/tree-ssa-threadbackward.c
+++ b/gcc/tree-ssa-threadbackward.c
@@ -261,6 +261,11 @@ thread_jumps::profitable_jump_thread_path (basic_block 
bbi, tree name,
   gsi_next_nondebug (&gsi))
{
  gimple *stmt = gsi_stmt (gsi);
+ if (gimple_call_internal_p (stmt, IFN_UNIQUE))
+   {
+ m_path.pop ();
+ return NULL;
+   }
  /* Do not count empty statements and labels.  */
  if (gimple_code (stmt) != GIMPLE_NOP
  && !(gimple_code (stmt) == GIMPLE_ASSIGN
diff --git a/libgomp/testsuite/libgomp.oacc-c-c++-common/pr90009.c 
b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr90009.c
new file mode 100644
index 000..58d1039dd8d
--- /dev/null
+++ b/libgomp/testsuite/libgomp.oacc-c-c++-common/pr90009.c
@@ -0,0 +1,34 @@
+/* { dg-do run } */
+
+#include 
+
+#define N 100
+
+int data[N];
+
+int
+main (void)
+{
+  int n = N, b = 3;
+#pragma acc parallel num_workers(2)
+  {
+int c;
+if (n)
+  c = 0;
+else
+  c = b;
+
+#pragma acc loop worker
+for (int i = 0; i < n; i++)
+  data[i] = 1;
+
+if (c)
+  data[0] = 2;
+  }
+
+  for (int i = 0; i < n; i++)
+if (data[i] != 1)
+  abort ();
+
+  return 0;
+}


[PATCH,RFC,V2 3/3] Setup for CTF generation and emission

2019-06-12 Thread Indu Bhagat
Initialize CTF container when -gtLEVEL is specified.  Generate CTF debug info
for global decls.  Import the CTF header from binutils.

[Changes from V1]
  - Instead of using the debug hooks infrastructure, the generation and
emission of CTF is done by exposing the CTF APIs to the rest of the 
compiler.
  - Calls to generate CTF are placed in symbol_table::finalize_compilation_unit
and rest_of_decl_compilation.
  - Call to emit CTF is placed in symbol_table::finalize_compilation_unit

gcc/ChangeLog:
 
* Makefile.in: Add ctfout.* files to GTFILES.
* cgraphunit.c (symbol_table::finalize_compilation_unit): Generate CTF
debug info for decl. Invoke CTF debug info emission.
* ctfout.c: New file.
* ctfout.h: Likewise.
* gengtype.c (open_base_files): Add ctfout.h to ifiles.
* passes.c (rest_of_decl_compilation): Generate CTF debug info for
decl.
* toplev.c (process_options): Warn and ignore -gtLEVEL if frontend is
not C.
(toplev::finalize): Finalize CTF containers.

gcc/testsuite/ChangeLog:

* gcc.dg/debug/ctf/ctf-1.c: New test.
* gcc.dg/debug/ctf/ctf-preamble-1.c: Likewise.
* gcc.dg/debug/ctf/ctf.exp: Add CTF testsuite.
* gcc.dg/debug/dwarf2-ctf-1.c: New test.

include/ChangeLog:

* ctf.h: Import from binutils.

---
 gcc/ChangeLog   |  14 +
 gcc/Makefile.in |   3 +
 gcc/cgraphunit.c|  12 +-
 gcc/ctfout.c| 163 
 gcc/ctfout.h|  52 +++
 gcc/gengtype.c  |   4 +-
 gcc/passes.c|   7 +-
 gcc/testsuite/ChangeLog |   7 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c  |   6 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c |  11 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp  |  41 ++
 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c   |   7 +
 gcc/toplev.c|  18 +
 include/ChangeLog   |   4 +
 include/ctf.h   | 483 
 15 files changed, 826 insertions(+), 6 deletions(-)
 create mode 100644 gcc/ctfout.c
 create mode 100644 gcc/ctfout.h
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c
 create mode 100644 include/ctf.h

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index d9e0885..8ce2405 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1254,6 +1254,7 @@ OBJS = \
cfgloopanal.o \
cfgloopmanip.o \
cfgrtl.o \
+   ctfout.o \
symtab.o \
cgraph.o \
cgraphbuild.o \
@@ -2532,6 +2533,8 @@ GTFILES = $(CPPLIB_H) $(srcdir)/input.h 
$(srcdir)/coretypes.h \
   $(srcdir)/dwarf2asm.c \
   $(srcdir)/dwarf2cfi.c \
   $(srcdir)/dwarf2out.c \
+  $(srcdir)/ctfout.h \
+  $(srcdir)/ctfout.c \
   $(srcdir)/tree-vect-generic.c \
   $(srcdir)/dojump.c $(srcdir)/emit-rtl.h \
   $(srcdir)/emit-rtl.c $(srcdir)/except.h $(srcdir)/explow.c $(srcdir)/expr.c \
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index f4d6688..2873d97 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -205,6 +205,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "lto-section-names.h"
 #include "stringpool.h"
 #include "attribs.h"
+#include "ctfout.h"
 
 /* Queue of cgraph nodes scheduled to be added into cgraph.  This is a
secondary queue used during optimization to accommodate passes that
@@ -2844,17 +2845,22 @@ symbol_table::finalize_compilation_unit (void)
 
   if (!seen_error ())
 {
-  /* Emit early debug for reachable functions, and by consequence,
-locally scoped symbols.  */
+  /* Emit early debug and ctf debug info for reachable functions, and by
+consequence, locally scoped symbols.  */
   struct cgraph_node *cnode;
   FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (cnode)
-   (*debug_hooks->early_global_decl) (cnode->decl);
+   {
+ (*debug_hooks->early_global_decl) (cnode->decl);
+ ctf_early_global_decl (cnode->decl);
+   }
 
   /* Clean up anything that needs cleaning up after initial debug
 generation.  */
   debuginfo_early_start ();
   (*debug_hooks->early_finish) (main_input_filename);
+  ctf_early_finish (main_input_filename);
   debuginfo_early_stop ();
+
 }
 
   /* Finally drive the pass manager.  */
diff --git a/gcc/ctfout.c b/gcc/ctfout.c
new file mode 100644
index 000..debb384
--- /dev/null
+++ b/gcc/ctfout.c
@@ -0,0 +1,163 @@
+/* Output ctf format from GCC.
+   Copyright (C) 2019 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistrib

[PATCH,RFC,V2 2/3] Add CTF command line options : -gtLEVEL

2019-06-12 Thread Indu Bhagat
-gtLEVEL is used to request CTF debug information and also to specify how much
CTF debug information.

[Changes from V1]
  None

gcc/ChangeLog:
 
* common.opt: Add CTF debug info options.
* doc/invoke.texi: Document the CTF debug info options.
* flag-types.h (enum ctf_debug_info_levels): New enum.
* opts.c (common_handle_option): New Function.
(set_ctf_debug_level): Handle the new CTF debug info options.

---
 gcc/ChangeLog   |  8 
 gcc/common.opt  |  9 +
 gcc/doc/invoke.texi | 16 
 gcc/flag-types.h| 13 +
 gcc/opts.c  | 26 ++
 5 files changed, 72 insertions(+)

diff --git a/gcc/common.opt b/gcc/common.opt
index e140416..499f27c 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -125,6 +125,11 @@ enum debug_info_levels debug_info_level = DINFO_LEVEL_NONE
 Variable
 bool use_gnu_debug_info_extensions
 
+; Level of CTF debugging information we are producing.  See flag-types.h
+; for the definitions of the different possible levels.
+Variable
+enum ctf_debug_info_levels ctf_debug_info_level = CTFINFO_LEVEL_NONE
+
 ; Original value of maximum field alignment in bytes, specified via
 ; -fpack-struct=.
 Variable
@@ -2995,6 +3000,10 @@ gcolumn-info
 Common Driver Var(debug_column_info,1) Init(1)
 Record DW_AT_decl_column and DW_AT_call_column in DWARF.
 
+gt
+Common Driver RejectNegative JoinedOrMissing
+Generate CTF debug information at default level.
+
 gdwarf
 Common Driver JoinedOrMissing Negative(gdwarf-)
 Generate debug information in default version of DWARF format.
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 1520b2c..babf037 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -374,6 +374,7 @@ Objective-C and Objective-C++ Dialects}.
 @item Debugging Options
 @xref{Debugging Options,,Options for Debugging Your Program}.
 @gccoptlist{-g  -g@var{level}  -gdwarf  -gdwarf-@var{version} @gol
+-gt  -gt@var{level} @gol
 -ggdb  -grecord-gcc-switches  -gno-record-gcc-switches @gol
 -gstabs  -gstabs+  -gstrict-dwarf  -gno-strict-dwarf @gol
 -gas-loc-support  -gno-as-loc-support @gol
@@ -7784,6 +7785,21 @@ other DWARF-related options such as
 @option{-fno-dwarf2-cfi-asm}) retain a reference to DWARF Version 2
 in their names, but apply to all currently-supported versions of DWARF.
 
+@item -gt
+@itemx -gt@var{level}
+@opindex gt
+Request CTF debug information and use level to specify how much CTF debug
+information should be produced.  If -gt is specified without a value for level,
+the default level of CTF debug information is 2.
+
+Level 0 produces no CTF debug information at all.  Thus, -gt0 negates -gt.
+
+Level 1 produces CTF information for tracebacks only.  This includes callsite
+information, but does not include type information.
+
+Level 2 produces type information for entities (functions, data objects etc.)
+at file-scope or global-scope only.
+
 @item -gstabs
 @opindex gstabs
 Produce debugging information in stabs format (if that is supported),
diff --git a/gcc/flag-types.h b/gcc/flag-types.h
index a210328..61a1432 100644
--- a/gcc/flag-types.h
+++ b/gcc/flag-types.h
@@ -105,6 +105,19 @@ enum dwarf_gnat_encodings
   Emit GNAT encodings for the rest.  */
 };
 
+/* CTF debug info levels.
+   CTF debug info levels are untied with DWARF debug info levels because CTF
+   may co-exist with DWARF.  */
+enum ctf_debug_info_levels
+{
+  CTFINFO_LEVEL_NONE = 0, /* Write no CTF debug info.  */
+  CTFINFO_LEVEL_TERSE = 1,/* Write CTF information to support tracebacks
+only.  Not Implemented.  */
+  CTFINFO_LEVEL_NORMAL = 2/* Write CTF type information for all entities
+(functions, data objects, variables etc.)
+at file-scope or global-scope only.  */
+};
+
 /* Enumerate Objective-c instance variable visibility settings. */
 
 enum ivar_visibility
diff --git a/gcc/opts.c b/gcc/opts.c
index 64f94ac..a471a76 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -195,6 +195,8 @@ static void set_debug_level (enum debug_info_type type, int 
extended,
 const char *arg, struct gcc_options *opts,
 struct gcc_options *opts_set,
 location_t loc);
+static void set_ctf_debug_level (const char *arg,
+struct gcc_options *opts, location_t loc);
 static void set_fast_math_flags (struct gcc_options *opts, int set);
 static void decode_d_option (const char *arg, struct gcc_options *opts,
 location_t loc, diagnostic_context *dc);
@@ -2683,6 +2685,10 @@ common_handle_option (struct gcc_options *opts,
   opts->x_flag_stack_usage_info = value != 0;
   break;
 
+case OPT_gt:
+  set_ctf_debug_level (arg, opts, loc);
+  break;
+
 case OPT_g:
   set_debug_level (NO_DEBUG, DEFAULT_GDB_EXTENS

[PATCH,RFC,V2 0/3] Support for CTF in GCC

2019-06-12 Thread Indu Bhagat
Hello,

Thanks for the feedback on the previous patch set.

This is the second posting of the RFC patch for CTF support in GCC. This patch
set does not rely on debug hooks, but it keeps CTF and DWARF debug info
generation separated in the compiler.

For CTF generation, callsites in symbol_table::finalize_compilation_unit and
rest_of_decl_compilation are used. For CTF emission, callsite in
symbol_table::finalize_compilation_unit is used.

Summary of the GCC RFC V2 patch set :
Patch 1 and Patch 2 have remain unchanged since V1.
Patch 1 is a simple addition of a new function lang_GNU_GIMPLE to check for
GIMPLE frontend.
Patch 2 and Patch 3 set up the framework for CTF support in GCC :
-- Patch 2 adds the new command line option for generating CTF. CTF generation
   is enabled in the compiler by specifying an explicit -gt or
   -gtLEVEL[LEVEL=1,2] :

-gtLEVEL

This is used to request CTF debug information and to specify how much CTF
debug information, LEVEL[=0,1,2] can be specified. If -gt is specified
(with no LEVEL), the default value of LEVEL is 2.

-gt0 (Level 0) produces no CTF debug information at all. Thus, -gt0
negates -gt.

-gt1 (Level 1) produces CTF information for tracebacks only. This includes
CTF callsite information, but does not include type information for other
entities.

-gt2 (Level 2) produces type information for entities (functions, variables
etc.) at file-scope or global-scope only. This level of information can be
used by dynamic tracers like DTrace.

--  Patch 3 initializes the CTF container if user-level option for CTF
generation is specified. CTF is generated for all to-be-emitted global
decls if gtLEVEL of 2 is specified. 

Tested on x86_64-linux and sparc64-linux.

Thanks

Indu Bhagat (3):
  Add new function lang_GNU_GIMPLE
  Add CTF command line options : -gtLEVEL
  Setup for CTF generation and emission

 gcc/ChangeLog   |  27 ++
 gcc/Makefile.in |   3 +
 gcc/cgraphunit.c|  12 +-
 gcc/common.opt  |   9 +
 gcc/ctfout.c| 163 
 gcc/ctfout.h|  52 +++
 gcc/doc/invoke.texi |  16 +
 gcc/flag-types.h|  13 +
 gcc/gengtype.c  |   4 +-
 gcc/langhooks.c |   9 +
 gcc/langhooks.h |   1 +
 gcc/opts.c  |  26 ++
 gcc/passes.c|   7 +-
 gcc/testsuite/ChangeLog |   7 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c  |   6 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c |  11 +
 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp  |  41 ++
 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c   |   7 +
 gcc/toplev.c|  18 +
 include/ChangeLog   |   4 +
 include/ctf.h   | 483 
 21 files changed, 913 insertions(+), 6 deletions(-)
 create mode 100644 gcc/ctfout.c
 create mode 100644 gcc/ctfout.h
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf-preamble-1.c
 create mode 100644 gcc/testsuite/gcc.dg/debug/ctf/ctf.exp
 create mode 100644 gcc/testsuite/gcc.dg/debug/dwarf2-ctf-1.c
 create mode 100644 include/ctf.h

-- 
1.8.3.1



[PATCH,RFC,V2 1/3] Add new function lang_GNU_GIMPLE

2019-06-12 Thread Indu Bhagat
[Changes from V1]
  None

gcc/ChangeLog:

* langhooks.c (lang_GNU_GIMPLE): New Function.
* langhooks.h: New Prototype.

---
 gcc/ChangeLog   | 5 +
 gcc/langhooks.c | 9 +
 gcc/langhooks.h | 1 +
 3 files changed, 15 insertions(+)

diff --git a/gcc/langhooks.c b/gcc/langhooks.c
index 2df97f2..f3a64c1 100644
--- a/gcc/langhooks.c
+++ b/gcc/langhooks.c
@@ -825,3 +825,12 @@ lang_GNU_OBJC (void)
 {
   return strncmp (lang_hooks.name, "GNU Objective-C", 15) == 0;
 }
+
+/* Returns true if the current lang_hooks represents the GNU GIMPLE
+   frontend.  */
+
+bool
+lang_GNU_GIMPLE (void)
+{
+  return strncmp (lang_hooks.name, "GNU GIMPLE", 10) == 0;
+}
diff --git a/gcc/langhooks.h b/gcc/langhooks.h
index a45579b..0ac794e 100644
--- a/gcc/langhooks.h
+++ b/gcc/langhooks.h
@@ -570,5 +570,6 @@ extern bool lang_GNU_C (void);
 extern bool lang_GNU_CXX (void);
 extern bool lang_GNU_Fortran (void);
 extern bool lang_GNU_OBJC (void);
+extern bool lang_GNU_GIMPLE (void);
 
 #endif /* GCC_LANG_HOOKS_H */
-- 
1.8.3.1



Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Martin Sebor

On 6/12/19 10:40 AM, Jakub Jelinek wrote:

On Wed, Jun 12, 2019 at 10:13:57AM -0600, Martin Sebor wrote:

But GCC doesn't support such an implementation, does it?


Why would that be relevant?


Obviously because it makes no sense to cater to all conceivable
extensions provided by all sorts of implementations out there.

There are libc implementations out there that accept null pointer
arguments to functions like memcpy with zero sizes, for example.
Or those that accept overlapping objects in calls to strcpy.  GCC
itself handles those gracefully, yet warns for such constructs
nonetheless.  It's useful because those misuses are likely hidden
bugs, even if they don't always manifest themselves.


The warning would cause people to make portable
code less portable (by removing the alloca (0) calls that were added there
for portability reasons), or add hacks to work around the warning (whether
#pragma GCC diagnostic or something else).  That is something we do not
want people to do.


You asked for input and I gave it to you.  If your mind was already
made up and you're only willing to accept feedback that agrees with
your view, don't ask.

Martin


Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Jakub Jelinek
On Wed, Jun 12, 2019 at 10:13:57AM -0600, Martin Sebor wrote:
> But GCC doesn't support such an implementation, does it?

Why would that be relevant?  The warning would cause people to make portable
code less portable (by removing the alloca (0) calls that were added there
for portability reasons), or add hacks to work around the warning (whether
#pragma GCC diagnostic or something else).  That is something we do not
want people to do.

Jakub


Re: [PATCH] netbsd EABI support

2019-06-12 Thread coypu
I think copyright assignment is done. Thanks for bearing with me.

I noticed the version I submitted in April is missing some changes we
discussed on October 2018.

I took the patch from then and removed -matpcs too, the unnecessary
change to libgcc t-netbsd (which is the OABI configuration anyway), and
some whitespace git warned about.

Added the change to libatomic ifunc usage, since we recently claim ifunc
support on netbsd.
(Got lost in https://gcc.gnu.org/ml/gcc-patches/2019-04/msg00290.html -
that one is my fault for submitting patches badly)


Matt Thomas 
matthew green 
Nick Hudson 
Maya Rashish 

gcc/ChangeLog:

* config.gcc (arm*-*-netbsdelf*) Add support for EABI configuration
* config.host (arm*-*-netbsd*): Build driver-arm.o
* config/arm/netbsd-eabi.h: New file.
* config/arm/netbsd-elf.h: Don't pass -matpcs unconditionally.
* config/netbsd-elf.h: Define SUBTARGET_EXTRA_SPECS.

libgcc/ChangeLog:

* config.host (arm*-*-netbsdelf*): Add support for EABI configuration
* config/arm/t-netbsd: LIB1ASMFUNCS: Append to existing set.
 HOST_LIBGCC2_CFLAGS: workaround possible bug
* config/arm/t-netbsd-eabi: New file.

libatomic/ChangeLog:
* configure.tgt: Exclude arm*-*-netbsd* from try_ifunc.



---
 gcc/config.gcc  | 29 +-
 gcc/config.host |  2 +-
 gcc/config/arm/netbsd-eabi.h| 97 +
 gcc/config/arm/netbsd-elf.h |  3 +-
 gcc/config/netbsd-elf.h | 14 +
 libatomic/configure.tgt |  2 +-
 libgcc/config.host  | 11 +++-
 libgcc/config/arm/t-netbsd  |  8 +++
 libgcc/config/arm/t-netbsd-eabi | 18 ++
 9 files changed, 177 insertions(+), 7 deletions(-)
 create mode 100644 gcc/config/arm/netbsd-eabi.h
 create mode 100644 libgcc/config/arm/t-netbsd-eabi

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 6b00c387247..9fe57f4c7de 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1159,10 +1159,33 @@ arm*-*-freebsd*)# ARM FreeBSD EABI
with_tls=${with_tls:-gnu}
;;
 arm*-*-netbsdelf*)
-   tm_file="dbxelf.h elfos.h ${nbsd_tm_file} arm/elf.h arm/aout.h 
${tm_file} arm/netbsd-elf.h"
-   extra_options="${extra_options} netbsd.opt netbsd-elf.opt"
-   tmake_file="${tmake_file} arm/t-arm"
target_cpu_cname="strongarm"
+   tmake_file="${tmake_file} arm/t-arm"
+   tm_file="dbxelf.h elfos.h ${nbsd_tm_file} arm/elf.h"
+   extra_options="${extra_options} netbsd.opt netbsd-elf.opt"
+   case ${target} in
+   arm*eb-*) tm_defines="${tm_defines} TARGET_BIG_ENDIAN_DEFAULT=1" ;;
+   esac
+   case ${target} in
+   arm*-*-netbsdelf-*eabi*)
+   tm_file="$tm_file arm/bpabi.h arm/netbsd-elf.h arm/netbsd-eabi.h"
+   tmake_file="$tmake_file arm/t-bpabi arm/t-netbsdeabi"
+   ;;
+   *)
+   tm_file="$tm_file arm/netbsd-elf.h"
+   tmake_file="$tmake_file arm/t-netbsd"
+   ;;
+   esac
+   tm_file="${tm_file} arm/aout.h arm/arm.h"
+   case ${target} in
+   arm*-*-netbsdelf-*eabihf*)
+   tm_defines="${tm_defines} 
TARGET_DEFAULT_FLOAT_ABI=ARM_FLOAT_ABI_HARD"
+   ;;
+   esac
+   case ${target} in
+   armv6*) target_cpu_cname="arm1176jzf-s";;
+   armv7*) target_cpu_cname="generic-armv7-a";;
+   esac
;;
 arm*-*-linux-*)# ARM GNU/Linux with ELF
tm_file="dbxelf.h elfos.h gnu-user.h linux.h linux-android.h 
glibc-stdint.h arm/elf.h arm/linux-gas.h arm/linux-elf.h"
diff --git a/gcc/config.host b/gcc/config.host
index 2213404dd0e..82409e32f96 100644
--- a/gcc/config.host
+++ b/gcc/config.host
@@ -107,7 +107,7 @@ case ${host} in
;;
 esac
 ;;
-  arm*-*-freebsd* | arm*-*-linux* | arm*-*-fuchsia*)
+  arm*-*-freebsd* | arm*-*-netbsd* | arm*-*-linux* | arm*-*-fuchsia*)
 case ${target} in
   arm*-*-*)
host_extra_gcc_objs="driver-arm.o"
diff --git a/gcc/config/arm/netbsd-eabi.h b/gcc/config/arm/netbsd-eabi.h
new file mode 100644
index 000..5cbfcc92a59
--- /dev/null
+++ b/gcc/config/arm/netbsd-eabi.h
@@ -0,0 +1,97 @@
+/* Definitions of target machine for GNU compiler, NetBSD/arm ELF version.
+   Copyright (C) 2002, 2003, 2004, 2005, 2007 Free Software Foundation, Inc.
+   Contributed by Wasabi Systems, Inc.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along 

Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Martin Sebor

On 6/12/19 9:25 AM, Michael Matz wrote:

Hi,

On Wed, 12 Jun 2019, Martin Sebor wrote:


Otherwise LGTM as the patch, but I'd like to hear from others whether
it is kosher to add such a special case to the warn_unused_result
attribute warning.  And if the agreement is yes, I think it should be
documented somewhere that alloca (0) will not warn even when the call
has such an attribute (probably in the description of
warn_unused_result attribute).


I'm not very happy about adding another special case to alloca
(on top of not diagnosing zero allocation by -Walloc-zero).
There is no valid use case for the zero argument, whether or not
the return value is used.


That's the thing, there _is_ a valid use case for supplying a zero
argument and then the returned value should _not_ be used.  There are
alloca implementations that do something (freeing memory) when
called with a zero size, so some (older) programs contain such calls.
Warning on those calls for the unused results is exactly the wrong thing
to do, if anything if the result is used we'd have to warn.  (That's of
course non-standard, but so is alloca itself)  And just removing these
calls isn't correct either except if it's ensured to not use an alloca
implementation with that behaviour.


But GCC doesn't support such an implementation, does it?  The only
way to use such an alloca is with -fno-builtin-alloca which should
suppress the warning.

The Linux man page highlights this and the risks of defining one's
own alloca function:

  http://man7.org/linux/man-pages/man3/alloca.3.html

In any event, the warning, just like all others, exists to help
catch common mistakes: "constructions that are not inherently
erroneous but that are risky or suggest there may have been
an error".  It's not meant to accommodate every conceivable
corner case or oddball implementation.  Users of those can
easily disable the warning #pragma GCC diagnostic.  Doing that
makes the intent explicit both to the compiler and to other
tools and programmers.

Martin



(In fact I think our builtin_alloca implementation could benefit when we
added that behaviour as well; it's a natural wish to be able to free
memory that you allocated).


Ciao,
Michael.





Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Michael Matz
Hi,

On Wed, 12 Jun 2019, Martin Sebor wrote:

> > Otherwise LGTM as the patch, but I'd like to hear from others whether 
> > it is kosher to add such a special case to the warn_unused_result 
> > attribute warning.  And if the agreement is yes, I think it should be 
> > documented somewhere that alloca (0) will not warn even when the call 
> > has such an attribute (probably in the description of 
> > warn_unused_result attribute).
> 
> I'm not very happy about adding another special case to alloca
> (on top of not diagnosing zero allocation by -Walloc-zero).
> There is no valid use case for the zero argument, whether or not
> the return value is used.

That's the thing, there _is_ a valid use case for supplying a zero 
argument and then the returned value should _not_ be used.  There are 
alloca implementations that do something (freeing memory) when 
called with a zero size, so some (older) programs contain such calls.  
Warning on those calls for the unused results is exactly the wrong thing 
to do, if anything if the result is used we'd have to warn.  (That's of 
course non-standard, but so is alloca itself)  And just removing these 
calls isn't correct either except if it's ensured to not use an alloca 
implementation with that behaviour.

(In fact I think our builtin_alloca implementation could benefit when we 
added that behaviour as well; it's a natural wish to be able to free 
memory that you allocated).


Ciao,
Michael.


[Vectorizer] Support masking fold left reductions

2019-06-12 Thread Alejandro Martinez Vicente
Hi,

This patch adds support in the vectorizer for masking fold left reductions.
This avoids the need to insert a conditional assignment with some identity
value.

For example, this C code:

double
f (double *restrict x, int n)
{
  double res = 0.0;
  for (int i = 0; i < n; i++)
{
  res += x[i];
}
  return res;
}

Produced this for SVE:

 :
   0:   2f00e400movid0, #0x0
   4:   713fcmp w1, #0x0
   8:   5400018db.le38 
   c:   d282mov x2, #0x0// #0
  10:   93407c21sxtwx1, w1
  14:   25f8c002mov z2.d, #0
  18:   25e11fe0whilelo p0.d, xzr, x1
  1c:   25d8e3e1ptrue   p1.d
  20:   a5e24001ld1d{z1.d}, p0/z, [x0, x2, lsl #3]
  24:   04f0e3e2incdx2
  28:   05e2c021sel z1.d, p0, z1.d, z2.d
  2c:   25e11c40whilelo p0.d, x2, x1
  30:   65d82420fadda   d0, p1, d0, z1.d
  34:   5461b.ne20   // b.any
  38:   d65f03c0ret

And now I get this:

 :
   0:   2f00e400movid0, #0x0
   4:   713fcmp w1, #0x0
   8:   5400012db.le2c 
   c:   d282mov x2, #0x0// #0
  10:   93407c21sxtwx1, w1
  14:   25e11fe0whilelo p0.d, xzr, x1
  18:   a5e24001ld1d{z1.d}, p0/z, [x0, x2, lsl #3]
  1c:   04f0e3e2incdx2
  20:   65d82020fadda   d0, p0, d0, z1.d
  24:   25e11c40whilelo p0.d, x2, x1
  28:   5481b.ne18   // b.any
  2c:   d65f03c0ret

I've added a new test and run the regression testing. Ok for trunk?

Alejandro

2019-06-12  Alejandro Martinez  

gcc/
* config/aarch64/aarch64-sve.md (mask_fold_left_plus_): Renamed
from "*fold_left_plus_", updated operands order.
* doc/md.texi (mask_fold_left_plus_@var{m}): Documented new optab.
* internal-fn.c (mask_fold_left_direct): New define.
(expand_mask_fold_left_optab_fn): Likewise.
(direct_mask_fold_left_optab_supported_p): Likewise.
* internal-fn.def (MASK_FOLD_LEFT_PLUS): New internal function.
* optabs.def (mask_fold_left_plus_optab): New optab.
* tree-vect-loop.c (mask_fold_left_plus_optab): New function to get a
masked internal_fn for a reduction ifn.
(vectorize_fold_left_reduction): Add support for masking reductions.

gcc/testsuite/
* gcc.target/aarch64/sve/fadda_1.c: New test.


mask_fold_left_v3.patch
Description: mask_fold_left_v3.patch


[committed][nvptx] Assert fork has at most one join in nvptx_find_par

2019-06-12 Thread Tom de Vries
Hi,

With the test-case of PR90009 we run into a SIGSEGV in nvptx_neuter_pars,
because par->join_insn and par->join_block are NULL.

Detect this problem earlier, by adding an assert in nvptx_find_par that
asserts that a fork cannot be paired with more than one join.

Build and tested on x86_64 with nvptx accelerator enabled.

Committed to trunk.

Thanks,
- Tom

[nvptx] Assert fork has at most one join in nvptx_find_par

2019-06-12  Tom de Vries  

PR tree-optimization/90009
* config/nvptx/nvptx.c (nvptx_find_par): Assert fork has at most join.

---
 gcc/config/nvptx/nvptx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index a28099ac89d..1e34be18f75 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -3258,6 +3258,7 @@ nvptx_find_par (bb_insn_map_t *map, parallel *par, 
basic_block block)
unsigned mask = UINTVAL (XVECEXP (PATTERN (end), 0, 0));
 
gcc_assert (par->mask == mask);
+   gcc_assert (par->join_block == NULL);
par->join_block = block;
par->join_insn = end;
if (nvptx_needs_shared_bcast (mask))


Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Martin Sebor

On 6/12/19 5:37 AM, Jakub Jelinek wrote:

On Wed, Jun 12, 2019 at 01:30:14PM +0200, Martin Liška wrote:

@@ -9447,10 +9448,19 @@ do_warn_unused_result (gimple_seq seq)
  location_t loc = gimple_location (g);
  
  	  if (fdecl)

-   warning_at (loc, OPT_Wunused_result,
-   "ignoring return value of %qD "
-   "declared with attribute %",
-   fdecl);
+   {
+ /* Some C libraries use alloca(0) in order to free previously
+allocated memory by alloca calls.  */
+ if (gimple_maybe_alloca_call_p (g)
+ && gimple_call_num_args (g) == 1
+ && integer_zerop (gimple_call_arg (g, 0)))
+   ;
+ else


Wouldn't it be easier to negate the condition and avoid the weird ; else ?
I.e. if (!gimple_maybe... || gimple_call_num != 1 || !integer_zerop?


+   warning_at (loc, OPT_Wunused_result,
+   "ignoring return value of %qD declared "
+   "with attribute %",
+   fdecl);
+   }
  else
warning_at (loc, OPT_Wunused_result,
"ignoring return value of function "


Otherwise LGTM as the patch, but I'd like to hear from others whether
it is kosher to add such a special case to the warn_unused_result attribute
warning.  And if the agreement is yes, I think it should be documented
somewhere that alloca (0) will not warn even when the call has such an
attribute (probably in the description of warn_unused_result attribute).


I'm not very happy about adding another special case to alloca
(on top of not diagnosing zero allocation by -Walloc-zero).
There is no valid use case for the zero argument, whether or not
the return value is used.  When it is used it's just as dangerous
as allocating a zero-length VLA.  When it isn't used the call is
pointless entirely, and although it might be harmless, it's worth
removing from code just like any other pointless construct GCC
warns about.  So I would prefer not to suppress this warning (from
what I've read in the GDB bug it sounds like its calls to alloca(0)
are being removed).  I would also prefer to re-enable -Walloc-zero
for alloca(0).

But if there is insufficient support for this I agree that
documenting the built-ins GCC applies attribute warn_unused_result
to is a good idea.  (In fact, documenting all the attributes GCC
applies to each built-in would be helpful, but that's a much bigger
project.)

Martin


[PATCH] Improve static_assert messages for std::variant

2019-06-12 Thread Jonathan Wakely

Also fix a warning with -Wunused-parameter -Wsystem-headers.

* include/std/variant (get, get, get_if, get_if)
(variant::emplace): Change static_assert messages from "should be"
to "must be".
(hash::operator()): Remove name of unused parameter.

Tested x86_64-linux, committed to trunk.

commit 387a4a81a220a86a0a23753041191f0fef93f2da
Author: redi 
Date:   Wed Jun 12 14:52:09 2019 +

Improve static_assert messages for std::variant

Also fix a warning with -Wunused-parameter -Wsystem-headers.

* include/std/variant (get, get, get_if, get_if)
(variant::emplace): Change static_assert messages from "should be"
to "must be".
(hash::operator()): Remove name of unused parameter.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272188 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/include/std/variant b/libstdc++-v3/include/std/variant
index 60472b4c799..c86b0c8ccf3 100644
--- a/libstdc++-v3/include/std/variant
+++ b/libstdc++-v3/include/std/variant
@@ -1074,7 +1074,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get<__detail::__variant::__index_of_v<_Tp, _Types...>>(__v);
 }
 
@@ -1083,7 +1083,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get<__detail::__variant::__index_of_v<_Tp, _Types...>>(
std::move(__v));
 }
@@ -1093,7 +1093,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get<__detail::__variant::__index_of_v<_Tp, _Types...>>(__v);
 }
 
@@ -1102,7 +1102,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get<__detail::__variant::__index_of_v<_Tp, _Types...>>(
std::move(__v));
 }
@@ -1113,8 +1113,8 @@ namespace __variant
 {
   using _Alternative_type = variant_alternative_t<_Np, variant<_Types...>>;
   static_assert(_Np < sizeof...(_Types),
-   "The index should be in [0, number of alternatives)");
-  static_assert(!is_void_v<_Alternative_type>, "_Tp should not be void");
+   "The index must be in [0, number of alternatives)");
+  static_assert(!is_void_v<_Alternative_type>, "_Tp must not be void");
   if (__ptr && __ptr->index() == _Np)
return std::addressof(__detail::__variant::__get<_Np>(*__ptr));
   return nullptr;
@@ -1127,8 +1127,8 @@ namespace __variant
 {
   using _Alternative_type = variant_alternative_t<_Np, variant<_Types...>>;
   static_assert(_Np < sizeof...(_Types),
-   "The index should be in [0, number of alternatives)");
-  static_assert(!is_void_v<_Alternative_type>, "_Tp should not be void");
+   "The index must be in [0, number of alternatives)");
+  static_assert(!is_void_v<_Alternative_type>, "_Tp must not be void");
   if (__ptr && __ptr->index() == _Np)
return std::addressof(__detail::__variant::__get<_Np>(*__ptr));
   return nullptr;
@@ -1140,7 +1140,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get_if<__detail::__variant::__index_of_v<_Tp, _Types...>>(
  __ptr);
 }
@@ -1151,7 +1151,7 @@ namespace __variant
 {
   static_assert(__detail::__variant::__exactly_once<_Tp, _Types...>,
"T must occur exactly once in alternatives");
-  static_assert(!is_void_v<_Tp>, "_Tp should not be void");
+  static_assert(!is_void_v<_Tp>, "_Tp must not be void");
   return std::get_if<__detail::__variant::__index_of_v<_Tp, _Types...>>(
  __ptr);
 }
@@ -1421,7 +1421,7 @@ namespace __variant
emplace(_Args&&... __args)
{
  static_assert(_Np < sizeof...(_Types),
-   "The index should be i

[PATCH] Simplify std::scoped_lock destructor

2019-06-12 Thread Jonathan Wakely

* include/std/mutex (scoped_lock::~scoped_lock()): Use fold
expression.

Thanks to Lars for pointing out this could be simplified.

Tested x86_64-linux, committed to trunk.

commit 7445abf1ee5e14e644efd0533fff9e0c125b1d2c
Author: redi 
Date:   Wed Jun 12 14:52:06 2019 +

Simplify std::scoped_lock destructor

* include/std/mutex (scoped_lock::~scoped_lock()): Use fold
expression.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272187 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/include/std/mutex b/libstdc++-v3/include/std/mutex
index ca2c669db9a..981b6725f7c 100644
--- a/libstdc++-v3/include/std/mutex
+++ b/libstdc++-v3/include/std/mutex
@@ -576,11 +576,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
   { } // calling thread owns mutex
 
   ~scoped_lock()
-  {
-   std::apply([](_MutexTypes&... __m) {
- char __i[] __attribute__((__unused__)) = { (__m.unlock(), 0)... };
-   }, _M_devices);
-  }
+  { std::apply([](auto&... __m) { (__m.unlock(), ...); }, _M_devices); }
 
   scoped_lock(const scoped_lock&) = delete;
   scoped_lock& operator=(const scoped_lock&) = delete;


[PATCH] Replace std::to_string for integers with optimized version

2019-06-12 Thread Jonathan Wakely

The std::to_chars functions from C++17 can be used to implement
std::to_string with much better performance than calling snprintf. Only
the __detail::__to_chars_len and __detail::__to_chars_10 functions are
needed for to_string, because it always outputs base 10 representations.

The return type of __detail::__to_chars_10 should not be declared before
C++17, so the function body is extracted into a new function that can be
reused by to_string and __detail::__to_chars_10.

The existing tests for to_chars rely on to_string to check for correct
answers. Now that they use the same code that doesn't actually ensure
correctness, so add new tests for std::to_string that compare against
printf output.

* include/Makefile.am: Add new  header.
* include/Makefile.in: Regenerate.
* include/bits/basic_string.h (to_string(int), to_string(unsigned))
(to_string(long), to_string(unsigned long), to_string(long long))
(to_string(unsigned long long)): Rewrite to use __to_chars_10_impl.
* include/bits/charconv.h: New header.
(__detail::__to_chars_len): Move here from .
(__detail::__to_chars_10_impl): New function extracted from
__detail::__to_chars_10.
* include/std/charconv (__cpp_lib_to_chars): Add, but comment out.
(__to_chars_unsigned_type): New class template that reuses
__make_unsigned_selector_base::__select to pick a type.
(__unsigned_least_t): Redefine as __to_chars_unsigned_type::type.
(__detail::__to_chars_len): Move to new header.
(__detail::__to_chars_10): Add inline specifier. Move code doing the
output to __detail::__to_chars_10_impl and call that.
* include/std/version (__cpp_lib_to_chars): Add, but comment out.
* testsuite/21_strings/basic_string/numeric_conversions/char/
to_string.cc: Fix reference in comment. Remove unused variable.
* testsuite/21_strings/basic_string/numeric_conversions/char/
to_string_int.cc: New test.

Tested x86_64-linux, committed to trunk.

commit 00f08bc3d9bdc3c5bc75334dfa780918e7c7ee25
Author: redi 
Date:   Wed Jun 12 14:52:02 2019 +

Replace std::to_string for integers with optimized version

The std::to_chars functions from C++17 can be used to implement
std::to_string with much better performance than calling snprintf. Only
the __detail::__to_chars_len and __detail::__to_chars_10 functions are
needed for to_string, because it always outputs base 10 representations.

The return type of __detail::__to_chars_10 should not be declared before
C++17, so the function body is extracted into a new function that can be
reused by to_string and __detail::__to_chars_10.

The existing tests for to_chars rely on to_string to check for correct
answers. Now that they use the same code that doesn't actually ensure
correctness, so add new tests for std::to_string that compare against
printf output.

* include/Makefile.am: Add new  header.
* include/Makefile.in: Regenerate.
* include/bits/basic_string.h (to_string(int), to_string(unsigned))
(to_string(long), to_string(unsigned long), to_string(long long))
(to_string(unsigned long long)): Rewrite to use __to_chars_10_impl.
* include/bits/charconv.h: New header.
(__detail::__to_chars_len): Move here from .
(__detail::__to_chars_10_impl): New function extracted from
__detail::__to_chars_10.
* include/std/charconv (__cpp_lib_to_chars): Add, but comment out.
(__to_chars_unsigned_type): New class template that reuses
__make_unsigned_selector_base::__select to pick a type.
(__unsigned_least_t): Redefine as __to_chars_unsigned_type::type.
(__detail::__to_chars_len): Move to new header.
(__detail::__to_chars_10): Add inline specifier. Move code doing the
output to __detail::__to_chars_10_impl and call that.
* include/std/version (__cpp_lib_to_chars): Add, but comment out.
* testsuite/21_strings/basic_string/numeric_conversions/char/
to_string.cc: Fix reference in comment. Remove unused variable.
* testsuite/21_strings/basic_string/numeric_conversions/char/
to_string_int.cc: New test.

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@272186 
138bc75d-0d04-0410-961f-82ee72b054a4

diff --git a/libstdc++-v3/include/Makefile.am b/libstdc++-v3/include/Makefile.am
index 92975b1ddc1..742f2c38ad5 100644
--- a/libstdc++-v3/include/Makefile.am
+++ b/libstdc++-v3/include/Makefile.am
@@ -102,6 +102,7 @@ bits_headers = \
${bits_srcdir}/boost_concept_check.h \
${bits_srcdir}/c++0x_warning.h \
${bits_srcdir}/char_traits.h \
+   ${bits_srcdir}/charconv.h \
${bits_srcdir}/codecvt.h \
${bits_srcdir}/concept_check.h \
${bits_srcdir}/cpp_type

[PATCH] Make vectorizer versioning re-use if-conversion versioned loops

2019-06-12 Thread Richard Biener


This avoids loop_version () calls when if-conversion already versioned
the loops and simplifies vect_loop_versioning because we need not
do as much fixup.  There's followup work to do for the profile scaling.
Honza - any suggestion on how to apply a different true/false 
profile to an existing condition and its branches?

I've built SPEC 2006 with this and see 132 loop versions re-used
from 3254 versionings done by the vectorizer (most loops do not
need if-conversion).

Bootstrap / regtest running on x86_64-unknown-linux-gnu.

Richard.

* tree-vectorizer.h (vect_loop_vectorized_call): Declare.
* tree-vectorizer.c (vect_loop_vectorized_call): Export and
also return the condition stmt.
* tree-vect-loop-manip.c (vect_loop_versioning): Reuse the
loop version created by if-conversion instead of versioning
again.

diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index b3fae5ba4da..be4b95a14a1 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -3032,7 +3032,8 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
 
   if (cond_expr)
-cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
+cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
+   &cond_expr_stmt_list,
is_gimple_condexpr, NULL_TREE);
 
   if (version_align)
@@ -3076,45 +3077,77 @@ vect_loop_versioning (loop_vec_info loop_vinfo,
  is_gimple_condexpr, NULL_TREE);
   gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
 
-  initialize_original_copy_tables ();
   if (scalar_loop)
 {
-  edge scalar_e;
-  basic_block preheader, scalar_preheader;
+  gcond *cond;
+  gimple *call;
+  if (!(call = vect_loop_vectorized_call (scalar_loop, &cond)))
+   gcc_unreachable ();
+  condition_bb = gimple_bb (cond);
+  gimple_cond_set_condition_from_tree (cond, cond_expr);
+  update_stmt (cond);
 
-  /* We don't want to scale SCALAR_LOOP's frequencies, we need to
-scale LOOP's frequencies instead.  */
-  nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
-   prob, prob.invert (), prob, prob.invert (), true);
-  scale_loop_frequencies (loop, prob);
-  /* CONDITION_BB was created above SCALAR_LOOP's preheader,
-while we need to move it above LOOP's preheader.  */
-  e = loop_preheader_edge (loop);
-  scalar_e = loop_preheader_edge (scalar_loop);
-  /* The vector loop preheader might not be empty, since new
-invariants could have been created while analyzing the loop.  */
-  gcc_assert (single_pred_p (e->src));
-  gcc_assert (empty_block_p (scalar_e->src)
- && single_pred_p (scalar_e->src));
-  gcc_assert (single_pred_p (condition_bb));
-  preheader = e->src;
-  scalar_preheader = scalar_e->src;
-  scalar_e = find_edge (condition_bb, scalar_preheader);
-  e = single_pred_edge (preheader);
-  redirect_edge_and_branch_force (single_pred_edge (condition_bb),
- scalar_preheader);
-  redirect_edge_and_branch_force (scalar_e, preheader);
-  redirect_edge_and_branch_force (e, condition_bb);
-  set_immediate_dominator (CDI_DOMINATORS, condition_bb,
-  single_pred (condition_bb));
-  set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
-  single_pred (scalar_preheader));
-  set_immediate_dominator (CDI_DOMINATORS, preheader,
-  condition_bb);
+  if (cond_expr_stmt_list)
+   {
+ cond_exp_gsi = gsi_for_stmt (call);
+ gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+GSI_SAME_STMT);
+   }
+
+  /* ???  if-conversion uses profile_probability::always () but
+ prob below is profile_probability::likely ().  */
+  nloop = scalar_loop;
+  if (dump_file && (dump_flags & TDF_DETAILS))
+   fprintf (dump_file, "reusing loop version created by if conversion\n");
 }
   else
-nloop = loop_version (loop, cond_expr, &condition_bb,
- prob, prob.invert (), prob, prob.invert (), true);
+{
+  initialize_original_copy_tables ();
+  nloop = loop_version (loop, cond_expr, &condition_bb,
+   prob, prob.invert (), prob, prob.invert (), true);
+  free_original_copy_tables ();
+
+  if (cond_expr_stmt_list)
+   {
+ cond_exp_gsi = gsi_last_bb (condition_bb);
+ gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
+GSI_SAME_STMT);
+   }
+
+  /* Loop versioning violates an assumption we try to maintain during
+vectorization - that the loop exit block has a sin

Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Martin Liška
On 6/12/19 2:50 PM, Richard Biener wrote:
> New params should always go to the end 

Ah, sorry, I'll take of that new time.

I've just changed the function signature and I can verify there are no
other calls (tested for --enable-languages=all).

Thanks,
Martin


Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Richard Biener
On Wed, Jun 12, 2019 at 1:45 PM Martin Liška  wrote:
>
> On 6/12/19 11:41 AM, Richard Biener wrote:
> > On Wed, Jun 12, 2019 at 11:15 AM Martin Liška  wrote:
> >>
> >> On 6/12/19 10:02 AM, Martin Liška wrote:
> >>> On 6/12/19 9:59 AM, Richard Biener wrote:
>  On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
> >
> > On 6/11/19 9:16 AM, Martin Liška wrote:
> >> On 6/11/19 2:27 PM, Jason Merrill wrote:
> >>> On 6/11/19 3:41 AM, Martin Liška wrote:
>  On 6/10/19 8:21 PM, Jason Merrill wrote:
> > On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
> >> On 6/7/19 11:43 PM, Jason Merrill wrote:
> >>> On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  
> >>> wrote:
>  On 6/7/19 2:09 PM, Richard Biener wrote:
> > On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  
> > wrote:
> >> On 6/7/19 10:57 AM, Richard Biener wrote:
> >>> On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  
> >>> wrote:
>  On 6/1/19 12:06 AM, Jeff Law wrote:
> > On 5/22/19 3:13 AM, Martin Liška wrote:
> >> On 5/21/19 1:51 PM, Richard Biener wrote:
> >>> On Tue, May 21, 2019 at 1:02 PM Martin Liška 
> >>>  wrote:
> 
>  On 5/21/19 11:38 AM, Richard Biener wrote:
> > On Tue, May 21, 2019 at 12:07 AM Jeff Law 
> >  wrote:
> >>
> >> On 5/13/19 1:41 AM, Martin Liška wrote:
> >>> On 11/8/18 9:56 AM, Martin Liška wrote:
>  On 11/7/18 11:23 PM, Jeff Law wrote:
> > On 10/30/18 6:28 AM, Martin Liška wrote:
> >> On 10/30/18 11:03 AM, Jakub Jelinek wrote:
> >>> On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin 
> >>> Liška wrote:
>  +hashtab_chk_error ()
>  +{
>  +  fprintf (stderr, "hash table checking failed: 
>  "
>  +   "equal operator returns true for a 
>  pair "
>  +   "of values with a different hash 
>  value");
> >>> BTW, either use internal_error here, or at least 
> >>> if using fprintf
> >>> terminate with \n, in your recent mail I saw:
> >>> ...different hash valueduring RTL pass: vartrack
> >>>   ^^
> >> Sure, fixed in attached patch.
> >>
> >> Martin
> >>
>  +  gcc_unreachable ();
>  +}
> >>> Jakub
> >>>
> >> 0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch
> >>
> >>   From 0d9c979c845580a98767b83c099053d36eb49bb9 
> >> Mon Sep 17 00:00:00 2001
> >> From: marxin 
> >> Date: Mon, 29 Oct 2018 09:38:21 +0100
> >> Subject: [PATCH] Sanitize equals and hash 
> >> functions in hash-tables.
> >>
> >> ---
> >>gcc/hash-table.h | 40 
> >> +++-
> >>1 file changed, 39 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/gcc/hash-table.h b/gcc/hash-table.h
> >> index bd83345c7b8..694eedfc4be 100644
> >> --- a/gcc/hash-table.h
> >> +++ b/gcc/hash-table.h
> >> @@ -503,6 +503,7 @@ private:
> >>
> >>  value_type *alloc_entries (size_t n 
> >> CXX_MEM_STAT_INFO) const;
> >>  value_type *find_empty_slot_for_expand 
> >> (hashval_t);
> >> +  void verify (const compare_type &comparable, 
> >> hashval_t hash);
> >>  bool too_empty_p (unsigned int);
> >>  void expand ();
> >>  static bool is_deleted (value_type &v)
> >> @@ -882,8 +883,12 @@ hash_table >> Allocator>
> >>  if (insert == INSERT && m_size * 3 <= 
> >> m_n_elements * 4)
> >>expand ();
> 

Re: [PATCH, OpenACC] Add support for gang local storage allocation in shared memory

2019-06-12 Thread Thomas Schwinge
Hi!

First, thanks for picking this up, and improving the patch you inherited.


Then, just a few individual comments, not a complete review.

(As far as I concerned, and as far as relevant, these can be addressed
later, incrementally, of course.)


I understand right that this will address some aspects of PR90115
"OpenACC: predetermined private levels for variables declared in blocks"
(so please mention that one in the ChangeLog updates, and commit log),
but it doesn't address all of these aspects (and see also Cesar's list in
),
and also not yet PR90114 "Predetermined private levels for variables
declared in OpenACC accelerator routines"?


On Fri, 7 Jun 2019 15:08:37 +0100, Julian Brown  wrote:
> --- a/gcc/config/nvptx/nvptx.c
> +++ b/gcc/config/nvptx/nvptx.c

> @@ -5237,6 +5248,10 @@ nvptx_file_end (void)
>  write_shared_buffer (asm_out_file, vector_red_sym,
>vector_red_align, vector_red_size);
>  
> +  if (gangprivate_shared_size)
> +write_shared_buffer (asm_out_file, gangprivate_shared_sym,
> +  gangprivate_shared_align, gangprivate_shared_size);

Curious, what is the reason that we maintain this '__gangprivate_shared'
variable on a per-file basis instead of on a per-function basis (with
names '__gangprivate_shared_[function]', or similar), which should make
it more obvious where each block of '.shared' memory belongs to?


> --- a/gcc/doc/tm.texi
> +++ b/gcc/doc/tm.texi

> +@deftypefn {Target Hook} rtx TARGET_GOACC_EXPAND_ACCEL_VAR (tree @var{var})
> +This hook, if defined, is used by accelerator target back-ends to expand
> +specially handled kinds of VAR_DECL expressions.  A particular use is to
> +place variables with specific attributes inside special accelarator
> +memories.  A return value of NULL indicates that the target does not
> +handle this VAR_DECL, and normal RTL expanding is resumed.
> +@end deftypefn

I guess I'm not terribly happy with the 'goacc.expand_accel_var' name.
Using different "memories" for specially tagged DECLs seems to be a
pretty generic concept (address spaces?), and...

> --- a/gcc/expr.c
> +++ b/gcc/expr.c
> @@ -9974,8 +9974,19 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode 
> tmode,
>exp = SSA_NAME_VAR (ssa_name);
>goto expand_decl_rtl;
>  
> -case PARM_DECL:
>  case VAR_DECL:
> +  /* Allow accel compiler to handle specific cases of variables,
> +  specifically those tagged with the "oacc gangprivate" attribute,
> +  which may be intended to be placed in special memory in GPUs.  */
> +  if (flag_openacc && targetm.goacc.expand_accel_var)
> + {
> +   temp = targetm.goacc.expand_accel_var (exp);
> +   if (temp)
> + return temp;
> + }
> +  /* ... fall through ...  */
> +
> +case PARM_DECL:

... I'm thus confused that there isn't already a generic mechanism
available in GCC, that we can just use instead of adding a new one here?
Thinking about the "address spaces" stuff in 'gcc/target.def' -- or is
that the wrong concept?  (I'm not familiar with all that, and haven't
looked closely.)


> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c

> +/* Record vars listed in private clauses in CLAUSES in CTX.  This information
> +   is used to mark up variables that should be made private per-gang.  */
> +
> +static void
> +oacc_record_private_var_clauses (omp_context *ctx, tree clauses)
> +{
> +  for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
> +if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_PRIVATE)
> +  {
> + tree decl = OMP_CLAUSE_DECL (c);
> + if (VAR_P (decl) && TREE_ADDRESSABLE (decl))
> +   {
> + ctx->oacc_addressable_var_decls.safe_push (decl);
> + maybe_oacc_gangprivate_vars = true;
> +   }
> +  }
> +}

Are all the relevant variables addressable?  And/or, need only those be
considered?

> +/* Record addressable vars declared in BINDVARS in CTX.  This information is
> +   used to mark up variables that should be made private per-gang.  */
> +
> +static void
> +oacc_record_vars_in_bind (omp_context *ctx, tree bindvars)
> +{
> +  for (tree v = bindvars; v; v = DECL_CHAIN (v))
> +if (VAR_P (v) && TREE_ADDRESSABLE (v))
> +  {
> + ctx->oacc_addressable_var_decls.safe_push (v);
> + maybe_oacc_gangprivate_vars = true;
> +  }
> +}

Likewise.


> +/* Mark addressable variables which are declared implicitly or explicitly as
> +   gang private with a special attribute.  These may need to have their
> +   declarations altered later on in compilation (e.g. in
> +   execute_oacc_device_lower or the backend, depending on how the OpenACC
> +   execution model is implemented on a given target) to ensure that sharing
> +   semantics are correct.  */
> +
> +static void
> +mark_oacc_gangprivate (vec *decls, omp_context *ctx)
> +{
> +  int i;
> +  tree decl;
> +
> +  FOR_EACH_VEC_ELT (*decls, i, decl)
> +{
> +  for (o

Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Martin Liška
On 6/12/19 11:41 AM, Richard Biener wrote:
> On Wed, Jun 12, 2019 at 11:15 AM Martin Liška  wrote:
>>
>> On 6/12/19 10:02 AM, Martin Liška wrote:
>>> On 6/12/19 9:59 AM, Richard Biener wrote:
 On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
>
> On 6/11/19 9:16 AM, Martin Liška wrote:
>> On 6/11/19 2:27 PM, Jason Merrill wrote:
>>> On 6/11/19 3:41 AM, Martin Liška wrote:
 On 6/10/19 8:21 PM, Jason Merrill wrote:
> On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
>> On 6/7/19 11:43 PM, Jason Merrill wrote:
>>> On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  wrote:
 On 6/7/19 2:09 PM, Richard Biener wrote:
> On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  
> wrote:
>> On 6/7/19 10:57 AM, Richard Biener wrote:
>>> On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  
>>> wrote:
 On 6/1/19 12:06 AM, Jeff Law wrote:
> On 5/22/19 3:13 AM, Martin Liška wrote:
>> On 5/21/19 1:51 PM, Richard Biener wrote:
>>> On Tue, May 21, 2019 at 1:02 PM Martin Liška 
>>>  wrote:

 On 5/21/19 11:38 AM, Richard Biener wrote:
> On Tue, May 21, 2019 at 12:07 AM Jeff Law 
>  wrote:
>>
>> On 5/13/19 1:41 AM, Martin Liška wrote:
>>> On 11/8/18 9:56 AM, Martin Liška wrote:
 On 11/7/18 11:23 PM, Jeff Law wrote:
> On 10/30/18 6:28 AM, Martin Liška wrote:
>> On 10/30/18 11:03 AM, Jakub Jelinek wrote:
>>> On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin 
>>> Liška wrote:
 +hashtab_chk_error ()
 +{
 +  fprintf (stderr, "hash table checking failed: "
 +   "equal operator returns true for a 
 pair "
 +   "of values with a different hash 
 value");
>>> BTW, either use internal_error here, or at least if 
>>> using fprintf
>>> terminate with \n, in your recent mail I saw:
>>> ...different hash valueduring RTL pass: vartrack
>>>   ^^
>> Sure, fixed in attached patch.
>>
>> Martin
>>
 +  gcc_unreachable ();
 +}
>>> Jakub
>>>
>> 0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch
>>
>>   From 0d9c979c845580a98767b83c099053d36eb49bb9 Mon 
>> Sep 17 00:00:00 2001
>> From: marxin 
>> Date: Mon, 29 Oct 2018 09:38:21 +0100
>> Subject: [PATCH] Sanitize equals and hash functions 
>> in hash-tables.
>>
>> ---
>>gcc/hash-table.h | 40 
>> +++-
>>1 file changed, 39 insertions(+), 1 deletion(-)
>>
>> diff --git a/gcc/hash-table.h b/gcc/hash-table.h
>> index bd83345c7b8..694eedfc4be 100644
>> --- a/gcc/hash-table.h
>> +++ b/gcc/hash-table.h
>> @@ -503,6 +503,7 @@ private:
>>
>>  value_type *alloc_entries (size_t n 
>> CXX_MEM_STAT_INFO) const;
>>  value_type *find_empty_slot_for_expand 
>> (hashval_t);
>> +  void verify (const compare_type &comparable, 
>> hashval_t hash);
>>  bool too_empty_p (unsigned int);
>>  void expand ();
>>  static bool is_deleted (value_type &v)
>> @@ -882,8 +883,12 @@ hash_table> Allocator>
>>  if (insert == INSERT && m_size * 3 <= 
>> m_n_elements * 4)
>>expand ();
>>
>> -  m_searches++;
>> +#if ENABLE_EXTRA_CHECKING
>> +if (insert == INSERT)
>> +  verify (comparable, hash);
>> +#endif
>>>

Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Jakub Jelinek
On Wed, Jun 12, 2019 at 01:30:14PM +0200, Martin Liška wrote:
> @@ -9447,10 +9448,19 @@ do_warn_unused_result (gimple_seq seq)
> location_t loc = gimple_location (g);
>  
> if (fdecl)
> - warning_at (loc, OPT_Wunused_result,
> - "ignoring return value of %qD "
> - "declared with attribute %",
> - fdecl);
> + {
> +   /* Some C libraries use alloca(0) in order to free previously
> +  allocated memory by alloca calls.  */
> +   if (gimple_maybe_alloca_call_p (g)
> +   && gimple_call_num_args (g) == 1
> +   && integer_zerop (gimple_call_arg (g, 0)))
> + ;
> +   else

Wouldn't it be easier to negate the condition and avoid the weird ; else ?
I.e. if (!gimple_maybe... || gimple_call_num != 1 || !integer_zerop?

> + warning_at (loc, OPT_Wunused_result,
> + "ignoring return value of %qD declared "
> + "with attribute %",
> + fdecl);
> + }
> else
>   warning_at (loc, OPT_Wunused_result,
>   "ignoring return value of function "

Otherwise LGTM as the patch, but I'd like to hear from others whether
it is kosher to add such a special case to the warn_unused_result attribute
warning.  And if the agreement is yes, I think it should be documented
somewhere that alloca (0) will not warn even when the call has such an
attribute (probably in the description of warn_unused_result attribute).

Jakub


Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Martin Liška
On 6/12/19 1:22 PM, Jakub Jelinek wrote:
> On Wed, Jun 12, 2019 at 01:11:09PM +0200, Martin Liška wrote:
>> 2019-06-12  Martin Liska  
>>
>>  * calls.c (special_function_p): Make it global.
>>  * calls.h (special_function_p): Declare.
> 
> Why?

Not needed any longer.

> 
>>  * tree-cfg.c (do_warn_unused_result): Do not
>>  warn for alloca(0).
>> --- a/gcc/tree-cfg.c
>> +++ b/gcc/tree-cfg.c
>> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>>  #include "opts.h"
>>  #include "asan.h"
>>  #include "profile.h"
>> +#include "calls.h"
>>  
>>  /* This file contains functions for building the Control Flow Graph (CFG)
>> for a function tree.  */
>> @@ -9447,10 +9448,17 @@ do_warn_unused_result (gimple_seq seq)
>>location_t loc = gimple_location (g);
>>  
>>if (fdecl)
>> -warning_at (loc, OPT_Wunused_result,
>> -"ignoring return value of %qD "
>> -"declared with attribute %",
>> -fdecl);
>> +{
>> +  if ((special_function_p (fdecl, 0) & ECF_MAY_BE_ALLOCA)
> 
> Why not instead gimple_maybe_alloca_call_p (g) ?

Because I was not aware of the function ;)

> On the other side, you want && gimple_call_num_args (g) == 1,

Sure.

> if some alloca call had wrong declaration, you might ICE otherwise.
> 
>> +  && TREE_CODE (gimple_call_arg (g, 0)) == INTEGER_CST
>> +  && wi::to_wide (gimple_call_arg (g, 0)) == 0)
> 
> && integer_zerop (gimple_call_arg (g, 0))
> 
> instead?

Yep.

> 
> Plus you need a comment explaining why we don't warn about alloca with
> constant 0 argument.

Also done.

Is it fine now?
Martin

> 
>   Jakub
> 

>From dfdb688206cadd7fb9450ba005e8aa465ae61f7e Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 12 Jun 2019 12:22:36 +0200
Subject: [PATCH] Do not warn with warn_unused_result for alloca(0).

gcc/ChangeLog:

2019-06-12  Martin Liska  

	* tree-cfg.c (do_warn_unused_result): Do not
	warn for alloca(0) as it's used by some C
	libraries to release previously allocated memory
	via alloca calls.

gcc/testsuite/ChangeLog:

2019-06-12  Martin Liska  

	* gcc.dg/pr78902.c: Add testing of alloca(0).
	* gcc.dg/attr-alloc_size-5.c: Do not warn
	about alloca(0).
---
 gcc/testsuite/gcc.dg/attr-alloc_size-5.c |  2 +-
 gcc/testsuite/gcc.dg/pr78902.c   |  1 +
 gcc/tree-cfg.c   | 18 ++
 3 files changed, 16 insertions(+), 5 deletions(-)

diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-5.c b/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
index 7aa7cbf0c72..26ee43f87de 100644
--- a/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
+++ b/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
@@ -230,5 +230,5 @@ test_alloca (size_t n)
 {
   extern void* alloca (size_t);
 
-  alloca (0); /* { dg-warning "ignoring return value of '.*' declared with attribute 'warn_unused_result'" } */
+  alloca (0);
 }
diff --git a/gcc/testsuite/gcc.dg/pr78902.c b/gcc/testsuite/gcc.dg/pr78902.c
index 49efc970475..f0f4314d684 100644
--- a/gcc/testsuite/gcc.dg/pr78902.c
+++ b/gcc/testsuite/gcc.dg/pr78902.c
@@ -7,6 +7,7 @@ void foo(void)
  __builtin_malloc (1); /* { dg-warning "ignoring return value of '__builtin_malloc' declared with attribute 'warn_unused_result'" } */
  __builtin_calloc (10, 20); /* { dg-warning "ignoring return value of '__builtin_calloc' declared with attribute 'warn_unused_result'" } */
  __builtin_alloca (10); /* { dg-warning "ignoring return value of '__builtin_alloca' declared with attribute 'warn_unused_result'" } */
+ __builtin_alloca (0);
  __builtin_realloc (ptr, 100); /* { dg-warning "ignoring return value of '__builtin_realloc' declared with attribute 'warn_unused_result'" } */
  __builtin_aligned_alloc (10, 16); /* { dg-warning "ignoring return value of '__builtin_aligned_alloc' declared with attribute 'warn_unused_result'" } */
  __builtin_strdup ("pes"); /* { dg-warning "ignoring return value of '__builtin_strdup' declared with attribute 'warn_unused_result'" } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index a585efea3d8..0d21f3624d7 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "opts.h"
 #include "asan.h"
 #include "profile.h"
+#include "calls.h"
 
 /* This file contains functions for building the Control Flow Graph (CFG)
for a function tree.  */
@@ -9447,10 +9448,19 @@ do_warn_unused_result (gimple_seq seq)
 	  location_t loc = gimple_location (g);
 
 	  if (fdecl)
-		warning_at (loc, OPT_Wunused_result,
-			"ignoring return value of %qD "
-			"declared with attribute %",
-			fdecl);
+		{
+		  /* Some C libraries use alloca(0) in order to free previously
+		 allocated memory by alloca calls.  */
+		  if (gimple_maybe_alloca_call_p (g)
+		  && gimple_call_num_args (g) == 1
+		  && integer_zerop (gimple_call_arg (g, 0)))
+		;
+		  else
+		warning

Re: [PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Jakub Jelinek
On Wed, Jun 12, 2019 at 01:11:09PM +0200, Martin Liška wrote:
> 2019-06-12  Martin Liska  
> 
>   * calls.c (special_function_p): Make it global.
>   * calls.h (special_function_p): Declare.

Why?

>   * tree-cfg.c (do_warn_unused_result): Do not
>   warn for alloca(0).
> --- a/gcc/tree-cfg.c
> +++ b/gcc/tree-cfg.c
> @@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
>  #include "opts.h"
>  #include "asan.h"
>  #include "profile.h"
> +#include "calls.h"
>  
>  /* This file contains functions for building the Control Flow Graph (CFG)
> for a function tree.  */
> @@ -9447,10 +9448,17 @@ do_warn_unused_result (gimple_seq seq)
> location_t loc = gimple_location (g);
>  
> if (fdecl)
> - warning_at (loc, OPT_Wunused_result,
> - "ignoring return value of %qD "
> - "declared with attribute %",
> - fdecl);
> + {
> +   if ((special_function_p (fdecl, 0) & ECF_MAY_BE_ALLOCA)

Why not instead gimple_maybe_alloca_call_p (g) ?
On the other side, you want && gimple_call_num_args (g) == 1,
if some alloca call had wrong declaration, you might ICE otherwise.

> +   && TREE_CODE (gimple_call_arg (g, 0)) == INTEGER_CST
> +   && wi::to_wide (gimple_call_arg (g, 0)) == 0)

&& integer_zerop (gimple_call_arg (g, 0))

instead?

Plus you need a comment explaining why we don't warn about alloca with
constant 0 argument.

Jakub


[PATCH] Do not warn with warn_unused_result for alloca(0).

2019-06-12 Thread Martin Liška
On 6/11/19 6:03 PM, Jakub Jelinek wrote:
> On Tue, Jun 11, 2019 at 03:58:27PM +, Michael Matz wrote:
>> On Tue, 11 Jun 2019, Martin Liška wrote:
>>
>>> I see 3 occurrences of the alloca (0) in libiberty/regex.c, but there are 
>>> properly
>>> guarded within:
>>>
>>> # ifdef C_ALLOCA
>>>   alloca (0);
>>> # endif
>>>
>>> and then I noticed 2 more occurrences in gdb that break build right now:
>>>
>>> gdb/regcache.c:  alloca (0);
>>> gdb/top.c:  alloca (0);
>>>
>>> Is it the right approach to remove these 2 in gdb?
>>
>> It's more an indication that the annotation requesting the warning for 
>> unused results is simply overeager (aka wrong) for alloca.  (sure, the 
>> uses in gdb probably could be cleaned up as well, but that doesn't affect 
>> the wrongness of the warning).
> 
> Yeah.  Either we special-case alloca in the warn_unused_result code
> - if the call flags include ECF_MAY_BE_ALLOCA and argument is 0, don't warn,
> or don't add the attribute to alloca, or add yet another attribute that will
> be used for alloca only.
> 
>   Jakub
> 

Ok, I've got a patch for it.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Thanks,
Martin
>From b659c00e54ff3bee736f502e7fa4dc233a814b66 Mon Sep 17 00:00:00 2001
From: Martin Liska 
Date: Wed, 12 Jun 2019 12:22:36 +0200
Subject: [PATCH] Do not warn with warn_unused_result for alloca(0).

gcc/ChangeLog:

2019-06-12  Martin Liska  

	* calls.c (special_function_p): Make it global.
	* calls.h (special_function_p): Declare.
	* tree-cfg.c (do_warn_unused_result): Do not
	warn for alloca(0).

gcc/testsuite/ChangeLog:

2019-06-12  Martin Liska  

	* gcc.dg/pr78902.c: Add testing of alloca(0).
	* gcc.dg/attr-alloc_size-5.c: Do not warn
	about alloca(0).
---
 gcc/calls.c  |  3 +--
 gcc/calls.h  |  1 +
 gcc/testsuite/gcc.dg/attr-alloc_size-5.c |  2 +-
 gcc/testsuite/gcc.dg/pr78902.c   |  1 +
 gcc/tree-cfg.c   | 16 
 5 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/gcc/calls.c b/gcc/calls.c
index c8a42680041..be9b2ff046a 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -153,7 +153,6 @@ static void compute_argument_addresses (struct arg_data *, rtx, int);
 static rtx rtx_for_function_call (tree, tree);
 static void load_register_parameters (struct arg_data *, int, rtx *, int,
   int, int *);
-static int special_function_p (const_tree, int);
 static int check_sibcall_argument_overlap_1 (rtx);
 static int check_sibcall_argument_overlap (rtx_insn *, struct arg_data *, int);
 
@@ -578,7 +577,7 @@ emit_call_1 (rtx funexp, tree fntree ATTRIBUTE_UNUSED, tree fndecl ATTRIBUTE_UNU
Set ECF_MAY_BE_ALLOCA for any memory allocation function that might allocate
space from the stack such as alloca.  */
 
-static int
+int
 special_function_p (const_tree fndecl, int flags)
 {
   tree name_decl = DECL_NAME (fndecl);
diff --git a/gcc/calls.h b/gcc/calls.h
index 128bb513074..0d2bc888b26 100644
--- a/gcc/calls.h
+++ b/gcc/calls.h
@@ -42,5 +42,6 @@ extern tree get_attr_nonstring_decl (tree, tree * = NULL);
 extern void maybe_warn_nonstring_arg (tree, tree);
 extern bool get_size_range (tree, tree[2], bool = false);
 extern rtx rtx_for_static_chain (const_tree, bool);
+extern int special_function_p (const_tree, int);
 
 #endif // GCC_CALLS_H
diff --git a/gcc/testsuite/gcc.dg/attr-alloc_size-5.c b/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
index 7aa7cbf0c72..26ee43f87de 100644
--- a/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
+++ b/gcc/testsuite/gcc.dg/attr-alloc_size-5.c
@@ -230,5 +230,5 @@ test_alloca (size_t n)
 {
   extern void* alloca (size_t);
 
-  alloca (0); /* { dg-warning "ignoring return value of '.*' declared with attribute 'warn_unused_result'" } */
+  alloca (0);
 }
diff --git a/gcc/testsuite/gcc.dg/pr78902.c b/gcc/testsuite/gcc.dg/pr78902.c
index 49efc970475..f0f4314d684 100644
--- a/gcc/testsuite/gcc.dg/pr78902.c
+++ b/gcc/testsuite/gcc.dg/pr78902.c
@@ -7,6 +7,7 @@ void foo(void)
  __builtin_malloc (1); /* { dg-warning "ignoring return value of '__builtin_malloc' declared with attribute 'warn_unused_result'" } */
  __builtin_calloc (10, 20); /* { dg-warning "ignoring return value of '__builtin_calloc' declared with attribute 'warn_unused_result'" } */
  __builtin_alloca (10); /* { dg-warning "ignoring return value of '__builtin_alloca' declared with attribute 'warn_unused_result'" } */
+ __builtin_alloca (0);
  __builtin_realloc (ptr, 100); /* { dg-warning "ignoring return value of '__builtin_realloc' declared with attribute 'warn_unused_result'" } */
  __builtin_aligned_alloc (10, 16); /* { dg-warning "ignoring return value of '__builtin_aligned_alloc' declared with attribute 'warn_unused_result'" } */
  __builtin_strdup ("pes"); /* { dg-warning "ignoring return value of '__builtin_strdup' declared with attribute 'warn_unused_result'" } */
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index a585efea3d

Re: [PATCH][C++] Fix PR90801

2019-06-12 Thread Richard Biener
On Tue, 11 Jun 2019, Jason Merrill wrote:

> On Tue, Jun 11, 2019, 6:45 AM Richard Biener  wrote:
> 
> >
> > The following fixes the documented(!) quadraticness in
> > split_nonconstant_init_1 by simply marking to be removed
> > constructor elements and doing that in a second run over
> > the constructor.
> >
> > More micro-optimizing would be possible by recording the
> > first removed element index and special-casing removing
> > of a single element.  If you think that's worth the extra
> > complexity I can work on that (hopefully the case we
> > increase num_split_elts but not actually split is a bug...).
> >
> > Bootstrap / regtest running on x86_64-unknown-linux-gnu.
> >
> > OK if that passes?
> >
> 
> Ok, thanks.

Installed.

Below is the simplest variant re-adding optimized handling
of single-element removal.  Note the second hunk which
moves the num_split_elts increment so it doesn't happen
when split_nonconstant_init_1 returns false.

Bootstrapped and tested on x86_64-unknown-linux-gnu.

OK?

Richard.

2019-06-12  Richard Biener  

PR c++/90801
* typeck2.c (split_nonconstant_init_1): Properly count
num_split_elts, optimize single constructor elt removal.

Index: gcc/cp/typeck2.c
===
--- gcc/cp/typeck2.c(revision 272177)
+++ gcc/cp/typeck2.c(working copy)
@@ -603,7 +603,7 @@ cxx_incomplete_type_error (location_t lo
 static bool
 split_nonconstant_init_1 (tree dest, tree init)
 {
-  unsigned HOST_WIDE_INT idx, tidx;
+  unsigned HOST_WIDE_INT idx, tidx = HOST_WIDE_INT_M1U;
   tree field_index, value;
   tree type = TREE_TYPE (dest);
   tree inner_type = NULL;
@@ -657,9 +657,13 @@ split_nonconstant_init_1 (tree dest, tre
  if (!split_nonconstant_init_1 (sub, value))
complete_p = false;
  else
-   /* Mark element for removal.  */
-   CONSTRUCTOR_ELT (init, idx)->index = NULL_TREE;
- num_split_elts++;
+   {
+ /* Mark element for removal.  */
+ CONSTRUCTOR_ELT (init, idx)->index = NULL_TREE;
+ if (idx < tidx)
+   tidx = idx;
+ num_split_elts++;
+   }
}
  else if (!initializer_constant_valid_p (value, inner_type))
{
@@ -668,6 +672,8 @@ split_nonconstant_init_1 (tree dest, tre
 
  /* Mark element for removal.  */
  CONSTRUCTOR_ELT (init, idx)->index = NULL_TREE;
+ if (idx < tidx)
+   tidx = idx;
 
  if (TREE_CODE (field_index) == RANGE_EXPR)
{
@@ -705,17 +711,18 @@ split_nonconstant_init_1 (tree dest, tre
  num_split_elts++;
}
}
-  if (num_split_elts != 0)
+  if (num_split_elts == 1)
+   CONSTRUCTOR_ELTS (init)->ordered_remove (tidx);
+  else if (num_split_elts > 1)
{
  /* Perform the delayed ordered removal of non-constant elements
 we split out.  */
- for (tidx = 0, idx = 0; idx < CONSTRUCTOR_NELTS (init); ++idx)
+ for (idx = tidx; idx < CONSTRUCTOR_NELTS (init); ++idx)
if (CONSTRUCTOR_ELT (init, idx)->index == NULL_TREE)
  ;
else
  {
-   if (tidx != idx)
- *CONSTRUCTOR_ELT (init, tidx) = *CONSTRUCTOR_ELT (init, idx);
+   *CONSTRUCTOR_ELT (init, tidx) = *CONSTRUCTOR_ELT (init, idx);
++tidx;
  }
  vec_safe_truncate (CONSTRUCTOR_ELTS (init), tidx);


Re: [PATCH, OpenACC] Add support for gang local storage allocation in shared memory

2019-06-12 Thread Tom de Vries
On 12-06-19 12:22, Jakub Jelinek wrote:
> On Fri, Jun 07, 2019 at 03:08:37PM +0100, Julian Brown wrote:
>> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
>> index a7f35ffe416..67e1e82ec00 100644
>> --- a/gcc/omp-low.c
>> +++ b/gcc/omp-low.c
>> @@ -9794,6 +9882,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
>> omp_context *ctx)
>>  
>>if (offloaded)
>>  {
>> +  mark_oacc_gangprivate (&ctx->oacc_addressable_var_decls, ctx);
>> +
> 
> The above one still doesn't seem to be guarded for OpenACC constructs only.
> 
> As for the rest of the patch, you need Tom to look over the nvptx changes.

I haven't seen any nvptx changes mentioned since I ok-ed the nvptx part
( https://gcc.gnu.org/ml/gcc-patches/2018-10/msg00324.html ), so on that
basis I'd say it's still ok.

Thanks,
- Tom


Re: [PATCH, OpenACC] Add support for gang local storage allocation in shared memory

2019-06-12 Thread Jakub Jelinek
On Fri, Jun 07, 2019 at 03:08:37PM +0100, Julian Brown wrote:
> diff --git a/gcc/omp-low.c b/gcc/omp-low.c
> index a7f35ffe416..67e1e82ec00 100644
> --- a/gcc/omp-low.c
> +++ b/gcc/omp-low.c
> @@ -9794,6 +9882,8 @@ lower_omp_target (gimple_stmt_iterator *gsi_p, 
> omp_context *ctx)
>  
>if (offloaded)
>  {
> +  mark_oacc_gangprivate (&ctx->oacc_addressable_var_decls, ctx);
> +

The above one still doesn't seem to be guarded for OpenACC constructs only.

As for the rest of the patch, you need Tom to look over the nvptx changes.

Jakub


Re: [PATCH V8] Remove empty loop with assumed finiteness (PR tree-optimization/89713)

2019-06-12 Thread Richard Biener
On Tue, Jun 11, 2019 at 4:40 AM Feng Xue OS  wrote:
>
> Reformat to comply with gcc coding style.

OK for trunk.

Thanks,
Richard.

> Feng
>
> ---
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index 37aab79..87cc125 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,16 @@
> +2019-06-04  Feng Xue  
> +
> +   PR tree-optimization/89713
> +   * doc/invoke.texi (-ffinite-loops): Document new option.
> +   * common.opt (-ffinite-loops): New option.
> +   * tree-ssa-dce.c (mark_stmt_if_obviously_necessary): Mark
> +   IFN_GOACC_LOOP calls as necessary.
> +   * tree-ssa-loop-niter.c (finite_loop_p): Assume loop with an exit
> +   is finite.
> +   * omp-offload.c (oacc_xform_loop): Skip lowering if return value of
> +   IFN_GOACC_LOOP call is not used.
> +   * opts.c (default_options_table): Enable -ffinite-loops at -O2+.
> +
>  2019-06-04  Alan Modra  
>
> PR target/90689
> diff --git a/gcc/common.opt b/gcc/common.opt
> index 0e72fd0..8b0e6ad 100644
> --- a/gcc/common.opt
> +++ b/gcc/common.opt
> @@ -1437,6 +1437,10 @@ ffinite-math-only
>  Common Report Var(flag_finite_math_only) Optimization SetByCombined
>  Assume no NaNs or infinities are generated.
>
> +ffinite-loops
> +Common Report Var(flag_finite_loops) Optimization
> +Assume that loops with an exit will terminate and not loop indefinitely.
> +
>  ffixed-
>  Common Joined RejectNegative Var(common_deferred_options) Defer
>  -ffixed- Mark  as being unavailable to the compiler.
> diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
> index 91c9bb8..1e12595 100644
> --- a/gcc/doc/invoke.texi
> +++ b/gcc/doc/invoke.texi
> @@ -412,6 +412,7 @@ Objective-C and Objective-C++ Dialects}.
>  -fdevirtualize-at-ltrans  -fdse @gol
>  -fearly-inlining  -fipa-sra  -fexpensive-optimizations  -ffat-lto-objects 
> @gol
>  -ffast-math  -ffinite-math-only  -ffloat-store  
> -fexcess-precision=@var{style} @gol
> +-ffinite-loops @gol
>  -fforward-propagate  -ffp-contract=@var{style}  -ffunction-sections @gol
>  -fgcse  -fgcse-after-reload  -fgcse-las  -fgcse-lm  -fgraphite-identity @gol
>  -fgcse-sm  -fhoist-adjacent-loads  -fif-conversion @gol
> @@ -8282,6 +8283,7 @@ also turns on the following optimization flags:
>  -fdelete-null-pointer-checks @gol
>  -fdevirtualize  -fdevirtualize-speculatively @gol
>  -fexpensive-optimizations @gol
> +-ffinite-loops @gol
>  -fgcse  -fgcse-lm  @gol
>  -fhoist-adjacent-loads @gol
>  -finline-small-functions @gol
> @@ -9503,6 +9505,15 @@ that may set @code{errno} but are otherwise free of 
> side effects.  This flag is
>  enabled by default at @option{-O2} and higher if @option{-Os} is not also
>  specified.
>
> +@item -ffinite-loops
> +@opindex ffinite-loops
> +@opindex fno-finite-loops
> +Assume that a loop with an exit will eventually take the exit and not loop
> +indefinitely.  This allows the compiler to remove loops that otherwise have
> +no side-effects, not considering eventual endless looping as such.
> +
> +This option is enabled by default at @option{-O2}.
> +
>  @item -ftree-dominator-opts
>  @opindex ftree-dominator-opts
>  Perform a variety of simple scalar cleanups (constant/copy
> diff --git a/gcc/omp-offload.c b/gcc/omp-offload.c
> index 97ae47b..c8a281c 100644
> --- a/gcc/omp-offload.c
> +++ b/gcc/omp-offload.c
> @@ -300,7 +300,7 @@ oacc_xform_loop (gcall *call)
>tree chunk_size = NULL_TREE;
>unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
>tree lhs = gimple_call_lhs (call);
> -  tree type = TREE_TYPE (lhs);
> +  tree type = NULL_TREE;
>tree diff_type = TREE_TYPE (range);
>tree r = NULL_TREE;
>gimple_seq seq = NULL;
> @@ -308,6 +308,15 @@ oacc_xform_loop (gcall *call)
>unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
>unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)
>
> +  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
> +  if (!lhs)
> +{
> +  gsi_replace_with_seq (&gsi, seq, true);
> +  return;
> +}
> +
> +  type = TREE_TYPE (lhs);
> +
>  #ifdef ACCEL_COMPILER
>chunk_size = gimple_call_arg (call, 4);
>if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
> diff --git a/gcc/opts.c b/gcc/opts.c
> index 64f94ac..b38bfb1 100644
> --- a/gcc/opts.c
> +++ b/gcc/opts.c
> @@ -494,6 +494,7 @@ static const struct default_options 
> default_options_table[] =
>  { OPT_LEVELS_2_PLUS, OPT_fdevirtualize, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fdevirtualize_speculatively, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fexpensive_optimizations, NULL, 1 },
> +{ OPT_LEVELS_2_PLUS, OPT_ffinite_loops, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fgcse, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_fhoist_adjacent_loads, NULL, 1 },
>  { OPT_LEVELS_2_PLUS, OPT_findirect_inlining, NULL, 1 },
> diff --git a/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C 
> b/gcc/testsuite/g++.dg/tree-ssa/empty-loop.C
> new file mode 10

Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Richard Biener
On Wed, Jun 12, 2019 at 11:15 AM Martin Liška  wrote:
>
> On 6/12/19 10:02 AM, Martin Liška wrote:
> > On 6/12/19 9:59 AM, Richard Biener wrote:
> >> On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
> >>>
> >>> On 6/11/19 9:16 AM, Martin Liška wrote:
>  On 6/11/19 2:27 PM, Jason Merrill wrote:
> > On 6/11/19 3:41 AM, Martin Liška wrote:
> >> On 6/10/19 8:21 PM, Jason Merrill wrote:
> >>> On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
>  On 6/7/19 11:43 PM, Jason Merrill wrote:
> > On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  wrote:
> >> On 6/7/19 2:09 PM, Richard Biener wrote:
> >>> On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  
> >>> wrote:
>  On 6/7/19 10:57 AM, Richard Biener wrote:
> > On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  
> > wrote:
> >> On 6/1/19 12:06 AM, Jeff Law wrote:
> >>> On 5/22/19 3:13 AM, Martin Liška wrote:
>  On 5/21/19 1:51 PM, Richard Biener wrote:
> > On Tue, May 21, 2019 at 1:02 PM Martin Liška 
> >  wrote:
> >>
> >> On 5/21/19 11:38 AM, Richard Biener wrote:
> >>> On Tue, May 21, 2019 at 12:07 AM Jeff Law 
> >>>  wrote:
> 
>  On 5/13/19 1:41 AM, Martin Liška wrote:
> > On 11/8/18 9:56 AM, Martin Liška wrote:
> >> On 11/7/18 11:23 PM, Jeff Law wrote:
> >>> On 10/30/18 6:28 AM, Martin Liška wrote:
>  On 10/30/18 11:03 AM, Jakub Jelinek wrote:
> > On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin 
> > Liška wrote:
> >> +hashtab_chk_error ()
> >> +{
> >> +  fprintf (stderr, "hash table checking failed: "
> >> +   "equal operator returns true for a 
> >> pair "
> >> +   "of values with a different hash 
> >> value");
> > BTW, either use internal_error here, or at least if 
> > using fprintf
> > terminate with \n, in your recent mail I saw:
> > ...different hash valueduring RTL pass: vartrack
> >   ^^
>  Sure, fixed in attached patch.
> 
>  Martin
> 
> >> +  gcc_unreachable ();
> >> +}
> > Jakub
> >
>  0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch
> 
>    From 0d9c979c845580a98767b83c099053d36eb49bb9 Mon 
>  Sep 17 00:00:00 2001
>  From: marxin 
>  Date: Mon, 29 Oct 2018 09:38:21 +0100
>  Subject: [PATCH] Sanitize equals and hash functions 
>  in hash-tables.
> 
>  ---
> gcc/hash-table.h | 40 
>  +++-
> 1 file changed, 39 insertions(+), 1 deletion(-)
> 
>  diff --git a/gcc/hash-table.h b/gcc/hash-table.h
>  index bd83345c7b8..694eedfc4be 100644
>  --- a/gcc/hash-table.h
>  +++ b/gcc/hash-table.h
>  @@ -503,6 +503,7 @@ private:
> 
>   value_type *alloc_entries (size_t n 
>  CXX_MEM_STAT_INFO) const;
>   value_type *find_empty_slot_for_expand 
>  (hashval_t);
>  +  void verify (const compare_type &comparable, 
>  hashval_t hash);
>   bool too_empty_p (unsigned int);
>   void expand ();
>   static bool is_deleted (value_type &v)
>  @@ -882,8 +883,12 @@ hash_table  Allocator>
>   if (insert == INSERT && m_size * 3 <= 
>  m_n_elements * 4)
> expand ();
> 
>  -  m_searches++;
>  +#if ENABLE_EXTRA_CHECKING
>  +if (insert == INSERT)
>  +  verify (comparable, hash);
>  +#endif
> 
>  +  m_sea

Re: Add value_range_base::contains_p

2019-06-12 Thread Richard Biener
On Wed, Jun 12, 2019 at 1:57 AM Martin Sebor  wrote:
>
> On 6/11/19 3:02 PM, Aldy Hernandez wrote:
> >
> >
> > On 6/11/19 12:52 PM, Martin Sebor wrote:
> >> On 6/11/19 10:26 AM, Aldy Hernandez wrote:
> >>>
> >>>
> >>> On 6/11/19 12:17 PM, Martin Sebor wrote:
>  On 6/11/19 9:05 AM, Aldy Hernandez wrote:
> >
> >
> > On 6/11/19 9:45 AM, Richard Biener wrote:
> >> On Tue, Jun 11, 2019 at 12:40 PM Aldy Hernandez 
> >> wrote:
> >>>
> >>> This patch cleans up the various contains, may_contain, and
> >>> value_inside_range variants we have throughout, in favor of one--
> >>> contains_p.  There should be no changes in functionality.
> >>>
> >>> I have added a note to range_includes_zero_p, perhaps as a personal
> >>> question than anything else.  This function was/is returning true
> >>> for
> >>> UNDEFINED.  From a semantic sense, that doesn't make sense.
> >>> UNDEFINED
> >>> is really the empty set.  Is the functionality wrong, or should
> >>> we call
> >>> this function something else?  Either way, I'm fine removing the
> >>> comment
> >>> but I'm genuinely curious.
> >>
> >> So this affects for example this hunk:
> >>
> >> -  if (vr && !range_includes_p (vr, 1))
> >> +  if (vr && (!vr->contains_p (build_one_cst (TREE_TYPE (name)))
> >> +&& !vr->undefined_p ()))
> >>  {
> >>
> >> I think it's arbitrary how we compute X in UNDEFINED and I'm fine
> >> with changing the affected predicates to return false.  This means
> >> not testing for !vr->undefined_p here.
> >
> > Excellent.
> >
> >>
> >> Note I very much dislike the build_one_cst () call here so please
> >> provide an overload hiding this.
> >
> > Good idea.  I love it.
> >
> >>
> >> Not sure why you keep range_includes_zero_p.
> >
> > I wasn't sure if there was some subtle reason why we were including
> > undefined.
> >
> > OK pending tests?
> 
>  Should the second overload:
> 
>  +  bool contains_p (tree) const;
>  +  bool contains_p (int) const;
> 
>  take something like HOST_WIDE_INT or even one of those poly_ints
>  like build_int_cst does?  (With the former, contains_p (0) will
>  likely be ambiguous since 0 is int and HOST_WIDE_INT is long).
> >>>
> >>> We have a type, so there should be no confusion:
> >>>
> >>> +  return contains_p (build_int_cst (type (), val));
> >>>
> >>> (UNDEFINED and VARYING don't have a type, so they are special cased
> >>> prior).
> >>
> >> I didn't mean the overloads are confusing, just that there the one
> >> that takes an int doesn't make it possible to test whether a value
> >> outside the range of an int is in the range.  For example, in
> >> the call
> >>
> >>contains_p (SIZE_MAX)
> >>
> >> the argument will get sliced (and trigger a -Woverflow).  One will
> >> need to go back to the more verbose way of calling it.
> >
> > The int version is not really meant to pass anything but simple
> > constants.  For anything fancy, one should really be using the tree
> > version.  But I can certainly change the argument to HOST_WIDE_INT if
> > preferred.
> >
> >>
> >> Changing the argument type to HOST_WIDE_INT would avoid the slicing
> >> and warning but then make contains_p (0) ambiguous because 0 isn't
> >> a perfect match for either void* or long (so it requires a conversion).
> >
> > Just a plain 0 will match the int version, instead of the tree version,
> > right?  Nobody should be passing NULL to the tree version, so that seems
> > like a non-issue.
>
> Right, NULL isn't a problem, but I would expect any integer to work
> (I thought that's what Richard was asking for)  So my suggestion was
> to have contains_p() a poly_int64 and provide just as robust an API
> as build_int_cst.  The argument ends up converted to the poly_int64
> anyway when it's passed to the latter.  I.e., why not define
> contains_p simply like this:
>
>bool
>value_range_base::contains_p (poly_int64 val) const
>{
>  if (varying_p ())
>return true;
>  if (undefined_p ())
>return false;
>
>  return contains_p (build_int_cst (type (), val));
>}

I agree that plain 'int' is bad.  Given we currently store
INTEGER_CSTs only (and not POLY_INT_CSTs) a
widest_int argument should be fine.  Note widest
because when interpreted signed all unsigned values
fit.

The existing contains_p check is also a bit fishy
for the cases TREE_TYPE of the value has a
value-range not covered in the value-ranges type...
I guess it could do

   if (!int_fits_type_p (val, this->type ())
 return false;

but that changes existing behavior which happily
throws different sign constants into tree_int_cst_lt
for example...  Do we want to allow non-INTEGER_CST
val arguments at all?  That is, you make contains_p
return a bool and say "yes" when we couldn't really
decide.  This is why pre

[RFC] ARM -mfpu=auto woes

2019-06-12 Thread Alexandre Oliva
I'm looking into a regression between gcc-7 and gcc-8 that causes
compilation with -mfloat-abi=hard to fail on arm-eabi with:

$ arm-eabi-gcc -c -mfloat-abi=hard t.c
cc1: error: -mfloat-abi=hard: selected processor lacks an FPU

Per the documentation, -mfpu=auto was supposed to be the default, but
the implicit -mcpu=armv7tdmi option from configure_default_options, or
the -march=armv4t derived from it, seems to be taken as choosing to
disable the fpu.  Even passing -mfpu=auto explicitly one gets the error
above; AFAICT it would only work if the default cpu had support for an
fpu, and the "regression" is caused by new checks about conflicting
options.  Does that sound right?

While trying to find a work-around, still suspecting the issue was a
lack of -mfpu=auto, I tried configuring --with-fpu=auto and ran into a
few issues.


The first problem was a shell bug in the $fpu = error test: there must
be a blank before the closing bracket.


The second problem, hit after fixing the first, was that it printed a
very confusing error message:

  Unknown target in --with-arch=

which set me down a bit of a wild goose chase until I realized the error
was about with_fpu, but the error message, presumably copied from an
earlier loop, had not been adjusted for the fact that 'which' and 'val',
set in the loop, were not set in the block below, and we don't want to
print the values in the last iteration of the loop, we want the
--with-fpu stuff.


The third problem is that chkfpu rejects auto because auto is special,
it's not in the fpu_cnames array.  I'm not sure whether it's
inappropriate to accept it, but if we could accept it so as to enable it
to be an overriding default, it could be arranged to be explicitly
accepted right in the portion of config.gcc modified by the patch below,
or in parsecpu.awk.  Any preferences?

Weirdly, *without* the obvious fixes in the patch below, --with-fpu=auto
is accepted, but after the patch it's no longer accepted, so I guess it
might make sense to refrain from installing it until there's a decision
as to whether --with-fpu=auto should be accepted.  So, any objections to
my installing this then, or even now, if --with-fpu=auto shouldn't be
accepted?


for  gcc/ChangeLog

* config.gcc: Fix ARM --with-fpu checking and error message.


diff --git a/gcc/config.gcc b/gcc/config.gcc
index 6b00c3872473..89ccbecec23d 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -3962,12 +3962,13 @@ case "${target}" in
 
# see if --with-fpu matches any of the supported FPUs
if [ x"$with_fpu" != x ] ; then
+ val=$with_fpu
  fpu=`awk -f ${srcdir}/config/arm/parsecpu.awk \
-   -v cmd="chkfpu $with_fpu" \
+   -v cmd="chkfpu $val" \
${srcdir}/config/arm/arm-cpus.in`
- if [ "$fpu" = "error"]
+ if [ "$fpu" = "error" ]
  then
-   echo "Unknown target in --with-$which=$val" 1>&2
+   echo "Unknown target in --with-fpu=$val" 1>&2
exit 1
  fi
fi

-- 
Alexandre Oliva, freedom fighter  he/him   https://FSFLA.org/blogs/lxo
Be the change, be Free! FSF Latin America board member
GNU Toolchain EngineerFree Software Evangelist
Hay que enGNUrecerse, pero sin perder la terGNUra jamás - Che GNUevara


Re: [PATCH] Don't adjust DECL_ALIGN of variables during estimated_stack_frame_size (PR target/90811)

2019-06-12 Thread Richard Biener
On Wed, 12 Jun 2019, Jakub Jelinek wrote:

> On Wed, Jun 12, 2019 at 11:00:43AM +0200, Richard Biener wrote:
> > > Bootstrapped/regtested on x86_64-linux and i686-linux and furthermore 
> > > tested
> > > with x86_64-linux to nvptx-none offloading with the nvptx.c PR90811 fix
> > > reverted to verify the overaligned variables are gone.  Ok for trunk?
> > 
> > Isn't there hunk missing that actually passes false?
> 
> It is that
> +add_stack_var (origvar, really_expand);
> expand_one_var is called 3 times:
>   expand_one_var (t, toplevel, true);
>   size += expand_one_var (var, true, false);
> expand_one_var (var, true, true);
> where the second one is the estimated_stack_frame_size call where it passes
> down false as really_expand.  And the patch passes it down through to
> align_local_variable.

Ah, OK.

> > I guess only CCPs bit-value/alignment tracking sees the difference.
> > 
> > Note we are already aligning variables in stor-layout.c with target
> > information so in some way this isn't a real fix.  Offloading as
> > implemented right now really leaks target dependent decisions from
> > the target to the offload target.
> 
> I was thinking about only honoring DECL_ALIGN for non-FIELD_DECLs when
> streaming in ACCEL_COMPILER only if DECL_USER_ALIGN, the risk is if
> we bump DECL_ALIGN somewhere pre-IPA on the host side and some optimization
> actually optimizes code using that DECL_ALIGN, later on we stream offloading
> out and in, downgrade DECL_ALIGN and suddenly the optimization was invalid.

Yeah, I think that's too dangerous.  We could stream the offload part
before any early optimization.  Or mark offload functions and skip
the early pipeline for them, also not do IPA on them.  But I guess
optimizing, esp. inlining into offloaded functions is important.

> > Not opposed to the change though a comment in the align_local_variable
> > hunk as to why we only adjust DECL_ALIGN late would be appreciated.
> 
> Sure, that can be done:
> 
> > > --- gcc/cfgexpand.c.jj2019-05-20 11:39:34.059816432 +0200
> > > +++ gcc/cfgexpand.c   2019-06-11 11:28:08.720932421 +0200
> > > @@ -361,7 +361,7 @@ static bool has_short_buffer;
> > > we can't do with expected alignment of the stack boundary.  */
> > >  
> > >  static unsigned int
> > > -align_local_variable (tree decl)
> > > +align_local_variable (tree decl, bool really_expand)
> > >  {
> > >unsigned int align;
> > >  
> > > @@ -370,7 +370,8 @@ align_local_variable (tree decl)
> > >else
> > >  {
> > >align = LOCAL_DECL_ALIGNMENT (decl);
> > > -  SET_DECL_ALIGN (decl, align);
> 
> +  /* Don't change DECL_ALIGN when called from estimated_stack_frame_size.
> +  That is done before IPA and could bump alignment based on host
> +  backend even for offloaded code which wants different
> +  LOCAL_DECL_ALIGNMENT.  */
> 
> > > +  if (really_expand)
> > > + SET_DECL_ALIGN (decl, align);

Looks good to me with this change.

Thanks,
Richard.


Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Martin Liška
On 6/12/19 10:02 AM, Martin Liška wrote:
> On 6/12/19 9:59 AM, Richard Biener wrote:
>> On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
>>>
>>> On 6/11/19 9:16 AM, Martin Liška wrote:
 On 6/11/19 2:27 PM, Jason Merrill wrote:
> On 6/11/19 3:41 AM, Martin Liška wrote:
>> On 6/10/19 8:21 PM, Jason Merrill wrote:
>>> On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
 On 6/7/19 11:43 PM, Jason Merrill wrote:
> On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  wrote:
>> On 6/7/19 2:09 PM, Richard Biener wrote:
>>> On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  wrote:
 On 6/7/19 10:57 AM, Richard Biener wrote:
> On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  
> wrote:
>> On 6/1/19 12:06 AM, Jeff Law wrote:
>>> On 5/22/19 3:13 AM, Martin Liška wrote:
 On 5/21/19 1:51 PM, Richard Biener wrote:
> On Tue, May 21, 2019 at 1:02 PM Martin Liška  
> wrote:
>>
>> On 5/21/19 11:38 AM, Richard Biener wrote:
>>> On Tue, May 21, 2019 at 12:07 AM Jeff Law  
>>> wrote:

 On 5/13/19 1:41 AM, Martin Liška wrote:
> On 11/8/18 9:56 AM, Martin Liška wrote:
>> On 11/7/18 11:23 PM, Jeff Law wrote:
>>> On 10/30/18 6:28 AM, Martin Liška wrote:
 On 10/30/18 11:03 AM, Jakub Jelinek wrote:
> On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin 
> Liška wrote:
>> +hashtab_chk_error ()
>> +{
>> +  fprintf (stderr, "hash table checking failed: "
>> +   "equal operator returns true for a pair "
>> +   "of values with a different hash value");
> BTW, either use internal_error here, or at least if 
> using fprintf
> terminate with \n, in your recent mail I saw:
> ...different hash valueduring RTL pass: vartrack
>   ^^
 Sure, fixed in attached patch.

 Martin

>> +  gcc_unreachable ();
>> +}
> Jakub
>
 0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch

   From 0d9c979c845580a98767b83c099053d36eb49bb9 Mon 
 Sep 17 00:00:00 2001
 From: marxin 
 Date: Mon, 29 Oct 2018 09:38:21 +0100
 Subject: [PATCH] Sanitize equals and hash functions in 
 hash-tables.

 ---
gcc/hash-table.h | 40 
 +++-
1 file changed, 39 insertions(+), 1 deletion(-)

 diff --git a/gcc/hash-table.h b/gcc/hash-table.h
 index bd83345c7b8..694eedfc4be 100644
 --- a/gcc/hash-table.h
 +++ b/gcc/hash-table.h
 @@ -503,6 +503,7 @@ private:

  value_type *alloc_entries (size_t n 
 CXX_MEM_STAT_INFO) const;
  value_type *find_empty_slot_for_expand 
 (hashval_t);
 +  void verify (const compare_type &comparable, 
 hashval_t hash);
  bool too_empty_p (unsigned int);
  void expand ();
  static bool is_deleted (value_type &v)
 @@ -882,8 +883,12 @@ hash_table
  if (insert == INSERT && m_size * 3 <= 
 m_n_elements * 4)
expand ();

 -  m_searches++;
 +#if ENABLE_EXTRA_CHECKING
 +if (insert == INSERT)
 +  verify (comparable, hash);
 +#endif

 +  m_searches++;
  value_type *first_deleted_slot = NULL;
  hashval_t index = hash_table_mod1 (hash, 
 m_size_prime_index);
  hashval_t hash2 = hash_table_mod2 (hash, 
 m_size_prime_index);
 @@ -930,6 +9

Re: [PATCH] Don't adjust DECL_ALIGN of variables during estimated_stack_frame_size (PR target/90811)

2019-06-12 Thread Jakub Jelinek
On Wed, Jun 12, 2019 at 11:00:43AM +0200, Richard Biener wrote:
> > Bootstrapped/regtested on x86_64-linux and i686-linux and furthermore tested
> > with x86_64-linux to nvptx-none offloading with the nvptx.c PR90811 fix
> > reverted to verify the overaligned variables are gone.  Ok for trunk?
> 
> Isn't there hunk missing that actually passes false?

It is that
+add_stack_var (origvar, really_expand);
expand_one_var is called 3 times:
  expand_one_var (t, toplevel, true);
  size += expand_one_var (var, true, false);
expand_one_var (var, true, true);
where the second one is the estimated_stack_frame_size call where it passes
down false as really_expand.  And the patch passes it down through to
align_local_variable.

> I guess only CCPs bit-value/alignment tracking sees the difference.
> 
> Note we are already aligning variables in stor-layout.c with target
> information so in some way this isn't a real fix.  Offloading as
> implemented right now really leaks target dependent decisions from
> the target to the offload target.

I was thinking about only honoring DECL_ALIGN for non-FIELD_DECLs when
streaming in ACCEL_COMPILER only if DECL_USER_ALIGN, the risk is if
we bump DECL_ALIGN somewhere pre-IPA on the host side and some optimization
actually optimizes code using that DECL_ALIGN, later on we stream offloading
out and in, downgrade DECL_ALIGN and suddenly the optimization was invalid.

> Not opposed to the change though a comment in the align_local_variable
> hunk as to why we only adjust DECL_ALIGN late would be appreciated.

Sure, that can be done:

> > --- gcc/cfgexpand.c.jj  2019-05-20 11:39:34.059816432 +0200
> > +++ gcc/cfgexpand.c 2019-06-11 11:28:08.720932421 +0200
> > @@ -361,7 +361,7 @@ static bool has_short_buffer;
> > we can't do with expected alignment of the stack boundary.  */
> >  
> >  static unsigned int
> > -align_local_variable (tree decl)
> > +align_local_variable (tree decl, bool really_expand)
> >  {
> >unsigned int align;
> >  
> > @@ -370,7 +370,8 @@ align_local_variable (tree decl)
> >else
> >  {
> >align = LOCAL_DECL_ALIGNMENT (decl);
> > -  SET_DECL_ALIGN (decl, align);

+  /* Don't change DECL_ALIGN when called from estimated_stack_frame_size.
+That is done before IPA and could bump alignment based on host
+backend even for offloaded code which wants different
+LOCAL_DECL_ALIGNMENT.  */

> > +  if (really_expand)
> > +   SET_DECL_ALIGN (decl, align);
> >  }
> >return align / BITS_PER_UNIT;
> >  }

Jakub


Re: [PATCH] Don't adjust DECL_ALIGN of variables during estimated_stack_frame_size (PR target/90811)

2019-06-12 Thread Richard Biener
On Wed, 12 Jun 2019, Jakub Jelinek wrote:

> Hi!
> 
> Apparently estimated_stack_frame_size -> expand_one_var -> add_stack_var
> -> align_local_variable changes DECL_ALIGN of stack variables from
> LOCAL_DECL_ALIGNMENT.
> 
> That is unfortunately very undesirable for offloading, because this happens
> early during IPA where we stream out LTO bytecode for the target offloading
> after it, so on the PR90811 testcase x86_64 backend says a local variable
> would be best 128-bit aligned, but in the PTX backend such alignment means
> runtime (virtual) stack adjustments because only 64-bit alignment is
> guaranteed.
> 
> The following patch makes sure we don't update DECL_ALIGN during stack frame
> size estimation, just during actual expansion.
> 
> Bootstrapped/regtested on x86_64-linux and i686-linux and furthermore tested
> with x86_64-linux to nvptx-none offloading with the nvptx.c PR90811 fix
> reverted to verify the overaligned variables are gone.  Ok for trunk?

Isn't there hunk missing that actually passes false?

> Or, is there something in GIMPLE passes that would benefit from the updated
> DECL_ALIGN during post-IPA optimizations, rather than just in RTL passes?
> If yes, we should call those somewhere post-IPA on top of this patch.
> Nothing in the testsuite showed it though.

I guess only CCPs bit-value/alignment tracking sees the difference.

Note we are already aligning variables in stor-layout.c with target
information so in some way this isn't a real fix.  Offloading as
implemented right now really leaks target dependent decisions from
the target to the offload target.

Not opposed to the change though a comment in the align_local_variable
hunk as to why we only adjust DECL_ALIGN late would be appreciated.

Richard.

> 2019-06-12  Jakub Jelinek  
> 
>   PR target/90811
>   * cfgexpand.c (align_local_variable): Add really_expand argument,
>   don't SET_DECL_ALIGN if it is false.
>   (add_stack_var): Add really_expand argument, pass it through to
>   align_local_variable.
>   (expand_one_stack_var_1): Pass true as really_expand to
>   align_local_variable.
>   (expand_one_ssa_partition): Pass true as really_expand to
>   add_stack_var.
>   (expand_one_var): Pass really_expand through to add_stack_var.
> 
> --- gcc/cfgexpand.c.jj2019-05-20 11:39:34.059816432 +0200
> +++ gcc/cfgexpand.c   2019-06-11 11:28:08.720932421 +0200
> @@ -361,7 +361,7 @@ static bool has_short_buffer;
> we can't do with expected alignment of the stack boundary.  */
>  
>  static unsigned int
> -align_local_variable (tree decl)
> +align_local_variable (tree decl, bool really_expand)
>  {
>unsigned int align;
>  
> @@ -370,7 +370,8 @@ align_local_variable (tree decl)
>else
>  {
>align = LOCAL_DECL_ALIGNMENT (decl);
> -  SET_DECL_ALIGN (decl, align);
> +  if (really_expand)
> + SET_DECL_ALIGN (decl, align);
>  }
>return align / BITS_PER_UNIT;
>  }
> @@ -418,7 +419,7 @@ alloc_stack_frame_space (poly_int64 size
>  /* Accumulate DECL into STACK_VARS.  */
>  
>  static void
> -add_stack_var (tree decl)
> +add_stack_var (tree decl, bool really_expand)
>  {
>struct stack_var *v;
>  
> @@ -446,7 +447,7 @@ add_stack_var (tree decl)
>   variables that are simultaneously live.  */
>if (known_eq (v->size, 0U))
>  v->size = 1;
> -  v->alignb = align_local_variable (decl);
> +  v->alignb = align_local_variable (decl, really_expand);
>/* An alignment of zero can mightily confuse us later.  */
>gcc_assert (v->alignb != 0);
>  
> @@ -1323,7 +1324,7 @@ expand_one_stack_var_1 (tree var)
>else
>  {
>size = tree_to_poly_uint64 (DECL_SIZE_UNIT (var));
> -  byte_align = align_local_variable (var);
> +  byte_align = align_local_variable (var, true);
>  }
>  
>/* We handle highly aligned variables in expand_stack_vars.  */
> @@ -1413,7 +1414,7 @@ expand_one_ssa_partition (tree var)
>if (!use_register_for_decl (var))
>  {
>if (defer_stack_allocation (var, true))
> - add_stack_var (var);
> + add_stack_var (var, true);
>else
>   expand_one_stack_var_1 (var);
>return;
> @@ -1695,7 +1696,7 @@ expand_one_var (tree var, bool toplevel,
>   }
>  }
>else if (defer_stack_allocation (var, toplevel))
> -add_stack_var (origvar);
> +add_stack_var (origvar, really_expand);
>else
>  {
>if (really_expand)
> 
>   Jakub
> 

-- 
Richard Biener 
SUSE Linux GmbH, Maxfeldstrasse 5, 90409 Nuernberg, Germany;
GF: Felix Imendörffer, Mary Higgins, Sri Rasiah; HRB 21284 (AG Nürnberg)

Fwd: [PATCH] aarch64' Android Support

2019-06-12 Thread 林作健
The following patch aims to add support to android for aarch64 architecture.

diff --git a/gcc/config.gcc b/gcc/config.gcc
index 09fb9ecd2cd..a8b32e34b47 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -1022,8 +1022,9 @@ aarch64*-*-freebsd*)
  tm_defines="${tm_defines}  TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
  ;;
 aarch64*-*-linux*)
- tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h glibc-stdint.h"
+ tm_file="${tm_file} dbxelf.h elfos.h gnu-user.h linux.h
linux-android.h glibc-stdint.h"
  tm_file="${tm_file} aarch64/aarch64-elf.h aarch64/aarch64-linux.h"
+ extra_options="${extra_options} linux-android.opt"
  tmake_file="${tmake_file} aarch64/t-aarch64 aarch64/t-aarch64-linux"
  tm_defines="${tm_defines}  TARGET_DEFAULT_ASYNC_UNWIND_TABLES=1"
  case $target in
diff --git a/gcc/config/aarch64/aarch64-linux.h
b/gcc/config/aarch64/aarch64-linux.h
index 5e8b34ded03..e1700bf7db6 100644
--- a/gcc/config/aarch64/aarch64-linux.h
+++ b/gcc/config/aarch64/aarch64-linux.h
@@ -62,22 +62,33 @@
   " %{mfix-cortex-a53-843419:--fix-cortex-a53-843419}"
 #endif

-#define LINK_SPEC LINUX_TARGET_LINK_SPEC \
+#define LINK_SPEC LINUX_OR_ANDROID_LD (LINUX_TARGET_LINK_SPEC, \
+LINUX_TARGET_LINK_SPEC " " ANDROID_LINK_SPEC) \
   CA53_ERR_835769_SPEC \
   CA53_ERR_843419_SPEC

+#undef STARTFILE_SPEC
+#define STARTFILE_SPEC \
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_STARTFILE_SPEC, ANDROID_STARTFILE_SPEC)
+
 #define GNU_USER_TARGET_MATHFILE_SPEC \
   "%{Ofast|ffast-math|funsafe-math-optimizations:crtfastmath.o%s}"

 #undef ENDFILE_SPEC
-#define ENDFILE_SPEC   \
-  GNU_USER_TARGET_MATHFILE_SPEC " " \
-  GNU_USER_TARGET_ENDFILE_SPEC
+#define ENDFILE_SPEC \
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_MATHFILE_SPEC " " \
+  GNU_USER_TARGET_ENDFILE_SPEC, ANDROID_ENDFILE_SPEC)
+
+#undef  LIB_SPEC
+#define LIB_SPEC \
+  LINUX_OR_ANDROID_LD (GNU_USER_TARGET_LIB_SPEC, \
+ GNU_USER_TARGET_NO_PTHREADS_LIB_SPEC " " ANDROID_LIB_SPEC)

 #define TARGET_OS_CPP_BUILTINS() \
   do \
 { \
  GNU_USER_TARGET_OS_CPP_BUILTINS(); \
+ ANDROID_TARGET_OS_CPP_BUILTINS(); \
 } \
   while (0)


Re: [PATCH][arm] Implement usadv16qi and ssadv16qi standard names

2019-06-12 Thread Kyrill Tkachov

Hi Przemyslaw

On 6/7/19 10:09 AM, Przemyslaw Wirkus wrote:

Hi all,

This patch implements the usadv16qi and ssadv16qi standard names for arm.

The V16QImode variant is important as it is the most commonly used 
pattern:

reducing vectors of bytes into an int.
The midend expects the optab to compute the absolute differences of 
operands 1
and 2 and reduce them while widening along the way up to SImode. So 
the inputs

are V16QImode and the output is V4SImode.

I've based my solution on Aarch64 usadv16qi and ssadv16qi standard names
current implementation (r260437). This solution emits below sequence of
instructions:

    VABDL.u8    tmp, op1, op2   # op1, op2 lowpart
    VABAL.u8    tmp, op1, op2   # op1, op2 highpart
    VPADAL.u16  op3, tmp

So, for the code:

$ arm-none-linux-gnueabihf-gcc -S -O3 -march=armv8-a+simd -mfpu=auto 
-mfloat-abi=hard usadv16qi.c -dp


#define N 1024
unsigned char pix1[N];
unsigned char pix2[N];

int
foo (void)
{
  int i_sum = 0;
  int i;
  for (i = 0; i < N; i++)
    i_sum += __builtin_abs (pix1[i] - pix2[i]);
  return i_sum;
}

we now generate on arm:
foo:
    movw    r3, #:lower16:pix2  @ 57    [c=4 l=4] *arm_movsi_vfp/3
    movt    r3, #:upper16:pix2  @ 58    [c=4 l=4] *arm_movt/0
    vmov.i32    q9, #0  @ v4si  @ 3 [c=4 l=4] *neon_movv4si/2
    movw    r2, #:lower16:pix1  @ 59    [c=4 l=4] *arm_movsi_vfp/3
    movt    r2, #:upper16:pix1  @ 60    [c=4 l=4] *arm_movt/0
    add r1, r3, #1024   @ 8 [c=4 l=4] *arm_addsi3/4
.L2:
    vld1.8  {q11}, [r3]!    @ 11    [c=8 l=4] 
*movmisalignv16qi_neon_load
    vld1.8  {q10}, [r2]!    @ 10    [c=8 l=4] 
*movmisalignv16qi_neon_load

    cmp r1, r3  @ 21    [c=4 l=4]  *arm_cmpsi_insn/2
    vabdl.u8    q8, d20, d22    @ 12    [c=8 l=4] neon_vabdluv8qi
    vabal.u8    q8, d21, d23    @ 15    [c=88 l=4] neon_vabaluv8qi
    vpadal.u16  q9, q8  @ 16    [c=8 l=4] neon_vpadaluv8hi
    bne .L2 @ 22    [c=16 l=4] arm_cond_branch
    vadd.i32    d18, d18, d19   @ 24    [c=120 l=4] 
quad_halves_plusv4si
    vpadd.i32   d18, d18, d18   @ 25    [c=8 l=4] 
neon_vpadd_internalv2si

    vmov.32 r0, d18[0]  @ 30    [c=12 l=4] vec_extractv2sisi/1

instead of:
foo:
    @ args = 0, pretend = 0, frame = 0
    @ frame_needed = 0, uses_anonymous_args = 0
    @ link register save eliminated.
    movw    r3, #:lower16:pix1
    movt    r3, #:upper16:pix1
    vmov.i32    q9, #0  @ v4si
    movw    r2, #:lower16:pix2
    movt    r2, #:upper16:pix2
    add r1, r3, #1024
.L2:
    vld1.8  {q8}, [r3]!
    vld1.8  {q11}, [r2]!
    vmovl.u8 q10, d16
    cmp r1, r3
    vmovl.u8 q8, d17
    vmovl.u8 q12, d22
    vmovl.u8 q11, d23
    vsub.i16    q10, q10, q12
    vsub.i16    q8, q8, q11
    vabs.s16    q10, q10
    vabs.s16    q8, q8
    vaddw.s16   q9, q9, d20
    vaddw.s16   q9, q9, d21
    vaddw.s16   q9, q9, d16
    vaddw.s16   q9, q9, d17
    bne .L2
    vadd.i32    d18, d18, d19
    vpadd.i32   d18, d18, d18
    vmov.32 r0, d18[0]

Bootstrapped and tested on arm-none-linux-gnueabihf.

Ok for trunk?

Thanks,
Przemyslaw

2019-05-29 Przemyslaw Wirkus 

    * config/arm/iterators.md (VABAL): New int iterator.
    * config/arm/neon.md (sadv16qi): New define_expand.
    * config/arm/unspecs.md ("unspec"): Define UNSPEC_VABAL_S, 
UNSPEC_VABAL_U

    values.

2019-05-29 Przemyslaw Wirkus 

    * gcc.target/arm/ssadv16qi.c: New test.
    * gcc.target/arm/usadv16qi.c: Likewise.



Changelog entries need two spaces between date, name and email.

Thank you for the patch, it looks good to me. I've committed it on your 
behalf with the tweaked ChangeLogs as r272180.


I'd also like us to emit a TARGET_DOTPROD-optimised sequence for this 
optab like in https://gcc.gnu.org/ml/gcc-patches/2019-05/msg00594.html 
but that can be a separate patch.


Thanks,

Kyrill



Re: [AARCH64] Fix typo in comment

2019-06-12 Thread Kyrill Tkachov

Hi Kugan,

On 6/12/19 4:59 AM, Kugan Vivekanandarajah wrote:

AArch64 comment for ADDSUB iterator is a typo or copy-and-paste error.
Attached patch fixes this. I believe this falls under obvious
category. I will commit it after 48hrs unless comments should be
better worded.

Thanks,
Kugan


gcc/ChangeLog:

2019-06-12  Kugan Vivekanandarajah 

    * config/aarch64/iterators.md (ADDSUB): Fix typo in comment.


diff --git a/gcc/config/aarch64/iterators.md 
b/gcc/config/aarch64/iterators.md

index 2179e6f..49c8146 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -1215,7 +1215,7 @@
 ;; Signed and unsigned max operations.
 (define_code_iterator USMAX [smax umax])

-;; Code iterator for variants of vector max and min.

+;; Code iterator for variants of vector plus and minus.


I'd remove the "variants" and have it "Code iterator for plus and minus"

I do agree such a change is obvious.

Thanks,

Kyrill

 (define_code_iterator ADDSUB [plus minus])

 ;; Code iterator for variants of vector saturating binary ops.



Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Martin Liška
On 6/12/19 9:59 AM, Richard Biener wrote:
> On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
>>
>> On 6/11/19 9:16 AM, Martin Liška wrote:
>>> On 6/11/19 2:27 PM, Jason Merrill wrote:
 On 6/11/19 3:41 AM, Martin Liška wrote:
> On 6/10/19 8:21 PM, Jason Merrill wrote:
>> On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
>>> On 6/7/19 11:43 PM, Jason Merrill wrote:
 On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  wrote:
> On 6/7/19 2:09 PM, Richard Biener wrote:
>> On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  wrote:
>>> On 6/7/19 10:57 AM, Richard Biener wrote:
 On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  wrote:
> On 6/1/19 12:06 AM, Jeff Law wrote:
>> On 5/22/19 3:13 AM, Martin Liška wrote:
>>> On 5/21/19 1:51 PM, Richard Biener wrote:
 On Tue, May 21, 2019 at 1:02 PM Martin Liška  
 wrote:
>
> On 5/21/19 11:38 AM, Richard Biener wrote:
>> On Tue, May 21, 2019 at 12:07 AM Jeff Law  
>> wrote:
>>>
>>> On 5/13/19 1:41 AM, Martin Liška wrote:
 On 11/8/18 9:56 AM, Martin Liška wrote:
> On 11/7/18 11:23 PM, Jeff Law wrote:
>> On 10/30/18 6:28 AM, Martin Liška wrote:
>>> On 10/30/18 11:03 AM, Jakub Jelinek wrote:
 On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin Liška 
 wrote:
> +hashtab_chk_error ()
> +{
> +  fprintf (stderr, "hash table checking failed: "
> +   "equal operator returns true for a pair "
> +   "of values with a different hash value");
 BTW, either use internal_error here, or at least if 
 using fprintf
 terminate with \n, in your recent mail I saw:
 ...different hash valueduring RTL pass: vartrack
   ^^
>>> Sure, fixed in attached patch.
>>>
>>> Martin
>>>
> +  gcc_unreachable ();
> +}
 Jakub

>>> 0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch
>>>
>>>   From 0d9c979c845580a98767b83c099053d36eb49bb9 Mon Sep 
>>> 17 00:00:00 2001
>>> From: marxin 
>>> Date: Mon, 29 Oct 2018 09:38:21 +0100
>>> Subject: [PATCH] Sanitize equals and hash functions in 
>>> hash-tables.
>>>
>>> ---
>>>gcc/hash-table.h | 40 
>>> +++-
>>>1 file changed, 39 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/gcc/hash-table.h b/gcc/hash-table.h
>>> index bd83345c7b8..694eedfc4be 100644
>>> --- a/gcc/hash-table.h
>>> +++ b/gcc/hash-table.h
>>> @@ -503,6 +503,7 @@ private:
>>>
>>>  value_type *alloc_entries (size_t n 
>>> CXX_MEM_STAT_INFO) const;
>>>  value_type *find_empty_slot_for_expand (hashval_t);
>>> +  void verify (const compare_type &comparable, 
>>> hashval_t hash);
>>>  bool too_empty_p (unsigned int);
>>>  void expand ();
>>>  static bool is_deleted (value_type &v)
>>> @@ -882,8 +883,12 @@ hash_table
>>>  if (insert == INSERT && m_size * 3 <= m_n_elements 
>>> * 4)
>>>expand ();
>>>
>>> -  m_searches++;
>>> +#if ENABLE_EXTRA_CHECKING
>>> +if (insert == INSERT)
>>> +  verify (comparable, hash);
>>> +#endif
>>>
>>> +  m_searches++;
>>>  value_type *first_deleted_slot = NULL;
>>>  hashval_t index = hash_table_mod1 (hash, 
>>> m_size_prime_index);
>>>  hashval_t hash2 = hash_table_mod2 (hash, 
>>> m_size_prime_index);
>>> @@ -930,6 +935,39 @@ hash_table
>>>  return &m_entries[index];
>>>}
>>>
>>> +#if ENABLE_EXTRA_CHECKING

Re: [PATCH][RFC] Sanitize equals and hash functions in hash-tables.

2019-06-12 Thread Richard Biener
On Tue, Jun 11, 2019 at 9:02 PM Jason Merrill  wrote:
>
> On 6/11/19 9:16 AM, Martin Liška wrote:
> > On 6/11/19 2:27 PM, Jason Merrill wrote:
> >> On 6/11/19 3:41 AM, Martin Liška wrote:
> >>> On 6/10/19 8:21 PM, Jason Merrill wrote:
>  On Mon, Jun 10, 2019 at 3:08 AM Martin Liška  wrote:
> > On 6/7/19 11:43 PM, Jason Merrill wrote:
> >> On Fri, Jun 7, 2019 at 8:14 AM Martin Liška  wrote:
> >>> On 6/7/19 2:09 PM, Richard Biener wrote:
>  On Fri, Jun 7, 2019 at 2:03 PM Martin Liška  wrote:
> > On 6/7/19 10:57 AM, Richard Biener wrote:
> >> On Mon, Jun 3, 2019 at 3:35 PM Martin Liška  wrote:
> >>> On 6/1/19 12:06 AM, Jeff Law wrote:
>  On 5/22/19 3:13 AM, Martin Liška wrote:
> > On 5/21/19 1:51 PM, Richard Biener wrote:
> >> On Tue, May 21, 2019 at 1:02 PM Martin Liška  
> >> wrote:
> >>>
> >>> On 5/21/19 11:38 AM, Richard Biener wrote:
>  On Tue, May 21, 2019 at 12:07 AM Jeff Law  
>  wrote:
> >
> > On 5/13/19 1:41 AM, Martin Liška wrote:
> >> On 11/8/18 9:56 AM, Martin Liška wrote:
> >>> On 11/7/18 11:23 PM, Jeff Law wrote:
>  On 10/30/18 6:28 AM, Martin Liška wrote:
> > On 10/30/18 11:03 AM, Jakub Jelinek wrote:
> >> On Mon, Oct 29, 2018 at 04:14:21PM +0100, Martin Liška 
> >> wrote:
> >>> +hashtab_chk_error ()
> >>> +{
> >>> +  fprintf (stderr, "hash table checking failed: "
> >>> +   "equal operator returns true for a pair "
> >>> +   "of values with a different hash value");
> >> BTW, either use internal_error here, or at least if 
> >> using fprintf
> >> terminate with \n, in your recent mail I saw:
> >> ...different hash valueduring RTL pass: vartrack
> >>   ^^
> > Sure, fixed in attached patch.
> >
> > Martin
> >
> >>> +  gcc_unreachable ();
> >>> +}
> >> Jakub
> >>
> > 0001-Sanitize-equals-and-hash-functions-in-hash-tables.patch
> >
> >   From 0d9c979c845580a98767b83c099053d36eb49bb9 Mon Sep 
> > 17 00:00:00 2001
> > From: marxin 
> > Date: Mon, 29 Oct 2018 09:38:21 +0100
> > Subject: [PATCH] Sanitize equals and hash functions in 
> > hash-tables.
> >
> > ---
> >gcc/hash-table.h | 40 
> > +++-
> >1 file changed, 39 insertions(+), 1 deletion(-)
> >
> > diff --git a/gcc/hash-table.h b/gcc/hash-table.h
> > index bd83345c7b8..694eedfc4be 100644
> > --- a/gcc/hash-table.h
> > +++ b/gcc/hash-table.h
> > @@ -503,6 +503,7 @@ private:
> >
> >  value_type *alloc_entries (size_t n 
> > CXX_MEM_STAT_INFO) const;
> >  value_type *find_empty_slot_for_expand (hashval_t);
> > +  void verify (const compare_type &comparable, 
> > hashval_t hash);
> >  bool too_empty_p (unsigned int);
> >  void expand ();
> >  static bool is_deleted (value_type &v)
> > @@ -882,8 +883,12 @@ hash_table
> >  if (insert == INSERT && m_size * 3 <= m_n_elements 
> > * 4)
> >expand ();
> >
> > -  m_searches++;
> > +#if ENABLE_EXTRA_CHECKING
> > +if (insert == INSERT)
> > +  verify (comparable, hash);
> > +#endif
> >
> > +  m_searches++;
> >  value_type *first_deleted_slot = NULL;
> >  hashval_t index = hash_table_mod1 (hash, 
> > m_size_prime_index);
> >  hashval_t hash2 = hash_table_mod2 (hash, 
> > m_size_prime_index);
> > @@ -930,6 +935,39 @@ hash_table
> >  return &m_entries[index];
> >}
> >
> > +#if ENABLE_EXTRA_CHECKING
> > +
> >

[PATCH] Fix 3 warnings in nvptx.c

2019-06-12 Thread Jakub Jelinek
Hi!

While testing the second PR90811 patch with nvptx offloading, I've noticed 3
warnings in nvptx.c.
The first two hunks are obvious, the last warning is about unused mode_label
variable.
Either we can do what the patch does, or another option is throw away
both the mode_jump and mode_label temporaries, keep the last hunk unmodified
and
-   *mode_jump = label_insn;
+   if (mode == GOMP_DIM_VECTOR)
+ vector_jump = neuter_start;
+   else
+ worker_jump = neuter_start;

2019-06-12  Jakub Jelinek  

* config/nvptx/nvptx.c (nvptx_sese_number, nvptx_sese_pseudo): Don't
wrap ei variable name in the declaration in ()s.
(nvptx_single): Actually use mode_label variable.  Formatting fix.

--- gcc/config/nvptx/nvptx.c.jj 2019-06-11 23:21:28.068148871 +0200
+++ gcc/config/nvptx/nvptx.c2019-06-11 23:27:30.412633621 +0200
@@ -3551,7 +3551,7 @@ nvptx_sese_number (int n, int p, int dir
   size_t offset = (dir > 0 ? offsetof (edge_def, dest)
   : offsetof (edge_def, src));
   edge e;
-  edge_iterator (ei);
+  edge_iterator ei;
 
   FOR_EACH_EDGE (e, ei, edges)
{
@@ -3574,7 +3574,7 @@ nvptx_sese_pseudo (basic_block me, bb_se
   vec *edges, size_t offset)
 {
   edge e;
-  edge_iterator (ei);
+  edge_iterator ei;
   int hi_back = depth;
   pseudo_node_t node_back (0, depth);
   int hi_child = depth;
@@ -4402,8 +4402,10 @@ nvptx_single (unsigned mask, basic_block
   {
rtx_code_label *label = gen_label_rtx ();
rtx pred = cfun->machine->axis_predicate[mode - GOMP_DIM_WORKER];
-   rtx_insn **mode_jump = mode == GOMP_DIM_VECTOR ? &vector_jump : 
&worker_jump;
-   rtx_insn **mode_label = mode == GOMP_DIM_VECTOR ? &vector_label : 
&worker_label;
+   rtx_insn **mode_jump
+ = mode == GOMP_DIM_VECTOR ? &vector_jump : &worker_jump;
+   rtx_insn **mode_label
+ = mode == GOMP_DIM_VECTOR ? &vector_label : &worker_label;
 
if (!pred)
  {
@@ -4437,10 +4439,7 @@ nvptx_single (unsigned mask, basic_block
  emit_insn_after (gen_exit (), label_insn);
  }
 
-   if (mode == GOMP_DIM_VECTOR)
- vector_label = label_insn;
-   else
- worker_label = label_insn;
+   *mode_label = label_insn;
   }
 
   /* Now deal with propagating the branch condition.  */

Jakub


Re: [PATCH] check_format fixes

2019-06-12 Thread Jakub Jelinek
On Tue, Jun 11, 2019 at 12:49:32PM +0200, Jakub Jelinek wrote:
> Tested on x86_64-linux with check-gfortran, ok for trunk if full 
> bootstrap/regtest
> passes?
> 
> 2019-06-11  Jakub Jelinek  
> 
>   * io.c (check_format): Use G_(...) instead of _(...) for error values,
>   append " in format string at %L" to all strings but unexpected_element,
>   use error as gfc_error formating string instead of
>   "%s in format string at %L".  Formatting fixes.

FYI, bootstrapped/regtested successfully on x86_64-linux and i686-linux.

Jakub


[PATCH] Don't adjust DECL_ALIGN of variables during estimated_stack_frame_size (PR target/90811)

2019-06-12 Thread Jakub Jelinek
Hi!

Apparently estimated_stack_frame_size -> expand_one_var -> add_stack_var
-> align_local_variable changes DECL_ALIGN of stack variables from
LOCAL_DECL_ALIGNMENT.

That is unfortunately very undesirable for offloading, because this happens
early during IPA where we stream out LTO bytecode for the target offloading
after it, so on the PR90811 testcase x86_64 backend says a local variable
would be best 128-bit aligned, but in the PTX backend such alignment means
runtime (virtual) stack adjustments because only 64-bit alignment is
guaranteed.

The following patch makes sure we don't update DECL_ALIGN during stack frame
size estimation, just during actual expansion.

Bootstrapped/regtested on x86_64-linux and i686-linux and furthermore tested
with x86_64-linux to nvptx-none offloading with the nvptx.c PR90811 fix
reverted to verify the overaligned variables are gone.  Ok for trunk?

Or, is there something in GIMPLE passes that would benefit from the updated
DECL_ALIGN during post-IPA optimizations, rather than just in RTL passes?
If yes, we should call those somewhere post-IPA on top of this patch.
Nothing in the testsuite showed it though.

2019-06-12  Jakub Jelinek  

PR target/90811
* cfgexpand.c (align_local_variable): Add really_expand argument,
don't SET_DECL_ALIGN if it is false.
(add_stack_var): Add really_expand argument, pass it through to
align_local_variable.
(expand_one_stack_var_1): Pass true as really_expand to
align_local_variable.
(expand_one_ssa_partition): Pass true as really_expand to
add_stack_var.
(expand_one_var): Pass really_expand through to add_stack_var.

--- gcc/cfgexpand.c.jj  2019-05-20 11:39:34.059816432 +0200
+++ gcc/cfgexpand.c 2019-06-11 11:28:08.720932421 +0200
@@ -361,7 +361,7 @@ static bool has_short_buffer;
we can't do with expected alignment of the stack boundary.  */
 
 static unsigned int
-align_local_variable (tree decl)
+align_local_variable (tree decl, bool really_expand)
 {
   unsigned int align;
 
@@ -370,7 +370,8 @@ align_local_variable (tree decl)
   else
 {
   align = LOCAL_DECL_ALIGNMENT (decl);
-  SET_DECL_ALIGN (decl, align);
+  if (really_expand)
+   SET_DECL_ALIGN (decl, align);
 }
   return align / BITS_PER_UNIT;
 }
@@ -418,7 +419,7 @@ alloc_stack_frame_space (poly_int64 size
 /* Accumulate DECL into STACK_VARS.  */
 
 static void
-add_stack_var (tree decl)
+add_stack_var (tree decl, bool really_expand)
 {
   struct stack_var *v;
 
@@ -446,7 +447,7 @@ add_stack_var (tree decl)
  variables that are simultaneously live.  */
   if (known_eq (v->size, 0U))
 v->size = 1;
-  v->alignb = align_local_variable (decl);
+  v->alignb = align_local_variable (decl, really_expand);
   /* An alignment of zero can mightily confuse us later.  */
   gcc_assert (v->alignb != 0);
 
@@ -1323,7 +1324,7 @@ expand_one_stack_var_1 (tree var)
   else
 {
   size = tree_to_poly_uint64 (DECL_SIZE_UNIT (var));
-  byte_align = align_local_variable (var);
+  byte_align = align_local_variable (var, true);
 }
 
   /* We handle highly aligned variables in expand_stack_vars.  */
@@ -1413,7 +1414,7 @@ expand_one_ssa_partition (tree var)
   if (!use_register_for_decl (var))
 {
   if (defer_stack_allocation (var, true))
-   add_stack_var (var);
+   add_stack_var (var, true);
   else
expand_one_stack_var_1 (var);
   return;
@@ -1695,7 +1696,7 @@ expand_one_var (tree var, bool toplevel,
}
 }
   else if (defer_stack_allocation (var, toplevel))
-add_stack_var (origvar);
+add_stack_var (origvar, really_expand);
   else
 {
   if (really_expand)

Jakub