C++ PATCH to static auto in template

2018-05-10 Thread Jason Merrill
Normally we defer instantiation of static data member initializers
until they are needed, but we were doing it immediately for auto
variables.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit a16c8170ac1c148087ee7798b9783656e53ab490
Author: Jason Merrill 
Date:   Wed Mar 14 20:11:38 2018 -0400

* decl.c (cp_finish_decl): Don't instantiate auto variable.

(check_static_variable_definition): Allow auto.
* constexpr.c (ensure_literal_type_for_constexpr_object): Likewise.

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index b4bcc6a567d..d9a4cab9f37 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -91,6 +91,8 @@ ensure_literal_type_for_constexpr_object (tree decl)
   if (CLASS_TYPE_P (stype) && !COMPLETE_TYPE_P (complete_type (stype)))
 	/* Don't complain here, we'll complain about incompleteness
 	   when we try to initialize the variable.  */;
+  else if (type_uses_auto (type))
+	/* We don't know the actual type yet.  */;
   else if (!literal_type_p (type))
 	{
 	  if (DECL_DECLARED_CONSTEXPR_P (decl))
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index d58964754b9..20ca28fc878 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -6803,24 +6803,18 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p,
   && (DECL_INITIAL (decl) || init))
 DECL_INITIALIZED_IN_CLASS_P (decl) = 1;
 
+  /* Do auto deduction unless decl is a function or an uninstantiated
+ template specialization.  */
   if (TREE_CODE (decl) != FUNCTION_DECL
+  && !(init == NULL_TREE
+	   && DECL_LANG_SPECIFIC (decl)
+	   && DECL_TEMPLATE_INSTANTIATION (decl)
+	   && !DECL_TEMPLATE_INSTANTIATED (decl))
   && (auto_node = type_uses_auto (type)))
 {
   tree d_init;
   if (init == NULL_TREE)
-	{
-	  if (DECL_LANG_SPECIFIC (decl)
-	  && DECL_TEMPLATE_INSTANTIATION (decl)
-	  && !DECL_TEMPLATE_INSTANTIATED (decl))
-	{
-	  /* init is null because we're deferring instantiating the
-		 initializer until we need it.  Well, we need it now.  */
-	  instantiate_decl (decl, /*defer_ok*/true, /*expl*/false);
-	  return;
-	}
-
-	  gcc_assert (CLASS_PLACEHOLDER_TEMPLATE (auto_node));
-	}
+	gcc_assert (CLASS_PLACEHOLDER_TEMPLATE (auto_node));
   d_init = init;
   if (d_init)
 	{
@@ -9368,6 +9362,7 @@ check_static_variable_definition (tree decl, tree type)
  in check_initializer.  Similarly for inline static data members.  */
   if (DECL_P (decl)
   && (DECL_DECLARED_CONSTEXPR_P (decl)
+	  || undeduced_auto_decl (decl)
 	  || DECL_VAR_DECLARED_INLINE_P (decl)))
 return 0;
   else if (cxx_dialect >= cxx11 && !INTEGRAL_OR_ENUMERATION_TYPE_P (type))
diff --git a/gcc/testsuite/g++.dg/cpp1z/static2.C b/gcc/testsuite/g++.dg/cpp1z/static2.C
new file mode 100644
index 000..b87bfec3aaa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/static2.C
@@ -0,0 +1,17 @@
+// { dg-additional-options -std=c++17 }
+
+template <class T>
+struct A
+{
+  static constexpr auto x = T::x;
+};
+
+struct B;
+A<B> a;
+
+struct B
+{
+  static constexpr auto x = 42;
+};
+
+auto x = a.x;


Re: [libstdc++, PATCH] PR libstdc++/83140 - assoc_legendre returns negated value when m is odd.

2018-05-10 Thread Ed Smith-Rowland

On 05/10/2018 01:44 PM, Rainer Orth wrote:

Hi Ed,


2018-05-07  Edward Smith-Rowland  <3dw...@verizon.net>

 PR libstdc++/83140 - assoc_legendre returns negated value when m is
odd
 * include/tr1/legendre_function.tcc (__assoc_legendre_p): Add
__phase
 argument defaulted to +1.  Doxy comments on same.
 * testsuite/special_functions/02_assoc_legendre/
 check_assoc_legendre.cc: Regen.
 * testsuite/tr1/5_numerical_facilities/special_functions/
 02_assoc_legendre/check_tr1_assoc_legendre.cc: Regen.

something went badly wrong with the regeneration of this last file: both
in your attached patch and in what you checked in, the file is empty.

Rainer


I had hosed up the ChangeLog!

CL change committed as 260149.

New Log attached.

Sorry.



2018-05-10  Edward Smith-Rowland  <3dw...@verizon.net>

PR libstdc++/83140 - assoc_legendre returns negated value when m is odd
* include/tr1/legendre_function.tcc (__assoc_legendre_p): Add __phase
argument defaulted to +1.  Doxy comments on same.
* testsuite/special_functions/02_assoc_legendre/
check_value.cc: Regen.
* testsuite/tr1/5_numerical_facilities/special_functions/
02_assoc_legendre/check_value.cc: Regen.



Re: [PATCH 1/3] Add PTWRITE builtins for x86

2018-05-10 Thread Andi Kleen
> @@ -31325,7 +31329,21 @@ ix86_init_mmx_sse_builtins (void)
>  continue;
> 
>ftype = (enum ix86_builtin_func_type) d->flag;
> -  def_builtin2 (d->mask, d->name, ftype, d->code);
> +  decl = def_builtin2 (d->mask, d->name, ftype, d->code);
> +
> +  /* Avoid edges for ptwrites generated by vartrace pass.  */
> +  if (decl)
> +{
> +  DECL_ATTRIBUTES (decl) = build_tree_list (get_identifier ("leaf"),
> +NULL_TREE);
> +  TREE_NOTHROW (decl) = 1;
> +}
> +  else
> +{
> +  ix86_builtins_isa[(int)d->code].leaf_p = true;
> +  ix86_builtins_isa[(int)d->code].nothrow_p = true;
> +}
> +
> 
> Can you please explain what is the purpose of the above change?

With the vartrace patch, which was the next patch in the original
patchkit, the compiler can generate a lot of ptwrite builtins,
and adding so many edges can slow it down. I originally copied
this from the MPX builtins (which had the same problem).

Possibly it could be in another patch, but then it would seem 
better to have the same semantics always. FWIW I don't think
the edges are needed for anything, but of course for moderate
use of the builtin it doesn't really matter.

-Andi


[PATCH] Fortran cleanup patch

2018-05-10 Thread Steve Kargl
The attached patch removes an unused function.
OK to commit?

2018-05-10  Steven G. Kargl  

   * gfortran.h: Remove prototype.
   * symbol.c (gfc_new_undo_checkpoint): Remove unused function.

-- 
Steve
Index: gcc/fortran/gfortran.h
===
--- gcc/fortran/gfortran.h	(revision 260141)
+++ gcc/fortran/gfortran.h	(working copy)
@@ -3033,7 +3033,6 @@ int gfc_get_sym_tree (const char *, gfc_namespace *, g
 int gfc_get_ha_symbol (const char *, gfc_symbol **);
 int gfc_get_ha_sym_tree (const char *, gfc_symtree **);
 
-void gfc_new_undo_checkpoint (gfc_undo_change_set &);
 void gfc_drop_last_undo_checkpoint (void);
 void gfc_restore_last_undo_checkpoint (void);
 void gfc_undo_symbols (void);
Index: gcc/fortran/symbol.c
===
--- gcc/fortran/symbol.c	(revision 260141)
+++ gcc/fortran/symbol.c	(working copy)
@@ -3484,22 +3484,6 @@ find_common_symtree (gfc_symtree *st, gfc_common_head 
 }
 
 
-/* Clear the given storage, and make it the current change set for registering
-   changed symbols.  Its contents are freed after a call to
-   gfc_restore_last_undo_checkpoint or gfc_drop_last_undo_checkpoint, but
-   it is up to the caller to free the storage itself.  It is usually a local
-   variable, so there is nothing to do anyway.  */
-
-void
-gfc_new_undo_checkpoint (gfc_undo_change_set &chg_syms)
-{
-  chg_syms.syms = vNULL;
-  chg_syms.tbps = vNULL;
-  chg_syms.previous = latest_undo_chgset;
-  latest_undo_chgset = _syms;
-}
-
-
 /* Restore previous state of symbol.  Just copy simple stuff.  */
 
 static void


Re: [PATCH] PowerPC address support clean, patch 3 of 4

2018-05-10 Thread Michael Meissner
On Thu, May 10, 2018 at 05:20:52PM -0500, Segher Boessenkool wrote:
> On Thu, May 10, 2018 at 05:49:12PM -0400, Michael Meissner wrote:
> > > > -/* Return true if we have D-form addressing in altivec registers.  */
> > > > +/* Return true if we have D-form addressing (register+offset) in 
> > > > either a
> > > > +   specific reload register class or whether some reload register class
> > > > +   supports d-form addressing.  */
> > > >  static inline bool
> > > > -mode_supports_vmx_dform (machine_mode mode)
> > > > +mode_supports_d_form (machine_mode mode,
> > > > + enum rs6000_reload_reg_type rt = RELOAD_REG_ANY)
> > > >  {
> > > > -  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & 
> > > > RELOAD_REG_OFFSET) != 0);
> > > > +  return ((reg_addr[mode].addr_mask[rt] & RELOAD_REG_OFFSET) != 0);
> > > >  }
> > > 
> > > Will this overload help anything?  It does not look that way, all current
> > > callers use a different argument (and all the same).
> > 
> > All current callers just use the ANY option (except for these calls).  
> > However
> > in the future, I'm planning on calling these functions with the specific 
> > reload
> > register class (hence the change).
> 
> No, they use RELOAD_REG_VMX.  Unless there is something extra tricky
> about your patch?

Yes, point is to make it more general.

Right now, mode_supports_vmx_dform is only called to see if we have the ISA 3.0
d/ds/dq-form instructions.  It is called in secondary reload to see if a
register supports d*-form insns.  It is also called in preferred reload class
to say whether we would prefer just a FPR register for a d*-form insn or we can
also tolerate a VMX register.

However, as I contemplate reworking the memory support, I want to clean up a
lot of the code that knows certain modes can do certain address forms.  I'm
getting to hate code like:

if (mode == SFmode || mode == DFmode || ...)

So I was just trying to clean things up, and migrate to using the
reg_addr[mode].addr_mask bits.  Having the inline functions can also make the
line length smaller.

--
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] PowerPC address support clean, patch 3 of 4

2018-05-10 Thread Segher Boessenkool
On Thu, May 10, 2018 at 05:49:12PM -0400, Michael Meissner wrote:
> > > -/* Return true if we have D-form addressing in altivec registers.  */
> > > +/* Return true if we have D-form addressing (register+offset) in either a
> > > +   specific reload register class or whether some reload register class
> > > +   supports d-form addressing.  */
> > >  static inline bool
> > > -mode_supports_vmx_dform (machine_mode mode)
> > > +mode_supports_d_form (machine_mode mode,
> > > +   enum rs6000_reload_reg_type rt = RELOAD_REG_ANY)
> > >  {
> > > -  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) 
> > > != 0);
> > > +  return ((reg_addr[mode].addr_mask[rt] & RELOAD_REG_OFFSET) != 0);
> > >  }
> > 
> > Will this overload help anything?  It does not look that way, all current
> > callers use a different argument (and all the same).
> 
> All current callers just use the ANY option (except for these calls).  However
> in the future, I'm planning on calling these functions with the specific 
> reload
> register class (hence the change).

No, they use RELOAD_REG_VMX.  Unless there is something extra tricky
about your patch?

> > Overloads are nice if they make things *easier* for the reader, not harder.
> > Same as with all other syntactic sugar.


Segher


Re: [PATCH] PowerPC: Reformat move insns to make them clearing, patch #5 of 5

2018-05-10 Thread Segher Boessenkool
On Thu, May 03, 2018 at 02:14:34PM -0400, Michael Meissner wrote:
> 2018-05-03  Michael Meissner  
> 
>   * config/rs6000/rs6000.md (mov<mode>_softfloat, FMOVE32):
>   Reformat alternatives and attributes so it is easier to identify
>   which constraints/attributes go with which instruction.

> +;;   MR   MT%0   MF%0   LWZSTWLI
> +;;   LIS  G-const.   F/n-const  NOP

MTLR/CTR again please.  Or MTLR/MTCTR if that fits.  Something like that.

Looks fine, please apply to trunk.  Thanks,


Segher


Re: [PATCH] PowerPC: Reformat move insns to make them clearing, patch #3 of 5

2018-05-10 Thread Segher Boessenkool
Hi,

On Thu, May 03, 2018 at 02:12:55PM -0400, Michael Meissner wrote:
>  ; ld/std require word-aligned displacements -> 'Y' constraint.
>  ; List Y->r and r->Y before r->r for reload.
> +
> +;;   STFD LFD FMR LXSDSTXSD
> +;;   LXSDXSTXSDX  XXLOR   XXLXOR  LI 0
> +;;   STD  LD  MR  MT MF

Could you make it clear which SPRs?  "MTLR/CTR" perhaps?

> +;;   NOP  MFTGPR  MFFGPR  MTVSRD  MFVSRD

You swapped MTVSRD and MFVSRD here.


Okay for trunk with those things fixed.  Thanks!


Segher


Re: [PATCH] handle local aggregate initialization in strlen (PR 83821)

2018-05-10 Thread Marc Glisse

On Thu, 10 May 2018, Martin Sebor wrote:


Can you please comment/respond to Jeff's question below and
confirm whether my understanding of the restriction (below)
is correct?


I don't remember it at all, I really should have expanded that comment...

The documentation of nonzero_chars seems to indicate that, unless 
full_string_p, it is only a lower bound on the length of the string, so 
not suitable for this kind of alias check. I don't know if we also have 
easy access to some upper bound.


(I noticed while looking at this pass that it could probably use 
POINTER_DIFF_EXPR more)


--
Marc Glisse


Re: [PATCH] PowerPC: Reformat move insns to make them clearing, patch #4 of 5

2018-05-10 Thread Segher Boessenkool
On Thu, May 03, 2018 at 02:13:42PM -0400, Michael Meissner wrote:
> 2018-05-03  Michael Meissner  
> 
>   * config/rs6000/rs6000.md (mov<mode>_softfloat64, FMOVE64):
>   Reformat alternatives and attributes so it is easier to identify
>   which constraints/attributes go with which instruction.

Okay for trunk.  Thanks!


Segher


Re: [PATCH] PowerPC: Reformat move insns to make them clearing, patch #2 of 5

2018-05-10 Thread Segher Boessenkool
On Thu, May 03, 2018 at 02:12:05PM -0400, Michael Meissner wrote:
> 2018-05-03  Michael Meissner  
> 
>   * config/rs6000/rs6000.md (mov<mode>_softfloat32, FMOVE64):
>   Reformat alternatives and attributes so it is easier to identify
>   which constraints/attributes go with which instruction.

Okay, thanks!


Segher


Re: [PATCH] PowerPC address support clean, patch 4 of 4

2018-05-10 Thread Michael Meissner
On Thu, May 10, 2018 at 04:55:02PM -0500, Segher Boessenkool wrote:
> Hi,
> 
> On Thu, May 03, 2018 at 01:23:24PM -0400, Michael Meissner wrote:
> > -/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
> > +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC in a 
> > given
> > +   reload register class or if some reload register class supports it.  */
> >  static inline bool
> > -mode_supports_pre_incdec_p (machine_mode mode)
> > +mode_supports_pre_incdec_p (machine_mode mode,
> > +   enum rs6000_reload_reg_type rt = RELOAD_REG_ANY)
> >  {
> > -  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & 
> > RELOAD_REG_PRE_INCDEC)
> > - != 0);
> > +  return ((reg_addr[mode].addr_mask[rt] & RELOAD_REG_PRE_INCDEC) != 0);
> >  }
> 
> Same issue here: does the default argument help, or hurt?  The function
> names now do not describe what the function does, either :-/

I dunno, to me it describes it because each of the 3 reload register classes
can have different constraints.  So when you are in secondary reload and
afterwards, you will want the specific register class.  Before register allocation,
you just want to know if any register class supports the access type for the
mode.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] PowerPC: Reformat move insns to make them clearing, patch #1 of 5

2018-05-10 Thread Segher Boessenkool
On Thu, May 03, 2018 at 02:10:50PM -0400, Michael Meissner wrote:
> 2018-05-03  Michael Meissner  
> 
>   * config/rs6000/rs6000.md (mov<mode>_hardfloat32, FMOVE64):
>   Reformat alternatives and attributes so it is easier to identify
>   which constraints/attributes go with which instruction.

This is fine, thanks!


Segher


Re: [PATCH] PowerPC address support clean, patch 1 of 4

2018-05-10 Thread Michael Meissner
On Wed, May 09, 2018 at 05:54:53PM -0500, Segher Boessenkool wrote:
> Hi Mike,
> 
> On Thu, May 03, 2018 at 01:17:03PM -0400, Michael Meissner wrote:
> > 2018-05-03  Michael Meissner  
> > 
> > * config/rs6000/rs6000.c (mode_supports_dq_form): Rename
> > mode_supports_vsx_dform_quad to mode_supports_dq_form.
> > (mode_supports_vsx_dform_quad): Likewise.
> > (quad_address_p): Likewise.
> > (reg_offset_addressing_ok_p): Likewise.
> > (offsettable_ok_by_alignment): Likewise.
> > (rs6000_legitimate_offset_address_p): Likewise.
> > (legitimate_lo_sum_address_p): Likewise.
> > (rs6000_legitimize_address): Likewise.
> > (rs6000_legitimize_reload_address): Likewise.
> > (rs6000_secondary_reload_inner): Likewise.
> > (rs6000_preferred_reload_class): Likewise.
> > (rs6000_output_move_128bit): Likewise.
> 
>   * config/rs6000/rs6000.c (mode_supports_vsx_dform_quad): Rename to ...
>   (mode_supports_dq_form): ... this.  Update all callers.
> 
> 
> > --- gcc/config/rs6000/rs6000.c  (revision 259864)
> > +++ gcc/config/rs6000/rs6000.c  (working copy)
> > @@ -649,7 +649,7 @@ mode_supports_vmx_dform (machine_mode mo
> > is more limited than normal d-form addressing in that the offset must be
> > aligned on a 16-byte boundary.  */
> >  static inline bool
> > -mode_supports_vsx_dform_quad (machine_mode mode)
> > +mode_supports_dq_form (machine_mode mode)
> >  {
> >return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & 
> > RELOAD_REG_QUAD_OFFSET)
> >   != 0);
> 
> Will this eventually handle all DQ-form, not just vector?  Is it supposed
> to?

Other than LQ (and LTPTR which GCC doesn't generate), all dq-form instructions
load/store VSX registers.  The problem is GCC will not generate LQ on little
endian systems because the registers are loaded in a big endian fashion.

It was more given we now have mode_supports_d_form and mode_supports_ds_form, I
was just making the name similar.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] PowerPC address support clean, patch 4 of 4

2018-05-10 Thread Segher Boessenkool
Hi,

On Thu, May 03, 2018 at 01:23:24PM -0400, Michael Meissner wrote:
> -/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
> +/* Helper function to say whether a mode supports PRE_INC or PRE_DEC in a 
> given
> +   reload register class or if some reload register class supports it.  */
>  static inline bool
> -mode_supports_pre_incdec_p (machine_mode mode)
> +mode_supports_pre_incdec_p (machine_mode mode,
> + enum rs6000_reload_reg_type rt = RELOAD_REG_ANY)
>  {
> -  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
> -   != 0);
> +  return ((reg_addr[mode].addr_mask[rt] & RELOAD_REG_PRE_INCDEC) != 0);
>  }

Same issue here: does the default argument help, or hurt?  The function
names now do not describe what the function does, either :-/


Segher


Re: [PATCH] PowerPC address support clean, patch 3 of 4

2018-05-10 Thread Michael Meissner
On Wed, May 09, 2018 at 06:14:27PM -0500, Segher Boessenkool wrote:
> On Thu, May 03, 2018 at 01:22:10PM -0400, Michael Meissner wrote:
> > 2018-05-03  Michael Meissner  
> > 
> > * config/rs6000/rs6000.c (mode_supports_d_form): Rename
> > mode_supports_vmx_dform to mode_supports_d_form.  Add an optional
> > argument to say which reload register class to use.  Change all
> > callers to pass in the RELOAD_REG_VMX class explicitly.
> > (rs6000_secondary_reload): Likewise.
> > (rs6000_preferred_reload_class): Likewise.
> > (rs6000_secondary_reload_class): Likewise.
> 
> Please don't say "likewise" unless the change is actually similar.
> 
> > -/* Return true if we have D-form addressing in altivec registers.  */
> > +/* Return true if we have D-form addressing (register+offset) in either a
> > +   specific reload register class or whether some reload register class
> > +   supports d-form addressing.  */
> >  static inline bool
> > -mode_supports_vmx_dform (machine_mode mode)
> > +mode_supports_d_form (machine_mode mode,
> > + enum rs6000_reload_reg_type rt = RELOAD_REG_ANY)
> >  {
> > -  return ((reg_addr[mode].addr_mask[RELOAD_REG_VMX] & RELOAD_REG_OFFSET) 
> > != 0);
> > +  return ((reg_addr[mode].addr_mask[rt] & RELOAD_REG_OFFSET) != 0);
> >  }
> 
> Will this overload help anything?  It does not look that way, all current
> callers use a different argument (and all the same).

All current callers just use the ANY option (except for these calls).  However
in the future, I'm planning on calling these functions with the specific reload
register class (hence the change).

> Overloads are nice if they make things *easier* for the reader, not harder.
> Same as with all other syntactic sugar.

-- 
Michael Meissner, IBM
IBM, M/S 2506R, 550 King Street, Littleton, MA 01460-6245, USA
email: meiss...@linux.ibm.com, phone: +1 (978) 899-4797



Re: [PATCH] Use two source permute for vector initialization (PR 85692, take 2)

2018-05-10 Thread Allan Sandfeld Jensen
On Donnerstag, 10. Mai 2018 09:57:22 CEST Jakub Jelinek wrote:
> On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator *gsi)
> > > > 
> > > >elem_type = TREE_TYPE (type);
> > > >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > > 
> > > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > > -  orig = NULL;
> > > > +  vec_perm_builder sel (nelts, 2, nelts);
> > > 
> > > > Why this change?  I admit the vec_perm_builder arguments are confusing,
> > > but
> > > I think the second times third is the number of how many indices are
> > > being
> > > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > 
> > I had the impression it was what was selected from. In any case, I changed
> > > it because without it I get a crash when vec_perm_indices is created later
> > with a possible nparms of 2.
> 
> The documentation is apparently in vector-builder.h:
>This class is a wrapper around auto_vec for building vectors of T.
>It aims to encode each vector as npatterns interleaved patterns,
>where each pattern represents a sequence:
> 
>  { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }
> 
>The first three elements in each pattern provide enough information
>to derive the other elements.  If all patterns have a STEP of zero,
>we only need to encode the first two elements in each pattern.
>If BASE1 is also equal to BASE0 for all patterns, we only need to
>encode the first element in each pattern.  The number of encoded
>elements per pattern is given by nelts_per_pattern.
> 
>The class can be used in two ways:
> 
>1. It can be used to build a full image of the vector, which is then
>   canonicalized by finalize ().  In this case npatterns is initially
>   the number of elements in the vector and nelts_per_pattern is
>   initially 1.
> 
>2. It can be used to build a vector that already has a known encoding.
>   This is preferred since it is more efficient and copes with
>   variable-length vectors.  finalize () then canonicalizes the encoding
>   to a simpler form if possible.
> 
> As the vector is constant width and we are building the full image of the
> vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
> finalization can perhaps change it to something more compact.
> 
> > > (and sorry for missing your patch first, the PR wasn't ASSIGNED and
> > > there
> > > was no link to gcc-patches for it).
> > 
> > It is okay. You are welcome to take it over. I am not a regular gcc
> > contributor and thus not well-versed in the details, only the basic logic
> > of how things work.
> 
> Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
> and i686-linux, ok for trunk?
> 
Looks good to me if that counts for anything.

'Allan




Re: [PATCH] handle local aggregate initialization in strlen (PR 83821)

2018-05-10 Thread Martin Sebor

Hi Marc,

Can you please comment/respond to Jeff's question below and
confirm whether my understanding of the restriction (below)
is correct?

Thanks
Martin

On 04/30/2018 11:50 AM, Jeff Law wrote:

On 01/12/2018 02:30 PM, Martin Sebor wrote:

A failure in a test for the recently enhanced -Warray-bounds
warning exposed an unnecessarily broad restriction in the strlen
pass that prevents it from tracking the length of a member string
of locally defined and initialized struct:

  void f (void)
  {
    struct { char s[8]; int i; } a = { "1234", 5 };

    if (strlen (a.s) != 4)   // not folded
      abort ();
  }

IIUC, the restriction was in place to account for writes into
an array changing or invalidating the length of a string stored
in its initial elements.  This would happen if the write either
changed the string's terminating nul byte, or if it reset one
of the prior non-nul bytes.

To reflect just this intent the restriction can be tightened
up to improve the pass' ability to track even the lengths of
string members of locally initialized aggregates.  Besides
leading to better code this change also clears up the test
failure.

Tested on x86_64-linux.

Martin


gcc-83821.diff


PR tree-optimization/83821 - local aggregate initialization defeats strlen 
optimization

gcc/ChangeLog:

PR tree-optimization/83821
* tree-ssa-strlen.c (maybe_invalidate): Consider the length of
a string when available.
(handle_char_store): Reset calloc statement on a non-nul store.

gcc/testsuite/ChangeLog:

PR tree-optimization/83821
* c-c++-common/Warray-bounds-4.c: Remove XFAIL.
* gcc.dg/strlenopt-43.c: New test.
* gcc.dg/strlenopt-44.c: Same.
* gcc.dg/tree-ssa/calloc-4.c: Same.

I see what you're trying to do.  But I'm really struggling to understand
Marc G's comment "Do not use si->nonzero_chars" since that's precisely
what your patch does.

Your patch seems reasonable on the surface, but I fear there's something
I'm missing.  Can you reach out to Marc G. to see if he recalls the
rationale behind the comment?

The comment in its original form was introduced here:

commit 9f15ed6e5c148ded6e7942e75595d91151792c9b
Author: glisse 
Date:   Tue Jun 24 18:50:00 2014 +

2014-06-24  Marc Glisse  

PR tree-optimization/57742
gcc/
* tree-ssa-strlen.c (get_string_length): Ignore malloc.
(handle_builtin_malloc, handle_builtin_memset): New functions.
(strlen_optimize_stmt): Call them.
* passes.def: Move strlen after loop+dom but before vrp.
gcc/testsuite/
* g++.dg/tree-ssa/calloc.C: New testcase.
* gcc.dg/tree-ssa/calloc-1.c: Likewise.
* gcc.dg/tree-ssa/calloc-2.c: Likewise.
* gcc.dg/strlenopt-9.c: Adapt.


git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@211956
138bc75d-0d04-0410-961f-82ee72b054a4


Jeff





[PATCH, i386]: Fix and improve a couple of builtin expansions

2018-05-10 Thread Uros Bizjak
Hello!

The missing return in _xgetbv is a bug and will be backported to
release branches.

2018-05-10  Uros Bizjak  

* config/i386/i386.c (ix86_expand_builtin) <case IX86_BUILTIN_RDPID>:
Generate SImode target register for null target.
<case IX86_BUILTIN_XGETBV>: Ditto.
<case IX86_BUILTIN_XSETBV>: Optimize LSHIFTRT generation.
* config/i386/xsaveintrin.h (_xgetbv): Add missing return.

testsuite/ChangeLog:

2018-05-10  Uros Bizjak  

* gcc.target/i386/xgetsetbv.c: Check also variable arguments.

Bootstrapped and regression tested on x86_64-linux-gnu {,-m32}.

Committed to mainline SVN.

Uros.
Index: config/i386/i386.c
===
--- config/i386/i386.c  (revision 260116)
+++ config/i386/i386.c  (working copy)
@@ -37085,7 +37085,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
 
 case IX86_BUILTIN_RDPID:
 
-  op0 = gen_reg_rtx (TARGET_64BIT ? DImode : SImode);
+  op0 = gen_reg_rtx (word_mode);
 
   if (TARGET_64BIT)
{
@@ -37094,18 +37094,16 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
}
   else
insn = gen_rdpid (op0);
+
   emit_insn (insn);
 
-  if (target == 0)
-   {
- /* mode is VOIDmode if __builtin_rdpid has been called
-without lhs.  */
- if (mode == VOIDmode)
-   return target;
- target = gen_reg_rtx (mode);
-   }
+  if (target == 0
+ || !register_operand (target, SImode))
+   target = gen_reg_rtx (SImode);
+
   emit_move_insn (target, op0);
   return target;
+
 case IX86_BUILTIN_RDPMC:
 case IX86_BUILTIN_RDTSC:
 case IX86_BUILTIN_RDTSCP:
@@ -37164,14 +37162,9 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
  emit_move_insn (gen_rtx_MEM (SImode, op4), op2);
}
 
-  if (target == 0)
-   {
- /* mode is VOIDmode if __builtin_rd* has been called
-without lhs.  */
- if (mode == VOIDmode)
-   return target;
- target = gen_reg_rtx (mode);
-   }
+  if (target == 0
+ || !register_operand (target, DImode))
+target = gen_reg_rtx (DImode);
 
   if (TARGET_64BIT)
{
@@ -37260,25 +37253,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx sub
   if (!REG_P (op0))
op0 = copy_to_mode_reg (SImode, op0);
 
+  op1 = force_reg (DImode, op1);
+
   if (TARGET_64BIT)
{
  op2 = expand_simple_binop (DImode, LSHIFTRT, op1, GEN_INT (32),
 NULL, 1, OPTAB_DIRECT);
 
+ icode = CODE_FOR_xsetbv_rex64;
+
  op2 = gen_lowpart (SImode, op2);
  op1 = gen_lowpart (SImode, op1);
- if (!REG_P (op1))
-   op1 = copy_to_mode_reg (SImode, op1);
- if (!REG_P (op2))
-   op2 = copy_to_mode_reg (SImode, op2);
- icode = CODE_FOR_xsetbv_rex64;
  pat = GEN_FCN (icode) (op0, op1, op2);
}
   else
{
- if (!REG_P (op1))
-   op1 = copy_to_mode_reg (DImode, op1);
  icode = CODE_FOR_xsetbv;
+
  pat = GEN_FCN (icode) (op0, op1);
}
   if (pat)
Index: config/i386/xsaveintrin.h
===
--- config/i386/xsaveintrin.h   (revision 260116)
+++ config/i386/xsaveintrin.h   (working copy)
@@ -59,7 +59,7 @@ extern __inline long long
 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
 _xgetbv (unsigned int __A)
 {
-  __builtin_ia32_xgetbv (__A);
+  return __builtin_ia32_xgetbv (__A);
 }
 
 #ifdef __x86_64__
Index: testsuite/gcc.target/i386/xgetsetbv.c
===
--- testsuite/gcc.target/i386/xgetsetbv.c   (revision 260116)
+++ testsuite/gcc.target/i386/xgetsetbv.c   (working copy)
@@ -1,13 +1,27 @@
 /* { dg-do compile } */
 /* { dg-options "-O2 -mxsave" } */
-/* { dg-final { scan-assembler "xgetbv" } } */
-/* { dg-final { scan-assembler "xsetbv" } } */
+/* { dg-final { scan-assembler-times "xgetbv" 3 } } */
+/* { dg-final { scan-assembler-times "xsetbv" 3 } } */
 
 #include <x86intrin.h>
 
-unsigned int
-xgetsetbv (void)
+unsigned long long
+foo (unsigned x, unsigned y)
 {
+ _xsetbv (x, y);
+  return _xgetbv (x);
+}
+
+unsigned long long
+bar (unsigned x, unsigned long long y)
+{
+ _xsetbv (x, y);
+  return _xgetbv (x);
+}
+
+unsigned long long
+baz (void)
+{
  _xsetbv (0, 0);
   return _xgetbv (0);
 }


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Hans-Peter Nilsson
> Date: Thu, 10 May 2018 07:23:05 -0500
> From: Segher Boessenkool 

> On Thu, May 10, 2018 at 10:33:39AM +0200, Marc Glisse wrote:
> > int x, y;
> > void f(int n){
> >   int c = 3 << 20;
> >   x = n / c;
> >   y = x / c;
> > }

> Without the replacement we have two dependent divisions; with the
> replacement we have two independent divisions, and that is much faster
> on some (many?) systems (that can do two fixed-point divisions in parallel),
> even if things do not simplify further.

Sometimes the case is that way, sometimes yet another: your case
can also increase register pressure, which is bad for some
(many?) systems.

brgds, H-P


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Hans-Peter Nilsson
> Date: Thu, 10 May 2018 14:03:10 +0200
> From: Jakub Jelinek 

> On Thu, May 10, 2018 at 01:51:29PM +0200, Marc Glisse wrote:
> > > > There are probably more
> > > > complicated transformations this disables.
> > > 
> > > I'm providing an example from *real* code where the
> > > transformation is bad (admittedly just for the div-div case).
> 
> Isn't the case of the posted real-world testcase that many targets have
> instructions that can do division and modulo at the same time?

Maybe they can, but they don't.  What matters is what the tested
targets actually do.

I guess it's worth repeating: for the posted case, all the
tested targets (aarch64-unknown-linux-gnu,
powerpc64-unknown-linux-gnu, x86_64-pc-linux-gnu and
mipsisa32r2el-linux-gnu) implement the div+mod in the test-case
(by constant) using the high-part of a widened multiplication
and shift.  Actually, add ARM to that.  (The only one I found
that didn't was sparc.)

> So perhaps don't punt just because of !single_use (), but punt if
> !single_use () and one of the other uses is a modulo with the same constant
> as the second division, or perhaps similarly if the first division is
> !accompanied by modulo with the same constant too?

Maybe, but I just saw no need to limit the case, and no way to
test "other uses" anyway.

brgds, H-P



Re: [PATCH, rs6000] Map dcbtstt, dcbtt to n2=0 for __builtin_prefetch builtin.

2018-05-10 Thread Segher Boessenkool
Hi Carl,

On Thu, May 10, 2018 at 11:10:29AM -0700, Carl Love wrote:
> * config/rs6000/rs6000.md (prefetch): Generate dcbtt and dcbtstt
>   instructions if operands[2] is 0.

"and TARGET_POPCNTD"?  Or "and generating code for ISA 2.06 or later".
Something like that.

Looks good, thanks!  Okay for trunk.


Segher


Re: [PATCH, rs6000] Fix expected BE counts for vsx-vector-6.h

2018-05-10 Thread Segher Boessenkool
Hi,

On Thu, May 10, 2018 at 10:57:16AM -0700, Carl Love wrote:
> The following patch fixes issues found with the instruction counts for
> the vsx-vector-6.h test.  It was found that on a BE system where GCC is
> configured with --with-cpu=power6 the expected counts in vsx-vector-6-
> be.c do not match.  This patch fixes the expected counts when
> configuring and building GCC using --with-cpu=power6, --with-cpu=power7 
> and --with-cpu=power8 on a BE system.

> 2018-05-10 Carl Love  
>   * gcc.target/powerpc/vsx-vector-6-be.c (dg-options): Update to run on
>   Power 6 only.

Then please rename it to vsx-vector-6-be.p6.c like the others.  But, do
we want a test for p6 at all?  The test uses powerpc_vsx_ok which isn't
true on a p6 (so it runs on p7 and up, but with -mcpu=power6; do we want
to test that?)

>   (dg-final): Update xvcmpgtdp, xvcmpgedp counts for Power 6.
>   * gcc.target/powerpc/vsx-vector-6-be.p7.c (dg-final): New test file for
>   Power 7.
>   * gcc.target/powerpc/vsx-vector-6-be.p8.c (dg-final): New test file for
>   Power 8.

Did you check that the updated counts make sense, i.e. that the expected
code is generated?

Okay for trunk if so (with the rename to .p6).  Thanks!


Segher


Re: RFA (make_dispatcher_decl): PATCH for c++/83911, ICE with multiversioned constructor

2018-05-10 Thread Jason Merrill
On Fri, Mar 16, 2018 at 8:38 AM, Jason Merrill  wrote:
> On Thu, Mar 15, 2018 at 4:50 AM, Richard Biener
>  wrote:
>> On Wed, Mar 14, 2018 at 8:57 PM, Jason Merrill  wrote:
>>> Ping
>>>
>>> On Fri, Mar 2, 2018 at 1:23 PM, Jason Merrill  wrote:
 As I mentioned in the PR, the problem here is that we're replacing a
 constructor with a dispatcher function which doesn't look much like a
 constructor.  This patch adjusts make_dispatcher_decl to make it look
 more like the functions it dispatches to, but other things are certain
 to break for similar reasons down the road.  A proper solution should
 be more transparent, like thunks.

 Tested x86_64-pc-linux-gnu.  Does this seem worth applying to fix the
 regression?
>>
>> The patch looks reasonable to me, you probably know best whether
>> the cp/ parts are risky or not ;)
>>
>> So - OK from my POV.
>>
>> And yes, thunks may be a better representation for the dispatcher.
>
> It occurred to me that I could handle this more locally by deferring
> the function substitution until genericization time, so this is what
> I'm checking in:

...but now that we're in stage 1, it still seems sensible to have a
single way of checking whether something is a constructor.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit 656d038fe0cf78a2432a8c9a047edc93af6d5b23
Author: Jason Merrill 
Date:   Fri Mar 16 08:32:26 2018 -0400

* cp-tree.h (DECL_CONSTRUCTOR_P): Use DECL_CXX_CONSTRUCTOR_P.

(DECL_DESTRUCTOR_P): Use DECL_CXX_DESTRUCTOR_P.

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 2df158c9ea6..a4e0099a249 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -2731,7 +2731,7 @@ struct GTY(()) lang_decl {
 /* For FUNCTION_DECLs and TEMPLATE_DECLs: nonzero means that this function
is a constructor.  */
 #define DECL_CONSTRUCTOR_P(NODE) \
-  IDENTIFIER_CTOR_P (DECL_NAME (NODE))
+  DECL_CXX_CONSTRUCTOR_P (STRIP_TEMPLATE (NODE))
 
 /* Nonzero if NODE (a FUNCTION_DECL) is a constructor for a complete
object.  */
@@ -2760,7 +2760,7 @@ struct GTY(()) lang_decl {
 /* Nonzero if NODE (a FUNCTION_DECL or TEMPLATE_DECL)
is a destructor.  */
 #define DECL_DESTRUCTOR_P(NODE)\
-  IDENTIFIER_DTOR_P (DECL_NAME (NODE))
+  DECL_CXX_DESTRUCTOR_P (STRIP_TEMPLATE (NODE))
 
 /* Nonzero if NODE (a FUNCTION_DECL) is a destructor, but not the
specialized in-charge constructor, in-charge deleting constructor,


[PATCH] use string length to relax -Wstringop-overflow for nonstrings (PR 85623)

2018-05-10 Thread Martin Sebor

GCC 8.1 warns for unbounded (and some bounded) string comparisons
involving arrays declared attribute nonstring (i.e., char arrays
that need not be nul-terminated).  For instance:

  extern __attribute__((nonstring)) char a[4];

  int f (void)
  {
return strncmp (a, "123", sizeof a);
  }

  warning: ‘strcmp’ argument 1 declared attribute ‘nonstring’

Note that the warning refers to strcmp even though the call in
the source is to strncmp, because prior passes transform one to
the other.

The warning above is unnecessary (for strcmp) and incorrect for
strncmp because the call reads exactly four bytes from the non-
string array a regardless of the bound and so there is no risk
that it will read past the end of the array.

The attached change enhances the warning to use the length of
the string argument to suppress some of these needless warnings
for both bounded and unbounded string comparison functions.
When the length of the string is unknown, the warning uses its
size (when possible) as the upper bound on the number of accessed
bytes.  The change adds no new warnings.

I'm looking for approval to commit it to both trunk and 8-branch.

Martin
PR c/85623 - strncmp() warns about attribute 'nonstring' incorrectly in -Wstringop-overflow

gcc/ChangeLog:

	PR c/85623
	* calls.c (maybe_warn_nonstring_arg): Use string length to set
	or adjust the presumed bound on an operation to avoid unnecessary
	warnings.

gcc/testsuite/ChangeLog:

	PR c/85623
	* c-c++-common/attr-nonstring-3.c: Adjust.
	* c-c++-common/attr-nonstring-4.c: Adjust.
	* c-c++-common/attr-nonstring-6.c: New test.

diff --git a/gcc/calls.c b/gcc/calls.c
index 9eb0467..f5c8ad4 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -55,6 +55,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "stringpool.h"
 #include "attribs.h"
 #include "builtins.h"
+#include "gimple-fold.h"
 
 /* Like PREFERRED_STACK_BOUNDARY but in units of bytes, not bits.  */
 #define STACK_BYTES (PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT)
@@ -1612,15 +1613,36 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp)
   /* The bound argument to a bounded string function like strncpy.  */
   tree bound = NULL_TREE;
 
+  /* The range of lengths of a string argument to one of the comparison
+ functions.  If the length is less than the bound it is used instead.  */
+  tree lenrng[2] = { NULL_TREE, NULL_TREE };
+
   /* It's safe to call "bounded" string functions with a non-string
  argument since the functions provide an explicit bound for this
  purpose.  */
   switch (DECL_FUNCTION_CODE (fndecl))
 {
-case BUILT_IN_STPNCPY:
-case BUILT_IN_STPNCPY_CHK:
+case BUILT_IN_STRCMP:
 case BUILT_IN_STRNCMP:
 case BUILT_IN_STRNCASECMP:
+  {
+	/* For these, if one argument refers to one or more of a set
+	   of string constants or arrays of known size, determine
+	   the range of their known or possible lengths and use it
+	   conservatively as the bound for the unbounded function,
+	   and to adjust the range of the bound of the bounded ones.  */
+	unsigned stride = with_bounds ? 2 : 1;
+	for (unsigned argno = 0; argno < nargs && !*lenrng; argno += stride)
+	  {
+	tree arg = CALL_EXPR_ARG (exp, argno);
+	if (!get_attr_nonstring_decl (arg))
+	  get_range_strlen (arg, lenrng);
+	  }
+  }
+  /* Fall through.  */
+
+case BUILT_IN_STPNCPY:
+case BUILT_IN_STPNCPY_CHK:
 case BUILT_IN_STRNCPY:
 case BUILT_IN_STRNCPY_CHK:
   {
@@ -1647,6 +1669,33 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp)
   if (bound)
 get_size_range (bound, bndrng);
 
+  if (*lenrng)
+{
+  /* Add one for the nul.  */
+  lenrng[0] = const_binop (PLUS_EXPR, TREE_TYPE (lenrng[0]),
+			   lenrng[0], size_one_node);
+  lenrng[1] = const_binop (PLUS_EXPR, TREE_TYPE (lenrng[1]),
+			   lenrng[1], size_one_node);
+
+  if (!bndrng[0])
+	{
+	  /* Conservatively use the upper bound of the lengths for
+	 both the lower and the upper bound of the operation.  */
+	  bndrng[0] = lenrng[1];
+	  bndrng[1] = lenrng[1];
+	  bound = void_type_node;
+	}
+  else
+	{
+	  /* Replace the bound on the operation with the upper bound
+	 of the length of the string if the latter is smaller.  */
+	  if (tree_int_cst_lt (lenrng[1], bndrng[0]))
+	bndrng[0] = lenrng[1];
+	  else if (tree_int_cst_lt (lenrng[1], bndrng[1]))
+	bndrng[1] = lenrng[1];
+	}
+}
+
   /* Iterate over the built-in function's formal arguments and check
  each const char* against the actual argument.  If the actual
  argument is declared attribute non-string issue a warning unless
@@ -1689,18 +1738,28 @@ maybe_warn_nonstring_arg (tree fndecl, tree exp)
 
   tree type = TREE_TYPE (decl);
 
+  /* The maximum number of array elements accessed.  */
   offset_int wibnd = 0;
   if (bndrng[0])
 	wibnd = wi::to_offset (bndrng[0]);
 
+  /* Size of the array.  */
   offset_int asize = wibnd;
 
+  /* Determine the array size.  

Re: [PATCH] Document Dual ABI for std::ios_base::failure

2018-05-10 Thread Jonathan Wakely

On 10/05/18 12:17 +0100, Jonathan Wakely wrote:

And a couple of other doc improvements, and regenerated the HTML
pages.

Please read the proposed change to using.xml and let me know if it's
clear.

* doc/xml/faq.xml: Link to C++17 status. Add note to outdated answer.
* doc/xml/manual/debug_mode.xml: Add array and forward_list to list
of C++11 containers with Debug Mode support.
* doc/xml/manual/using.xml: Document Dual ABI for ios_base::failure.
* doc/html/*: Regenerate.


Committed to trunk.



Re: [PATCH v2, rs6000] Improve Documentation of Built-In Functions Part 1

2018-05-10 Thread Segher Boessenkool
Hi!

On Wed, May 09, 2018 at 12:42:11PM -0500, Kelvin Nilsen wrote:
> 1. Change the name of the first PowerPC built-in section from 
>"PowerPC Built-in Functions" to "Basic PowerPC Built-in Functions".
>This section has never described all PowerPC built-in functions.

After the patch it looks like this:

* NDS32 Built-in Functions::
* picoChip Built-in Functions::
* Basic PowerPC Built-in Functions::
* PowerPC AltiVec/VSX Built-in Functions::
* PowerPC Hardware Transactional Memory Built-in Functions::
* PowerPC Atomic Memory Operation Functions::
* RX Built-in Functions::
* S/390 System z Built-in Functions::

I think it is nicer to just have a single "PowerPC Built-in Functions"
node, with then everything else hanging from a menu in there (as you have
for "Basic PowerPC Built-in Functions" now; so just rename that and put
the V*X, TM, atomic nodes in that menu too; probably need to make those
subsubsections or such).

> 2018-05-09  Kelvin Nilsen  
> 
>   * doc/extend.texi (PowerPC Built-in Functions): Rename this
>   subsection.
>   (Basic PowerPC Built-in Functions): The new name of the
>   subsection previously known as "PowerPC Built-in Functions".
>   (Basic PowerPC Built-in Functions Available on all Configurations):
>   New subsubsection.
>   (Basic PowerPC Built-in Functions Available on ISA 2.05): Likewise.
>   (Basic PowerPC Built-in Functions Available on ISA 2.06): Likewise.
>   (Basic PowerPC Built-in Functions Available on ISA 2.07): Likewise.
>   (Basic PowerPC Built-in Functions Available on ISA 3.0): Likewise.

But please commit this now, it's a nice improvement already.  Thanks!


Segher


Re: [v3] PATCH to make _BracketMatcher::_S_cache_size a variable

2018-05-10 Thread Jonathan Wakely

On 10/05/18 13:59 -0400, Jason Merrill wrote:

There doesn't seem to be any reason for _S_cache_size to be a function
rather than a variable.  OK for trunk?


I vaguely recall some problem with that constant, maybe it didn't work
as a variable once upon a time. If that was ever true it was fixed
long ago so the change is OK, thanks.




Re: random_device implementation

2018-05-10 Thread Jason Merrill
On Sat, May 5, 2018 at 8:34 PM, sotrdg sotrdg  wrote:
> https://github.com/euloanty/mingw-std-random_device/blob/master/random_device_gcc_withcxx11abi/random.cc

It's best to CC the gcc-patches and libstdc++ lists on library contributions.

Jason


C++ PATCHes for core issue resolutions

2018-05-10 Thread Jason Merrill
2310: We were crashing on this testcase; the proposed resolution of
2310 clarifies that we should reject it.

2267: brace and paren initialization should have the same semantics here.

Tested x86_64-pc-linux-gnu, applying to trunk.
commit ac97fad727f7ebcba2d2b345d95867331042a4af
Author: Jason Merrill 
Date:   Wed Mar 14 17:25:54 2018 -0400

Core issue 2310 - conversion to base of incomplete type.

* class.c (build_base_path): Check COMPLETE_TYPE_P for source type.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 30323f0a9f6..4616d8d3036 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -370,6 +370,15 @@ build_base_path (enum tree_code code,
   goto indout;
 }
 
+  if (!COMPLETE_TYPE_P (probe))
+{
+  if (complain & tf_error)
+	error ("cannot convert from %qT to base class %qT because %qT is "
+	   "incomplete", BINFO_TYPE (d_binfo), BINFO_TYPE (binfo),
+	   BINFO_TYPE (d_binfo));
+  return error_mark_node;
+}
+
   /* If we're in an NSDMI, we don't have the full constructor context yet
  that we need for converting to a virtual base, so just build a stub
  CONVERT_EXPR and expand it later in bot_replace.  */
diff --git a/gcc/testsuite/g++.dg/cpp0x/constexpr-base6.C b/gcc/testsuite/g++.dg/cpp0x/constexpr-base6.C
new file mode 100644
index 000..849ac81db78
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/constexpr-base6.C
@@ -0,0 +1,14 @@
+// CWG issue 2310
+// { dg-do compile { target c++11 } }
+// { dg-options "" }
+
+template<class A, class B> struct check_derived_from { 
+  static A a; 
+  static constexpr B *p = &a;	// { dg-error "" }
+  int ar[p-p+1];
+}; 
+struct W { int i; }; 
+struct Z : W
+{
+  check_derived_from<Z, W> cdf;
+};
commit ced9e2797a8d1484829d140ce0147cd2be4d2091
Author: Jason Merrill 
Date:   Thu Mar 15 14:38:57 2018 -0400

CWG 2267 - list-initialization of reference temporary

* call.c (reference_binding): List-initializing a reference
temporary is copy-list-initialization.

diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index d3ee152808a..30fe682e7b4 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -1560,12 +1560,10 @@ reference_binding (tree rto, tree rfrom, tree expr, bool c_cast_p, int flags,
 	  goto skip;
 	}
 	}
-  /* Otherwise, if T is a reference type, a prvalue temporary of the
-	 type referenced by T is copy-list-initialized or
-	 direct-list-initialized, depending on the kind of initialization
-	 for the reference, and the reference is bound to that temporary. */
-  conv = implicit_conversion (to, from, expr, c_cast_p,
-  flags|LOOKUP_NO_TEMP_BIND, complain);
+  /* Otherwise, if T is a reference type, a prvalue temporary of the type
+	 referenced by T is copy-list-initialized, and the reference is bound
+	 to that temporary. */
+  CONSTRUCTOR_IS_DIRECT_INIT (expr) = false;
 skip:;
 }
 
diff --git a/gcc/testsuite/g++.dg/cpp0x/initlist-ref-2267.C b/gcc/testsuite/g++.dg/cpp0x/initlist-ref-2267.C
new file mode 100644
index 000..dfd735a5add
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/initlist-ref-2267.C
@@ -0,0 +1,14 @@
+// CWG 2267
+// { dg-do compile { target c++11 } }
+
+struct A {} a; 
+struct B { explicit B(const A&); }; 
+B b1(a); // #1, ok 
+const B &b2{a}; // { dg-error "" }
+const B &b3(a); // { dg-error "" }
+
+struct D { D(); }; 
+struct C { explicit operator D(); } c; 
+D d1(c); // ok 
+const D &d2{c}; // { dg-error "" }
+const D &d3(c); // { dg-error "" }


Re: [PATCH 1/2] gcc_qsort: source code changes

2018-05-10 Thread DJ Delorie

Alexander Monakov  writes:
> I'm not sure.  It has a weaker contract compared to qsort, and I believe
> functions in libiberty are understood to provide stronger/better replacements.

The original intent of libiberty was to provide a stronger *portability*
contract, i.e. to work around differences in underlying operating
systems.  The xfoo() variants often handle error conditions also, as
that has traditionally been something that OSs do differently anyway.

Having said that, adding something to libiberty is more complicated than
adding something to gcc (it didn't used to be), and if nobody else needs
a more portable qsort, it's a wasted effort.

Libiberty is *not* a generic "toss things in here because they're useful
and generic" library, despite being used as such.  However, it is common
among a few large projects (which used to share a repo, limiting copies
of libiberty to one), and does help in code re-use.

Given all that, I'd say that an xqsort might be appropriate in
libiberty, if it was (1) able to take over for the generic qsort[1] ,
and (2) the changes are also needed or useful in one of the other
projects using libiberty.  But given that it's currently written in C++
(it would need to be C-compatible) and only used by gcc, IMHO putting it
in libiberty would be inappropriate at this time.  The fact that qsort
is defined to be nondeterministic is not a portability issue[2].

Consider that there is also gnulib, which serves a similar purpose.

[1] i.e. if replacing qsort() with xqsort() in a C or C++ program
resulted in the same behavior as far as standards imply.

[2] if the nondeterminism is a problem, you probably need to fix your
compare function ;-)


Various small C++ cleanup PATCHes

2018-05-10 Thread Jason Merrill
1) There's a function to count how many template headers we should
have, we should use it.

2) While working on something a while back I ran into trying to
instantiate a nested function while still in processing_template_decl
context, which doesn't work so well.  Let's check for that.

3) A predicate asking about user-provided functions should use
user_provided_p, not DECL_ARTIFICIAL.

4) A minor simplification of the this-capture logic.

5) Correcting wrong uses of "argument" vs. "parameter".

Tested x86_64-pc-linux-gnu, applying to trunk.
commit aad1656a3f0296ea99d79272f5d639f1a83b767c
Author: Jason Merrill 
Date:   Mon Apr 9 13:57:42 2018 -0400

* parser.c (cp_parser_class_head): Use num_template_headers_for_class.

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 88db9988bd4..82b8ef87ed7 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -22930,20 +22930,7 @@ cp_parser_class_head (cp_parser* parser,
   /* Otherwise, count the number of templates used in TYPE and its
 	 containing scopes.  */
   else
-	{
-	  tree scope;
-
-	  for (scope = TREE_TYPE (type);
-	   scope && TREE_CODE (scope) != NAMESPACE_DECL;
-	   scope = get_containing_scope (scope))
-	if (TYPE_P (scope)
-		&& CLASS_TYPE_P (scope)
-		&& CLASSTYPE_TEMPLATE_INFO (scope)
-		&& PRIMARY_TEMPLATE_P (CLASSTYPE_TI_TEMPLATE (scope))
-		&& (!CLASSTYPE_TEMPLATE_SPECIALIZATION (scope)
-		|| uses_template_parms (CLASSTYPE_TI_ARGS (scope))))
-	  ++num_templates;
-	}
+	num_templates = num_template_headers_for_class (TREE_TYPE (type));
 }
   /* Otherwise, the identifier is optional.  */
   else
commit c6b45dd7a70decb3fbd75f72d1f33b30e156e150
Author: Jason Merrill 
Date:   Tue Mar 13 16:07:04 2018 -0400

Make sure we aren't trying to do a nested instantiation in template context.

* pt.c (instantiate_decl): Make sure we aren't trying to do a nested
instantiation in template context.

diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index e8346d3bf58..790d6ea25e9 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -23886,6 +23886,7 @@ instantiate_decl (tree d, bool defer_ok, bool expl_inst_class_mem_p)
 push_to_top_level ();
   else
 {
+  gcc_assert (!processing_template_decl);
   push_function_context ();
   cp_unevaluated_operand = 0;
   c_inhibit_evaluation_warnings = 0;
commit df63e4ead6154887682fba12299ae414a12f49e6
Author: Jason Merrill 
Date:   Mon Mar 12 13:56:49 2018 -0400

* class.c (vbase_has_user_provided_move_assign): Use user_provided_p.

diff --git a/gcc/cp/class.c b/gcc/cp/class.c
index 0427d1224f7..30323f0a9f6 100644
--- a/gcc/cp/class.c
+++ b/gcc/cp/class.c
@@ -5017,7 +5017,7 @@ vbase_has_user_provided_move_assign (tree type)
 for (ovl_iterator iter (get_class_binding_direct
 			(type, assign_op_identifier));
 	 iter; ++iter)
-  if (!DECL_ARTIFICIAL (*iter) && move_fn_p (*iter))
+  if (user_provided_p (*iter) && move_fn_p (*iter))
 	return true;
 
   /* Do any of its bases?  */
commit 0114f237348ab82d75d2277452926fa897b24dab
Author: Jason Merrill 
Date:   Mon Mar 5 17:41:26 2018 -0500

* lambda.c (lambda_expr_this_capture): Improve logic.

diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index e9b962a8f33..e3f22fcc5b9 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -743,9 +743,7 @@ lambda_expr_this_capture (tree lambda, bool add_capture_p)
 add_capture_p = false;
 
   /* Try to default capture 'this' if we can.  */
-  if (!this_capture
-  && (!add_capture_p
-  || LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (lambda) != CPLD_NONE))
+  if (!this_capture)
 {
   tree lambda_stack = NULL_TREE;
   tree init = NULL_TREE;
@@ -756,9 +754,15 @@ lambda_expr_this_capture (tree lambda, bool add_capture_p)
3. a non-default capturing lambda function.  */
   for (tree tlambda = lambda; ;)
 	{
-  lambda_stack = tree_cons (NULL_TREE,
-tlambda,
-lambda_stack);
+	  if (add_capture_p
+	  && LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (tlambda) == CPLD_NONE)
+	/* tlambda won't let us capture 'this'.  */
+	break;
+
+	  if (add_capture_p)
+	lambda_stack = tree_cons (NULL_TREE,
+  tlambda,
+  lambda_stack);
 
 	  tree closure = LAMBDA_EXPR_CLOSURE (tlambda);
 	  tree containing_function
@@ -807,10 +811,6 @@ lambda_expr_this_capture (tree lambda, bool add_capture_p)
 	  init = LAMBDA_EXPR_THIS_CAPTURE (tlambda);
 	  break;
 	}
-
-	  if (LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (tlambda) == CPLD_NONE)
-	/* An outer lambda won't let us capture 'this'.  */
-	break;
 	}
 
   if (init)
commit fb725d091432d694c1d565710691bda335f38146
Author: Jason Merrill 
Date:   Fri Feb 9 17:25:24 2018 -0500

* decl.c (make_typename_type): s/parameters/arguments/.

* parser.c 

Re: [PATCH] Add constant folding support for next{after,toward}{,f,l} (PR libstdc++/85466)

2018-05-10 Thread Steve Ellcey

I think this patch is causing a glibc testing error.  The
tests math/bug-nextafter and math/bug-nexttoward are failing
due to underflow not getting set.  Here is a test case that
should print nothing but is currently printing the
'did not underflow' message.

#include <fenv.h>
#include <math.h>
#include <float.h>
#include <stdlib.h>
#include <stdio.h>

float zero = 0.0;
float inf = INFINITY;

int
main (void)
{
  int result = 0;

  float i = INFINITY;
  float m = FLT_MAX;

  i = 0;
  m = FLT_MIN;
  feclearexcept (FE_ALL_EXCEPT);
  i = nextafterf (m, i);
  if (i < 0 || i >= FLT_MIN)
{
  printf ("nextafterf+ failed\n");
  ++result;
}
  if (fetestexcept (FE_UNDERFLOW) == 0)
{
  printf ("nextafterf+ did not underflow\n");
  ++result;
}
  return result;
}


Re: PATCH for fortran/85735, f951 crash on empty input

2018-05-10 Thread Steve Kargl
On Thu, May 10, 2018 at 02:17:15PM -0400, Marek Polacek wrote:
> ./f951 -quiet
> ^D
> crashes in gfc_create_decls:
>   /* Build our translation-unit decl.  */
>   current_translation_unit
> = build_translation_unit_decl (get_identifier (main_input_filename));
> because main_input_filename is null in this case.  We can fix it like
> below, where main_input_filename will be set to "".
> 
> Bootstrapped/regtested on x86_64-linux, ok for trunk?
> 

Yes.

-- 
Steve


PATCH for fortran/85735, f951 crash on empty input

2018-05-10 Thread Marek Polacek
Running
./f951 -quiet
^D
crashes in gfc_create_decls:
  /* Build our translation-unit decl.  */
  current_translation_unit
= build_translation_unit_decl (get_identifier (main_input_filename));
because main_input_filename is null in this case.  We can fix it like
below, where main_input_filename will be set to "".

Bootstrapped/regtested on x86_64-linux, ok for trunk?

2018-05-10  Marek Polacek  

PR fortran/85735
* options.c (gfc_post_options): Set main_input_filename.

diff --git gcc/fortran/options.c gcc/fortran/options.c
index 1405f1cf008..3c17a583f62 100644
--- gcc/fortran/options.c
+++ gcc/fortran/options.c
@@ -313,6 +313,7 @@ gfc_post_options (const char **pfilename)
   if (gfc_current_form == FORM_UNKNOWN)
{
  gfc_current_form = FORM_FREE;
+ main_input_filename = filename;
  gfc_warning_now (0, "Reading file %qs as free form", 
   (filename[0] == '\0') ? "<stdin>" : filename);
}

Marek


Re: [PATCH, rs6000] Map dcbtstt, dcbtt to n2=0 for __builtin_prefetch builtin.

2018-05-10 Thread Carl Love
GCC Maintainers:

I updated the patch so the dcbtt and dcbtstt instructions are only
generated if the supported ISA is 2.06 or newer.  I addressed the use
of REG instead of REG_P and formatting issues as requested.

The regression testing of the patch was done on 

   powerpc64le-unknown-linux-gnu (Power 8 LE)

with no regressions.  

Additional manual testing to verify the correct instruction generation
was done on Power 8 LE.  The results are:

gcc -g -c prefetch.c
objdump -S -d prefetch.o | more
...

 /* data prefetch , instructions hit the cache. */

  __builtin_prefetch ([0], 0, 0);
   c:   2c 00 3f 39 addir9,r31,44
  10:   2c 4a 00 7e dcbtt   0,r9
  __builtin_prefetch ([0], 0, 1);
  14:   2c 00 3f 39 addir9,r31,44
  18:   2c 4a 00 7c dcbt0,r9
  __builtin_prefetch ([0], 0, 2);
  1c:   2c 00 3f 39 addir9,r31,44
  20:   2c 4a 00 7c dcbt0,r9
  __builtin_prefetch ([0], 0, 3);
  24:   2c 00 3f 39 addir9,r31,44
  28:   2c 4a 00 7c dcbt0,r9
  __builtin_prefetch ([0], 1, 0);
  2c:   2c 00 3f 39 addir9,r31,44
  30:   ec 49 00 7e dcbtstt 0,r9
  __builtin_prefetch ([0], 1, 1);
  34:   2c 00 3f 39 addir9,r31,44
  38:   ec 49 00 7c dcbtst  0,r9
  __builtin_prefetch ([0], 1, 2);
  3c:   2c 00 3f 39 addir9,r31,44
  40:   ec 49 00 7c dcbtst  0,r9
  __builtin_prefetch ([0], 1, 3);
  44:   2c 00 3f 39 addir9,r31,44
  48:   ec 49 00 7c dcbtst  0,r9

Please let me know if the patch looks OK for GCC mainline.

 Carl Love


test case gcc/testsuite/gcc.target/sh/prefetch.c

gcc/ChangeLog:

2018-05-10  Carl Love  

* config/rs6000/rs6000.md (prefetch): Generate dcbtt and dcbtstt
instructions if operands[2] is 0.
---
 gcc/config/rs6000/rs6000.md | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 2b15cca..8536c89 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -13233,9 +13233,23 @@
 (match_operand:SI 2 "const_int_operand" "n"))]
   ""
 {
-  if (GET_CODE (operands[0]) == REG)
-return INTVAL (operands[1]) ? "dcbtst 0,%0" : "dcbt 0,%0";
-  return INTVAL (operands[1]) ? "dcbtst %a0" : "dcbt %a0";
+  /* dcbtstt, dcbtt and TM=0b1 support starts with ISA 2.06.  */
+  int inst_select = INTVAL (operands[2]) || !TARGET_POPCNTD;
+
+  if (REG_P (operands[0]))
+{
+  if (INTVAL (operands[1]) == 0)
+return inst_select ? "dcbt 0,%0" : "dcbtt 0,%0";
+  else
+return inst_select ? "dcbtst 0,%0" : "dcbtstt 0,%0";
+}
+  else
+{
+  if (INTVAL (operands[1]) == 0)
+return inst_select ? "dcbt %a0" : "dcbtt %a0";
+  else
+return inst_select ? "dcbtst %a0" : "dcbtstt %a0";
+}
 }
   [(set_attr "type" "load")])
 
-- 
2.7.4



Re: [PATCH][i386] Adding WAITPKG instructions

2018-05-10 Thread Uros Bizjak
On Thu, May 10, 2018 at 7:47 PM, Uros Bizjak  wrote:

> Please find attached the patch that fixes all the issues (plus some
> whitespace fixes). It is tested and generates acceptable code. Please
> update the ChangeLog (do not use past tense in the ChangeLog entry!)
> and, if it works for you, please commit the attached version.

Sure enough, I have attached the wrong version. Please find correct
patch attached to the message.

Uros.
Index: cpuid.h
===
--- cpuid.h (revision 260116)
+++ cpuid.h (working copy)
@@ -98,6 +98,7 @@
 #define bit_AVX512VBMI (1 << 1)
 #define bit_PKU(1 << 3)
 #define bit_OSPKE  (1 << 4)
+#define bit_WAITPKG(1 << 5)
 #define bit_AVX512VBMI2(1 << 6)
 #define bit_SHSTK  (1 << 7)
 #define bit_GFNI   (1 << 8)
Index: driver-i386.c
===
--- driver-i386.c   (revision 260116)
+++ driver-i386.c   (working copy)
@@ -424,6 +424,7 @@ const char *host_detect_local_cpu (int argc, const
   unsigned int has_avx512vnni = 0, has_vaes = 0;
   unsigned int has_vpclmulqdq = 0;
   unsigned int has_movdiri = 0, has_movdir64b = 0;
+  unsigned int has_waitpkg = 0;
 
   bool arch;
 
@@ -527,6 +528,7 @@ const char *host_detect_local_cpu (int argc, const
 
   has_shstk = ecx & bit_SHSTK;
   has_pconfig = edx & bit_PCONFIG;
+  has_waitpkg = ecx & bit_WAITPKG;
 }
 
   if (max_level >= 13)
@@ -1108,6 +1110,7 @@ const char *host_detect_local_cpu (int argc, const
   const char *avx512bitalg = has_avx512bitalg ? " -mavx512bitalg" : " 
-mno-avx512bitalg";
   const char *movdiri = has_movdiri ? " -mmovdiri" : " -mno-movdiri";
   const char *movdir64b = has_movdir64b ? " -mmovdir64b" : " 
-mno-movdir64b";
+  const char *waitpkg = has_waitpkg ? " -mwaitpkg" : " -mno-waitpkg";
   options = concat (options, mmx, mmx3dnow, sse, sse2, sse3, ssse3,
sse4a, cx16, sahf, movbe, aes, sha, pclmul,
popcnt, abm, lwp, fma, fma4, xop, bmi, sgx, bmi2,
@@ -1120,7 +1123,7 @@ const char *host_detect_local_cpu (int argc, const
avx512ifma, avx512vbmi, avx5124fmaps, avx5124vnniw,
clwb, mwaitx, clzero, pku, rdpid, gfni, shstk,
avx512vbmi2, avx512vnni, vaes, vpclmulqdq,
-   avx512bitalg, movdiri, movdir64b, NULL);
+   avx512bitalg, movdiri, movdir64b, waitpkg, NULL);
 }
 
 done:
Index: i386-builtin-types.def
===
--- i386-builtin-types.def  (revision 260116)
+++ i386-builtin-types.def  (working copy)
@@ -290,6 +290,7 @@ DEF_FUNCTION_TYPE (VOID, UINT64)
 DEF_FUNCTION_TYPE (VOID, UINT64, PVOID)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED)
 DEF_FUNCTION_TYPE (VOID, UNSIGNED, PVOID)
+DEF_FUNCTION_TYPE (UINT8, UNSIGNED, UINT64)
 DEF_FUNCTION_TYPE (INT, PUSHORT)
 DEF_FUNCTION_TYPE (INT, PUNSIGNED)
 DEF_FUNCTION_TYPE (INT, PULONGLONG)
Index: i386-c.c
===
--- i386-c.c(revision 260116)
+++ i386-c.c(working copy)
@@ -516,6 +516,8 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_fla
 def_or_undef (parse_in, "__MOVDIRI__");
   if (isa_flag2 & OPTION_MASK_ISA_MOVDIR64B)
 def_or_undef (parse_in, "__MOVDIR64B__");
+  if (isa_flag2 & OPTION_MASK_ISA_WAITPKG)
+def_or_undef (parse_in, "__WAITPKG__");
   if (TARGET_IAMCU)
 {
   def_or_undef (parse_in, "__iamcu");
Index: i386.c
===
--- i386.c  (revision 260116)
+++ i386.c  (working copy)
@@ -2772,7 +2772,8 @@ ix86_target_string (HOST_WIDE_INT isa, HOST_WIDE_I
 { "-mmovbe",   OPTION_MASK_ISA_MOVBE },
 { "-mclzero",  OPTION_MASK_ISA_CLZERO },
 { "-mmwaitx",  OPTION_MASK_ISA_MWAITX },
-{ "-mmovdir64b",   OPTION_MASK_ISA_MOVDIR64B }
+{ "-mmovdir64b",   OPTION_MASK_ISA_MOVDIR64B },
+{ "-mwaitpkg", OPTION_MASK_ISA_WAITPKG }
   };
   static struct ix86_target_opts isa_opts[] =
   {
@@ -3455,6 +3456,7 @@ ix86_option_override_internal (bool main_args_p,
   const wide_int_bitmask PTA_RDPID (0, HOST_WIDE_INT_1U << 6);
   const wide_int_bitmask PTA_PCONFIG (0, HOST_WIDE_INT_1U << 7);
   const wide_int_bitmask PTA_WBNOINVD (0, HOST_WIDE_INT_1U << 8);
+  const wide_int_bitmask PTA_WAITPKG (0, HOST_WIDE_INT_1U << 9);
 
   const wide_int_bitmask PTA_CORE2 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
 | PTA_SSE3 | PTA_SSSE3 | PTA_CX16 | PTA_FXSR;
@@ -5387,6 +5389,7 @@ ix86_valid_target_attribute_inner_p (tree args, ch
 IX86_ATTR_ISA ("vpclmulqdq", OPT_mvpclmulqdq),
 IX86_ATTR_ISA ("movdiri", OPT_mmovdiri),
 IX86_ATTR_ISA ("movdir64b", OPT_mmovdir64b),
+IX86_ATTR_ISA ("waitpkg", OPT_mwaitpkg),
 
 /* enum options */
 IX86_ATTR_ENUM ("fpmath=", 

Re: [PATCH 1/2] gcc_qsort: source code changes

2018-05-10 Thread Alexander Monakov
On Thu, 10 May 2018, Richard Biener wrote:
> 
> Just a quick first remark - how about putting this into libiberty?  And then 
> name it xqsort? 

I'm not sure.  It has a weaker contract compared to qsort, and I believe
functions in libiberty are understood to provide stronger/better replacements.

Alexander


Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Steve Kargl
On Thu, May 10, 2018 at 07:56:59PM +0200, Thomas Koenig wrote:
> Am 10.05.2018 um 17:41 schrieb Steve Kargl:
> > It is certainly possible to give a warning, but it
> > would be odd (to me) to warn about technically
> > standard conforming code.
> 
> Maybe we could add
> 
> gfc_warning (0, "Standard-conforming code found, your code may run as 
> expected");
> 
> at the end of the compilation :-)

Given the amount of nonstandard code gfortran has to cope with,
the above isn't a bad idea!  Perhaps, we can also hand out
merit badges.  :-)

-- 
Steve


[v3] PATCH to make _BracketMatcher::_S_cache_size a variable

2018-05-10 Thread Jason Merrill
There doesn't seem to be any reason for _S_cache_size to be a function
rather than a variable.  OK for trunk?
commit bf6843a87d3f64c9ffa29ce8d50aaa6fe8aabcfa
Author: Jason Merrill 
Date:   Wed Mar 14 21:39:06 2018 -0400

* include/bits/regex_compiler.h (_S_cache_size): Change from
function to variable.

diff --git a/libstdc++-v3/include/bits/regex_compiler.h b/libstdc++-v3/include/bits/regex_compiler.h
index 7e5c2073554..6eee9cb9072 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -527,14 +527,12 @@ namespace __detail
   typedef typename std::is_same<_CharT, char>::type _UseCache;
 
   static constexpr size_t
-  _S_cache_size()
-  {
-	return 1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
-  }
+  _S_cache_size =
+	1ul << (sizeof(_CharT) * __CHAR_BIT__ * int(_UseCache::value));
 
   struct _Dummy { };
   typedef typename std::conditional<_UseCache::value,
-	std::bitset<_S_cache_size()>,
+	std::bitset<_S_cache_size>,
 	_Dummy>::type _CacheT;
   typedef typename std::make_unsigned<_CharT>::type _UnsignedCharT;
 


Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Thomas Koenig

Am 10.05.2018 um 17:41 schrieb Steve Kargl:

It is certainly possible to give a warning, but it
would be odd (to me) to warn about technically
standard conforming code.


Maybe we could add

gfc_warning (0, "Standard-conforming code found, your code may run as 
expected");


at the end of the compilation :-)


[PATCH, rs6000] Fix expected BE counts for vsx-vector-6.h

2018-05-10 Thread Carl Love
GCC Maintainers:

The following patch fixes issues found with the instruction counts for
the vsx-vector-6.h test.  It was found that on a BE system where GCC is
configured with --with-cpu=power6 the expected counts in vsx-vector-6-
be.c do not match.  This patch fixes the expected counts when
configuring and building GCC using --with-cpu=power6, --with-cpu=power7 
and --with-cpu=power8 on a BE system.

The patch was tested on 

powerpc64le-unknown-linux-gnu (Power 8 LE)
powerpc64-unknown-linux-gnu (Power 8 BE)  configured for power6  
powerpc64-unknown-linux-gnu (Power 8 BE)  configured for power7
    powerpc64-unknown-linux-gnu (Power 8 BE)  configured for power8

Please let me know if the patch looks OK for GCC mainline.

 Carl Love
-

gcc/testsuite/ChangeLog:

2018-05-10 Carl Love  
* gcc.target/powerpc/vsx-vector-6-be.c (dg-options): Update to run on
Power 6 only.
(dg-final): Update xvcmpgtdp, xvcmpgedp counts for Power 6.
* gcc.target/powerpc/vsx-vector-6-be.p7.c (dg-final): New test file for
Power 7.
* gcc.target/powerpc/vsx-vector-6-be.p8.c (dg-final): New test file for
Power 8.
---
 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c |  7 +++--
 .../gcc.target/powerpc/vsx-vector-6-be.p7.c| 34 ++
 .../gcc.target/powerpc/vsx-vector-6-be.p8.c| 34 ++
 3 files changed, 72 insertions(+), 3 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p7.c
 create mode 100644 gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p8.c

diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c
index 3305781..aa2093c 100644
--- a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.c
@@ -1,15 +1,16 @@
 /* { dg-do compile { target { powerpc64-*-* && lp64 } } } */
 /* { dg-skip-if "" { powerpc*-*-darwin* } } */
 /* { dg-require-effective-target powerpc_vsx_ok } */
-/* { dg-options "-mvsx -O2" } */
+/* { dg-options "-mvsx -O2 -mcpu=power6" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power6" } } */
 
 /* Expected instruction counts for Big Endian */
 
 /* { dg-final { scan-assembler-times "xvabsdp" 1 } } */
 /* { dg-final { scan-assembler-times "xvadddp" 1 } } */
 /* { dg-final { scan-assembler-times "xxlnor" 7 } } */
-/* { dg-final { scan-assembler-times "xvcmpeqdp" 6 } } */
-/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */
+/* { dg-final { scan-assembler-times "xvcmpeqdp" 9 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 10 } } */
 /* { dg-final { scan-assembler-times "xvcmpgedp" 7 } } */
 /* { dg-final { scan-assembler-times "xvrdpim" 1 } } */
 /* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p7.c 
b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p7.c
new file mode 100644
index 000..01c3c2b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p7.c
@@ -0,0 +1,34 @@
+/* { dg-do compile { target { powerpc64-*-* && lp64 } } } */
+/* { dg-skip-if "" { powerpc*-*-darwin* } } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O2 -mcpu=power7" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { 
"-mcpu=power7" } } */
+
+
+/* Expected instruction counts for Big Endian */
+
+/* { dg-final { scan-assembler-times "xvabsdp" 1 } } */
+/* { dg-final { scan-assembler-times "xvadddp" 1 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 7 } } */
+/* { dg-final { scan-assembler-times "xvcmpeqdp" 6 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 8 } } */
+/* { dg-final { scan-assembler-times "xvcmpgedp" 7 } } */
+/* { dg-final { scan-assembler-times "xvrdpim" 1 } } */
+/* { dg-final { scan-assembler-times "xvmaddadp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmsubadp" 1 } } */
+/* { dg-final { scan-assembler-times "xvsubdp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmaxdp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmindp" 1 } } */
+/* { dg-final { scan-assembler-times "xvmuldp" 1 } } */
+/* { dg-final { scan-assembler-times "vperm" 1 } } */
+/* { dg-final { scan-assembler-times "xvrdpic" 1 } } */
+/* { dg-final { scan-assembler-times "xvsqrtdp" 1 } } */
+/* { dg-final { scan-assembler-times "xvrdpiz" 1 } } */
+/* { dg-final { scan-assembler-times "xvmsubasp" 1 } } */
+/* { dg-final { scan-assembler-times "xvnmaddasp" 1 } } */
+/* { dg-final { scan-assembler-times "vmsumshs" 1 } } */
+/* { dg-final { scan-assembler-times "xxland" 13 } } */
+/* { dg-final { scan-assembler-times "xxsel" 2 } } */
+
+/* Source code for the test in vsx-vector-6.h */
+#include "vsx-vector-6.h"
diff --git a/gcc/testsuite/gcc.target/powerpc/vsx-vector-6-be.p8.c 

Re: [PATCH 0/2] Introduce gcc_qsort

2018-05-10 Thread Alexander Monakov
On Thu, 10 May 2018, Richard Biener wrote:
> >   - signature-compatible to C qsort, but relaxed contract:
> > - may apply the comparator to elements in a temporary buffer
> 
> What consequences has this or rather how is this observable and makes 
> comparators behave differently? 

The only serious consequence I'm aware of is this:

the buffer must be sufficiently aligned to match the alignment requirement
of elements being sorted

Alexander


Re: [PATCH][i386] Adding WAITPKG instructions

2018-05-10 Thread Uros Bizjak
On Thu, May 10, 2018 at 3:44 PM, Peryt, Sebastian
 wrote:
>> -Original Message-
>> From: Uros Bizjak [mailto:ubiz...@gmail.com]
>> Sent: Thursday, May 10, 2018 3:26 PM
>> To: Peryt, Sebastian 
>> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
>> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
>>
>> On Thu, May 10, 2018 at 2:50 PM, Peryt, Sebastian 
>> wrote:
>> > Hi Uros,
>> >
>> > Updated patch attached, please find comments below.
>> >
>> >> -Original Message-
>> >> From: Uros Bizjak [mailto:ubiz...@gmail.com]
>> >> Sent: Wednesday, May 9, 2018 1:47 PM
>> >> To: Peryt, Sebastian 
>> >> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
>> >> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
>> >>
>> >> On Tue, May 8, 2018 at 1:34 PM, Peryt, Sebastian
>> >> 
>> >> wrote:
>> >> > Hi,
>> >> >
>> >> > This patch adds support for WAITPKG instructions.
>> >> >
>> >> > Is it ok for trunk and, after a few days, for backport to GCC-8?
>> >> >
>> > (Removed)
>> >> >
>> >> >
>> >>
>> >> +case IX86_BUILTIN_UMONITOR:
>> >> +  arg0 = CALL_EXPR_ARG (exp, 0);
>> >> +  op0 = expand_normal (arg0);
>> >> +  if (!REG_P (op0))
>> >> +op0 = ix86_zero_extend_to_Pmode (op0);
>> >> +
>> >> +  emit_insn (ix86_gen_umonitor (op0));
>> >> +  return 0;
>> >>
>> >> Please see how movdir64b handles its address operand. Also, do not
>> >> use global ix86_gen_monitor, just expand directly in the same way as
>> movdir64b.
>> >>
>> >
>> > Fixed.
>> >
>> >> +case IX86_BUILTIN_UMWAIT:
>> >> +case IX86_BUILTIN_TPAUSE:
>> >> +  rtx eax, edx, op1_lo, op1_hi;
>> >> +  arg0 = CALL_EXPR_ARG (exp, 0);
>> >> +  arg1 = CALL_EXPR_ARG (exp, 1);
>> >> +  op0 = expand_normal (arg0);
>> >> +  op1 = expand_normal (arg1);
>> >> +  eax = gen_rtx_REG (SImode, AX_REG);
>> >> +  edx = gen_rtx_REG (SImode, DX_REG);
>> >> +  if (!REG_P (op0))
>> >> +op0 = copy_to_mode_reg (SImode, op0);
>> >> +  if (!REG_P (op1))
>> >> +op1 = copy_to_mode_reg (DImode, op1);
>> >> +  op1_lo = gen_lowpart (SImode, op1);
>> >> +  op1_hi = expand_shift (RSHIFT_EXPR, DImode, op1,
>> >> + GET_MODE_BITSIZE (SImode), 0, 1);
>> >> +  op1_hi = convert_modes (SImode, DImode, op1_hi, 1);
>> >> +  emit_move_insn (eax, op1_lo);
>> >> +  emit_move_insn (edx, op1_hi);
>> >> +  emit_insn (fcode == IX86_BUILTIN_UMWAIT
>> >> +? gen_umwait (op0, eax, edx)
>> >> +: gen_tpause (op0, eax, edx));
>> >> +
>> >> +  /* Return current CF value.  */
>> >> +  op3 = gen_rtx_REG (CCCmode, FLAGS_REG);
>> >> +  target = gen_rtx_LTU (QImode, op3, const0_rtx);
>> >> +
>> >> +  return target;
>> >>
>> >> For the above code, please see how xsetbv expansion and patterns are
>> >> handling their input operands. There should be two patterns, one for
>> >> 32bit and the other for 64bit targets. The patterns will need to set
>> >> FLAGS_REG, otherwise the test will be removed.
>> >>
>> >
>> > I copied what is done for xsetbv expansion and most likely I found some 
>> > bug in
>> GCC.
>> > The problem is that when I use 3 arguments and compile as 64bit
>> > version upper part of rax is not cleared. It doesn't appear when I'm using 
>> > 2 or 4
>> function arguments.
>> > Most likely error is caused by the fact that rdx is used both as an
>> > input for function and argument in instruction.
>>
>> There is no need to clear upper parts of 64bit register. As specified in the 
>> ISA
>> (and modelled with RTX pattern), the instruction (e.g.
>> tpause) reads only lower 32 bits from %rax and %rdx. Implicitly, the 
>> instruction
>> should ignore upper 32 bits by itself, so we can use SUBREGs. If this is not 
>> the
>> case, we need to use DImode input arguments in RTX pattern and explicitly 
>> emit
>> zero-extension insns to clear upper 32 bits of input arguments.
>>
>
> Ok, I agree with you regarding clearing.
>
> But there is still one thing bothering me as explained in last email. The 
> problem appears when I use 3
> arguments and compile as 64bit version. Assembly generated is different from 
> when I'm adding extra unused
> argument or removing one function argument not related to my instruction. I'm 
> talking about umonitor-1.c test, function bar.
>
> Do you see the difference? This is the problem with clearing of registers I 
> wrote previously. Why is this happening?

This happens because IX86_BUILTIN_XSETBV expander, from where your
expander was copied, creates fairly convoluted initial RTX sequence.
The compiler does its best to optimize it, but it can't do magic.
Please use attached patch that creates:

bar:
.LFB5453:
.cfi_startproc
movq%rdx, %rax
umonitor%rdi
shrq$32, %rdx
umwait  %esi
setc%al
ret
  

Re: [PATCH 0/2] Introduce gcc_qsort

2018-05-10 Thread Alexander Monakov
On Thu, 10 May 2018, Jakub Jelinek wrote:
> Have you gathered some statistics on the element sizes and how often they
> appear in qsort calls (perhaps weighted by n*log(n) of the element count)
> across bootstrap+regtest?

No, but Adhemerval Zanella collected stats on bootstrap, and they are similar
to my observations: https://sourceware.org/ml/libc-alpha/2018-01/msg00629.html

> glibc uses indirect sorting (sorts pointers to the elements or indexes and
> just reshuffles at the end) and has special case for the most commonly used
> small element size (4/8).  With C++ templates you could achieve that even
> without macros, just by instantiating the mergesort and its helpers for the
> few common cases.  Or is that not worth it (as in, we never sort really
> large (say > 32 bytes) element sizes and the 4 or 8 bytes long element sizes
> aren't common enough to see benefit by using constant size memcpy for those
> cases)?

I think it's not worth it as branches by element size are not too frequent,
off the critical path, and easy for predictors. So doing it via templates
would cause significant code growth for no speed gain.

As for indirect sorting, we rarely sort elements of size other than 4/8, so
I believe that's not worth it either.

Alexander


Re: [libstdc++, PATCH] PR libstdc++/83140 - assoc_legendre returns negated value when m is odd.

2018-05-10 Thread Rainer Orth
Hi Ed,

>>> 2018-05-07  Edward Smith-Rowland  <3dw...@verizon.net>
>>>
>>> PR libstdc++/83140 - assoc_legendre returns negated value when m is
>>> odd
>>> * include/tr1/legendre_function.tcc (__assoc_legendre_p): Add
>>> __phase
>>> argument defaulted to +1.  Doxy comments on same.
>>> * testsuite/special_functions/02_assoc_legendre/
>>> check_assoc_legendre.cc: Regen.
>>> * testsuite/tr1/5_numerical_facilities/special_functions/
>>> 02_assoc_legendre/check_tr1_assoc_legendre.cc: Regen.

something went badly wrong with the regeneration of this last file: both
in your attached patch and in what you checked in, the file is empty.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: [PATCH 1/2] gcc_qsort: source code changes

2018-05-10 Thread Richard Biener
On May 10, 2018 5:56:40 PM GMT+02:00, Alexander Monakov  
wrote:
>   * sort.cc: New file.
>* system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.
>* vec.c (qsort_chk): Use gcc_qsort.

Just a quick first remark - how about putting this into libiberty?  And then 
name it xqsort? 

Richard. 

>---
>gcc/sort.cc  | 232
>+++
> gcc/system.h |   7 +-
> gcc/vec.c|   2 +-
> 3 files changed, 238 insertions(+), 3 deletions(-)
> create mode 100644 gcc/sort.cc
>
>diff --git a/gcc/sort.cc b/gcc/sort.cc
>new file mode 100644
>index 000..4faf6d45dc6
>--- /dev/null
>+++ b/gcc/sort.cc
>@@ -0,0 +1,232 @@
>+/* Platform-independent deterministic sort function.
>+   Copyright (C) 2018 Free Software Foundation, Inc.
>+   Contributed by Alexander Monakov.
>+
>+This file is part of GCC.
>+
>+GCC is free software; you can redistribute it and/or modify it
>+under the terms of the GNU General Public License as published by the
>+Free Software Foundation; either version 3, or (at your option) any
>+later version.
>+
>+GCC is distributed in the hope that it will be useful, but WITHOUT
>+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
>+for more details.
>+
>+You should have received a copy of the GNU General Public License
>+along with GCC; see the file COPYING3.  If not see
>+<http://www.gnu.org/licenses/>.  */
>+
>+/* This implements a sort function suitable for GCC use cases:
>+   - signature-compatible to C qsort, but relaxed contract:
>+ - may apply the comparator to elements in a temporary buffer
>+ - may abort on allocation failure
>+   - deterministic (but not necessarily stable)
>+   - fast, especially for common cases (0-5 elements of size 8 or 4)
>+
>+   The implementation uses a network sort for up to 5 elements and
>+   a merge sort on top of that.  Neither stage has branches depending
>on
>+   comparator result, trading extra arithmetic for branch
>mispredictions.  */
>+
>+#ifdef GENERATOR_FILE
>+#include "bconfig.h"
>+#else
>+#include "config.h"
>+#endif
>+
>+#include "system.h"
>+
>+#define likely(cond) __builtin_expect ((cond), 1)
>+
>+#ifdef __GNUC__
>+#define noinline __attribute__ ((__noinline__))
>+#else
>+#define noinline
>+#endif
>+
>+/* C-style qsort comparator function type.  */
>+typedef int cmp_fn (const void *, const void *);
>+
>+/* Structure holding read-mostly (read-only in netsort) context. */
>+struct sort_ctx
>+{
>+  cmp_fn *cmp; // pointer to comparator
>+  char   *out; // output buffer
>+  size_t n;// number of elements
>+  size_t size; // element size
>+};
>+
>+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
>+   placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */
>+static void
>+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
>+{
>+#define REORDER_23(SIZE, STRIDE, OFFSET)\
>+do {\
>+  size_t t0, t1;\
>+  memcpy (&t0, e0 + OFFSET, SIZE);  \
>+  memcpy (&t1, e1 + OFFSET, SIZE);  \
>+  char *out = c->out + OFFSET;  \
>+  if (likely (c->n == 3))   \
>+memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \
>+  memcpy (out, &t0, SIZE); out += STRIDE;   \
>+  memcpy (out, &t1, SIZE);  \
>+} while (0)
>+
>+  if (sizeof (size_t) == 8 && likely (c->size == 8))
>+REORDER_23 (8, 8, 0);
>+  else if (likely (c->size == 4))
>+REORDER_23 (4, 4, 0);
>+  else
>+{
>+  size_t offset = 0, step = sizeof (size_t);
>+  for (; offset + step <= c->size; offset += step)
>+  REORDER_23 (step, c->size, offset);
>+  for (; offset < c->size; offset++)
>+  REORDER_23 (1, c->size, offset);
>+}
>+}
>+
>+/* Like reorder23, but permute 4 or 5 elements. */
>+static void
>+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char
>*e4)
>+{
>+#define REORDER_45(SIZE, STRIDE, OFFSET)\
>+do {\
>+  size_t t0, t1, t2, t3;\
>+  memcpy (&t0, e0 + OFFSET, SIZE);  \
>+  memcpy (&t1, e1 + OFFSET, SIZE);  \
>+  memcpy (&t2, e2 + OFFSET, SIZE);  \
>+  memcpy (&t3, e3 + OFFSET, SIZE);  \
>+  char *out = c->out + OFFSET;  \
>+  if (likely (c->n == 5))   \
>+memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \
>+  memcpy (out, &t0, SIZE); out += STRIDE;   \
>+  memcpy (out, &t1, SIZE); out += STRIDE;   \
>+  memcpy (out, &t2, SIZE); out += STRIDE;   \
>+  memcpy (out, &t3, SIZE);  \
>+} while (0)
>+
>+  if (sizeof (size_t) == 8 && likely (c->size == 8))
>+REORDER_45 (8, 8, 0);
>+  else if (likely(c->size == 4))
>+REORDER_45 (4, 4, 0);
>+  else
>+{
>+  size_t offset = 0, step = sizeof (size_t);
>+  for (; offset + step <= c->size; 

Re: [PATCH 0/2] Introduce gcc_qsort

2018-05-10 Thread Richard Biener
On May 10, 2018 5:56:39 PM GMT+02:00, Alexander Monakov  
wrote:
>Hello.
>
>This introduces a replacement for qsort() in GCC. The main selling
>point is
>reproducibility (currently compiler output may change depending on how
>libc
>qsort reorders not-bitwise-identical elements that compare equal) with
>a 
>small improvement speed-wise and small code growth (under 2K on
>x86-64).
>
>The opening comment in sort.cc gives a brief implementation overview:
>
>/* This implements a sort function suitable for GCC use cases:
>   - signature-compatible to C qsort, but relaxed contract:
> - may apply the comparator to elements in a temporary buffer

What consequences has this or rather how is this observable and makes 
comparators behave differently? 

Otherwise thanks for doing this. Will review tomorrow. 

Richard. 

> - may abort on allocation failure
>   - deterministic (but not necessarily stable)
>   - fast, especially for common cases (0-5 elements of size 8 or 4)
>
>   The implementation uses a network sort for up to 5 elements and
>  a merge sort on top of that.  Neither stage has branches depending on
>comparator result, trading extra arithmetic for branch mispredictions. 
>*/
>
>I used a Sandy Bridge CPU to collect statistics on tramp3d -O2
>compilation.
>
>Overall the new implementation is roughly 30% faster compared to Glibc
>qsort,
>with 2x or more speedup for cases with tiny element count. I see one
>instance
>where the new approach is significantly (1.5x) slower: it is ipa-icf.c:
>sort_congruence_class_groups_by_decl_uid. It sorts a big array (1500
>entries)
>and needs 14 indirect loads just to reach values to compare, so when
>branch
>prediction manages to guess correctly, it allows to overlap execution
>of two
>comparators and better hide their cache miss latency.
>
>Overall GCC spends about 0.3% time under qsort, but this doesn't
>automatically
>mean that this brings a 0.1% speed improvement: it may be larger or
>smaller
>depending on how new code affects cache behavior and branch predictors
>in
>other code, and it's not trivial to measure precisely.
>
>I can go into more detail about measured stats if there's interest :)
>
>Makefile.in changes are separated to patch 2 in the hope it'd make
>review
>easier, but the two patches will need to be applied together.
>
>Bootstrapped/regtested on x86-64, OK for trunk?
>
>Alexander Monakov (2):
>  gcc_qsort: source code changes
>  gcc_qsort: build system changes
>
> gcc/Makefile.in |   9 ++-
>gcc/sort.cc | 232
>
> gcc/system.h|   7 +-
> gcc/vec.c   |   2 +-
> 4 files changed, 243 insertions(+), 7 deletions(-)
> create mode 100644 gcc/sort.cc



Re: [build] Fix Solaris gty handling (PR target/84379)

2018-05-10 Thread Eric Botcazou
> As described in the PR, there are a couple of jit testsuite failures on
> Solaris when using /bin/as.  The errors point to GC issues and indeed,
> gcc/config/sol2.c lacked GTY markup.  This patch fixes that, following
> what darwin.c does for machopic_indirections.  I confess I have no idea
> why I had to change the code the way I did except for the fact that it
> works.  While formally I don't need approval, it would be nice if
> someone in the know could have a look.

The net effect of the patch is to block GC for the DECLs registered by calls 
to solaris_elf_asm_comdat_section, that is to say, the mere fact of passing 
these DECLs to the function ensures that they will be kept.

This obviously fixes the GC failures described in PR jit/84288.  However, this 
also means that, even if the DECLs could otherwise be collected, they won't be 
anymore once they are passed to solaris_elf_asm_comdat_section.

This may be the expected behavior or may be deemed good enough.  However, you 
may want to implement a cache-like behavior instead, where the DECLs passed to 
solaris_elf_asm_comdat_section are not automatically kept, but instead only 
the slots of the hash table are kept if their associated DECL is.  See the 
documentation of ggc_cache_remove in hash-table.h for further details.

-- 
Eric Botcazou


Re: [PATCH 0/2] Introduce gcc_qsort

2018-05-10 Thread Jakub Jelinek
On Thu, May 10, 2018 at 06:56:39PM +0300, Alexander Monakov wrote:
> Overall the new implementation is roughly 30% faster compared to Glibc qsort,
> with 2x or more speedup for cases with tiny element count. I see one instance
> where the new approach is significantly (1.5x) slower: it is ipa-icf.c:
> sort_congruence_class_groups_by_decl_uid. It sorts a big array (1500 entries)
> and needs 14 indirect loads just to reach values to compare, so when branch
> prediction manages to guess correctly, it allows to overlap execution of two
> comparators and better hide their cache miss latency.
> 
> Overall GCC spends about 0.3% time under qsort, but this doesn't automatically
> mean that this brings a 0.1% speed improvement: it may be larger or smaller
> depending on how new code affects cache behavior and branch predictors in
> other code, and it's not trivial to measure precisely.
> 
> I can go into more detail about measured stats if there's interest :)
> 
> Makefile.in changes are separated to patch 2 in the hope it'd make review
> easier, but the two patches will need to be applied together.
> 
> Bootstrapped/regtested on x86-64, OK for trunk?

Have you gathered some statistics on the element sizes and how often they
appear in qsort calls (perhaps weighted by n*log(n) of the element count)
across bootstrap+regtest?

glibc uses indirect sorting (sorts pointers to the elements or indexes and
just reshuffles at the end) and has special case for the most commonly used
small element size (4/8).  With C++ templates you could achieve that even
without macros, just by instantiating the mergesort and its helpers for the
few common cases.  Or is that not worth it (as in, we never sort really
large (say > 32 bytes) element sizes and the 4 or 8 bytes long element sizes
aren't common enough to see benefit by using constant size memcpy for those
cases)?

Jakub


Re: [PATCH 1/2] gcc_qsort: source code changes

2018-05-10 Thread David Malcolm
On Thu, 2018-05-10 at 18:56 +0300, Alexander Monakov wrote:
>   * sort.cc: New file.
> * system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.
> * vec.c (qsort_chk): Use gcc_qsort.

[...snip...]

I'm not a reviewer for this, but there's a lot of fiddly implementation
logic here, so maybe this code could use the selftest framework?

Maybe, in pseudo-code, something like this:

template <typename T>
static void
test_gcc_sort ()
{
   for (creation_strategy in {in-order, backwards}: // and anything else?
 for (int n = 0; n < some_limit; n++)
   {
  make_a_list_of_t (n, creation_strategy)
  gcc_sort (the_list);
  assert that the list is sorted;
  assert that the number of calls to the callback was sane
 }
}

void
test_gcc_sort_cc ()
{
   test_gcc_sort ();
   test_gcc_sort ();
   // etc; maybe some custom structs to exercise the deterministic property???
}

...or some such, to quickly get coverage of the various list sizes
(which the implementation seems to rely on heavily), in a non-release
build.



Hope this is constructive
Dave


[PING #2] [PATCH] avoid duplicate warning for strcmp with a nonstring (PR 85359)

2018-05-10 Thread Martin Sebor

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-04/msg00650.html

On 04/19/2018 10:03 AM, Martin Sebor wrote:

Ping: https://gcc.gnu.org/ml/gcc-patches/2018-04/msg00650.html

This just suppresses a duplicate warning.  Please let me know
if it's preferable to defer it until GCC 9.  Otherwise, I'll
be traveling the next two weeks with only limited availability
(none the first week in May).

On 04/12/2018 02:52 PM, Martin Sebor wrote:

The attached patch makes a small tweak to avoid issuing a duplicate
warning for calls to strcmp with a nonstring argument.  The most
onerous part of this was figuring out how to test for the absence
of duplicate warnings.  The "hack" I used (dg-regexp) is in place
until a more straightforward solution becomes available.  (David
Malcolm has something planned for GCC 9.)

Martin






[PATCH 1/2] gcc_qsort: source code changes

2018-05-10 Thread Alexander Monakov
* sort.cc: New file.
* system.h [!CHECKING_P] (qsort): Redirect to gcc_qsort.
* vec.c (qsort_chk): Use gcc_qsort.

---
 gcc/sort.cc  | 232 +++
 gcc/system.h |   7 +-
 gcc/vec.c|   2 +-
 3 files changed, 238 insertions(+), 3 deletions(-)
 create mode 100644 gcc/sort.cc

diff --git a/gcc/sort.cc b/gcc/sort.cc
new file mode 100644
index 000..4faf6d45dc6
--- /dev/null
+++ b/gcc/sort.cc
@@ -0,0 +1,232 @@
+/* Platform-independent deterministic sort function.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Alexander Monakov.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>.  */
+
+/* This implements a sort function suitable for GCC use cases:
+   - signature-compatible to C qsort, but relaxed contract:
+ - may apply the comparator to elements in a temporary buffer
+ - may abort on allocation failure
+   - deterministic (but not necessarily stable)
+   - fast, especially for common cases (0-5 elements of size 8 or 4)
+
+   The implementation uses a network sort for up to 5 elements and
+   a merge sort on top of that.  Neither stage has branches depending on
+   comparator result, trading extra arithmetic for branch mispredictions.  */
+
+#ifdef GENERATOR_FILE
+#include "bconfig.h"
+#else
+#include "config.h"
+#endif
+
+#include "system.h"
+
+#define likely(cond) __builtin_expect ((cond), 1)
+
+#ifdef __GNUC__
+#define noinline __attribute__ ((__noinline__))
+#else
+#define noinline
+#endif
+
+/* C-style qsort comparator function type.  */
+typedef int cmp_fn (const void *, const void *);
+
+/* Structure holding read-mostly (read-only in netsort) context. */
+struct sort_ctx
+{
+  cmp_fn *cmp; // pointer to comparator
+  char   *out; // output buffer
+  size_t n;// number of elements
+  size_t size; // element size
+};
+
+/* Helper for netsort. Permute, possibly in-place, 2 or 3 elements,
+   placing E0 to C->OUT, E1 to C->OUT + C->SIZE, and so on. */
+static void
+reorder23 (sort_ctx *c, char *e0, char *e1, char *e2)
+{
+#define REORDER_23(SIZE, STRIDE, OFFSET)\
+do {\
+  size_t t0, t1;\
+  memcpy (&t0, e0 + OFFSET, SIZE);  \
+  memcpy (&t1, e1 + OFFSET, SIZE);  \
+  char *out = c->out + OFFSET;  \
+  if (likely (c->n == 3))   \
+memcpy (out + 2*STRIDE, e2 + OFFSET, SIZE); \
+  memcpy (out, &t0, SIZE); out += STRIDE;   \
+  memcpy (out, &t1, SIZE);  \
+} while (0)
+
+  if (sizeof (size_t) == 8 && likely (c->size == 8))
+REORDER_23 (8, 8, 0);
+  else if (likely (c->size == 4))
+REORDER_23 (4, 4, 0);
+  else
+{
+  size_t offset = 0, step = sizeof (size_t);
+  for (; offset + step <= c->size; offset += step)
+   REORDER_23 (step, c->size, offset);
+  for (; offset < c->size; offset++)
+   REORDER_23 (1, c->size, offset);
+}
+}
+
+/* Like reorder23, but permute 4 or 5 elements. */
+static void
+reorder45 (sort_ctx *c, char *e0, char *e1, char *e2, char *e3, char *e4)
+{
+#define REORDER_45(SIZE, STRIDE, OFFSET)\
+do {\
+  size_t t0, t1, t2, t3;\
+  memcpy (&t0, e0 + OFFSET, SIZE);  \
+  memcpy (&t1, e1 + OFFSET, SIZE);  \
+  memcpy (&t2, e2 + OFFSET, SIZE);  \
+  memcpy (&t3, e3 + OFFSET, SIZE);  \
+  char *out = c->out + OFFSET;  \
+  if (likely (c->n == 5))   \
+memcpy (out + 4*STRIDE, e4 + OFFSET, SIZE); \
+  memcpy (out, &t0, SIZE); out += STRIDE;   \
+  memcpy (out, &t1, SIZE); out += STRIDE;   \
+  memcpy (out, &t2, SIZE); out += STRIDE;   \
+  memcpy (out, &t3, SIZE);  \
+} while (0)
+
+  if (sizeof (size_t) == 8 && likely (c->size == 8))
+REORDER_45 (8, 8, 0);
+  else if (likely(c->size == 4))
+REORDER_45 (4, 4, 0);
+  else
+{
+  size_t offset = 0, step = sizeof (size_t);
+  for (; offset + step <= c->size; offset += step)
+   REORDER_45 (step, c->size, offset);
+  for (; offset < c->size; offset++)
+   REORDER_45 (1, c->size, offset);
+}
+}
+
+/* Helper for netsort. Invoke comparator CMP on E0 and E1.
+   Return E0^E1 if E0 compares less than E1, zero otherwise.
+   This is noinline to avoid code growth and 

[PATCH 2/2] gcc_qsort: build system changes

2018-05-10 Thread Alexander Monakov
* Makefile.in (OBJS-libcommon): Add sort.o.
(build/sort.o): New target.  Use it...
(BUILD_RTL): ... here, and...
(build/gencfn-macros): ... here, and...
(build/genmatch): ... here.

---
 gcc/Makefile.in | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 20bee0494b1..8ec0511704d 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1076,7 +1076,7 @@ BUILD_LIBS = $(BUILD_LIBIBERTY)
 
 BUILD_RTL = build/rtl.o build/read-rtl.o build/ggc-none.o \
build/vec.o build/min-insn-modes.o build/gensupport.o \
-   build/print-rtl.o build/hash-table.o
+   build/print-rtl.o build/hash-table.o build/sort.o
 BUILD_MD = build/read-md.o
 BUILD_ERRORS = build/errors.o
 
@@ -1611,7 +1611,7 @@ OBJS-libcommon = diagnostic.o diagnostic-color.o 
diagnostic-show-locus.o \
pretty-print.o intl.o \
sbitmap.o \
vec.o input.o version.o hash-table.o ggc-none.o memory-block.o \
-   selftest.o selftest-diagnostic.o
+   selftest.o selftest-diagnostic.o sort.o
 
 # Objects in libcommon-target.a, used by drivers and by the core
 # compiler and containing target-dependent code.
@@ -2681,6 +2681,7 @@ build/vec.o : vec.c $(BCONFIG_H) $(SYSTEM_H) 
$(CORETYPES_H) $(VEC_H)  \
   $(GGC_H) toplev.h $(DIAGNOSTIC_CORE_H)
 build/hash-table.o : hash-table.c $(BCONFIG_H) $(SYSTEM_H) \
   $(CORETYPES_H) $(HASH_TABLE_H) $(GGC_H) toplev.h $(DIAGNOSTIC_CORE_H)
+build/sort.o : sort.cc $(BCONFIG_H) $(SYSTEM_H)
 build/inchash.o : inchash.c $(BCONFIG_H) $(SYSTEM_H) $(CORETYPES_H)\
   $(HASHTAB_H) inchash.h
 build/gencondmd.o : build/gencondmd.c $(BCONFIG_H) $(SYSTEM_H) \
@@ -2817,7 +2818,7 @@ build/genautomata$(build_exeext) : BUILD_LIBS += -lm
 
 build/genrecog$(build_exeext) : build/hash-table.o build/inchash.o
 build/gencfn-macros$(build_exeext) : build/hash-table.o build/vec.o \
-  build/ggc-none.o
+  build/ggc-none.o build/sort.o
 
 # For stage1 and when cross-compiling use the build libcpp which is
 # built with NLS disabled.  For stage2+ use the host library and
@@ -2831,7 +2832,7 @@ build/genmatch$(build_exeext): BUILD_LIBS += $(LIBINTL) 
$(LIBICONV)
 endif
 
 build/genmatch$(build_exeext) : $(BUILD_CPPLIB) \
-  $(BUILD_ERRORS) build/vec.o build/hash-table.o
+  $(BUILD_ERRORS) build/vec.o build/hash-table.o build/sort.o
 
 # These programs are not linked with the MD reader.
 build/gengtype$(build_exeext) : build/gengtype-lex.o build/gengtype-parse.o \
-- 
2.13.3



[PATCH 0/2] Introduce gcc_qsort

2018-05-10 Thread Alexander Monakov
Hello.

This introduces a replacement for qsort() in GCC. The main selling point is
reproducibility (currently compiler output may change depending on how libc
qsort reorders not-bitwise-identical elements that compare equal) with a 
small improvement speed-wise and small code growth (under 2K on x86-64).

The opening comment in sort.cc gives a brief implementation overview:

/* This implements a sort function suitable for GCC use cases:
   - signature-compatible to C qsort, but relaxed contract:
 - may apply the comparator to elements in a temporary buffer
 - may abort on allocation failure
   - deterministic (but not necessarily stable)
   - fast, especially for common cases (0-5 elements of size 8 or 4)

   The implementation uses a network sort for up to 5 elements and
   a merge sort on top of that.  Neither stage has branches depending on
   comparator result, trading extra arithmetic for branch mispredictions.  */

I used a Sandy Bridge CPU to collect statistics on tramp3d -O2 compilation.

Overall the new implementation is roughly 30% faster compared to Glibc qsort,
with 2x or more speedup for cases with tiny element count. I see one instance
where the new approach is significantly (1.5x) slower: it is ipa-icf.c:
sort_congruence_class_groups_by_decl_uid. It sorts a big array (1500 entries)
and needs 14 indirect loads just to reach values to compare, so when branch
prediction manages to guess correctly, it allows the execution of two
comparators to overlap and better hides their cache miss latency.

Overall GCC spends about 0.3% time under qsort, but this doesn't automatically
mean that this brings a 0.1% speed improvement: it may be larger or smaller
depending on how new code affects cache behavior and branch predictors in
other code, and it's not trivial to measure precisely.

I can go into more detail about measured stats if there's interest :)

Makefile.in changes are separated to patch 2 in the hope it'd make review
easier, but the two patches will need to be applied together.

Bootstrapped/regtested on x86-64, OK for trunk?

Alexander Monakov (2):
  gcc_qsort: source code changes
  gcc_qsort: build system changes

 gcc/Makefile.in |   9 ++-
 gcc/sort.cc | 232 
 gcc/system.h|   7 +-
 gcc/vec.c   |   2 +-
 4 files changed, 243 insertions(+), 7 deletions(-)
 create mode 100644 gcc/sort.cc

-- 
2.13.3



Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Steve Kargl
It is certainly possible to give a warning, but it
would be odd (to me) to warn about technically
standard conforming code.  gfortran doesn't warn
for zero-sized array references or zero-length
substrings in other context.

program foo
   real a(4)
   character(len=10) s
   s = '12345'
   a = 1
   print *, size(a(2:1)), len(s(3:2))
end program foo
% gfc -o z a.f90
% ./z
   0   0

-- 
steve

On Thu, May 10, 2018 at 05:18:25PM +0200, Andre Vehreschild wrote:
> Hi Steve,
> 
> the patch looks OK to me. Is it possible to give a warning
> there, at least with some higher warning-level? Or is there
> already one? I haven't tested it.
> 
> Regards,
>   Andre
> 
> On Thu, 10 May 2018 07:15:21 -0700
> Steve Kargl  wrote:
> 
> > On Thu, May 10, 2018 at 11:48:24AM +0200, Dominique d'Humières wrote:
> > > Hi Steve,
> > > 
> > > AFAICT the patch is missing.
> > > 
> > > Thanks for working on these PRs.
> > >   
> > 
> > Whoops. Looks like attached the log instead of diff.
> > 
> > Index: gcc/fortran/array.c
> > ===
> > --- gcc/fortran/array.c (revision 259945)
> > +++ gcc/fortran/array.c (working copy)
> > @@ -2046,7 +2046,8 @@ got_charlen:
> >   else
> > return true;
> >  
> > - gcc_assert (current_length != -1);
> > + if (current_length < 0)
> > +   current_length = 0;
> >  
> >   if (found_length == -1)
> > found_length = current_length;
> > Index: gcc/testsuite/gfortran.dg/pr85521_1.f90
> > ===
> > --- gcc/testsuite/gfortran.dg/pr85521_1.f90 (nonexistent)
> > +++ gcc/testsuite/gfortran.dg/pr85521_1.f90 (working copy)
> > @@ -0,0 +1,8 @@
> > +! { dg-do compile }
> > +! PR fortran/85521
> > +program p
> > +   character(3) :: c = 'abc'
> > +   character(3) :: z(1)
> > +   z = [ c(:-1) ]
> > +   print *, z
> > +end
> > Index: gcc/testsuite/gfortran.dg/pr85521_2.f90
> > ===
> > --- gcc/testsuite/gfortran.dg/pr85521_2.f90 (nonexistent)
> > +++ gcc/testsuite/gfortran.dg/pr85521_2.f90 (working copy)
> > @@ -0,0 +1,8 @@
> > +! { dg-do compile }
> > +! PR fortran/85521
> > +program p
> > +   character(3) :: c = 'abc'
> > +   character(3) :: z(1)
> > +   z = [ c(:-2) ]
> > +   print *, z
> > +end
> > 
> 
> 
> -- 
> Andre Vehreschild * Email: vehre ad gmx dot de 

-- 
Steve
20170425 https://www.youtube.com/watch?v=VWUpyCsUKR4
20161221 https://www.youtube.com/watch?v=IbCHE-hONow


Re: Document PR 84073 change in /gcc-8/porting_to.html

2018-05-10 Thread Andre Vehreschild
Hi Thomas,

looks ok to me.

- Andre

On Thu, 10 May 2018 17:19:39 +0200
Thomas König  wrote:

> Am 10.05.2018 um 14:20 schrieb Thomas Koenig:
> > Am 10.05.2018 um 12:33 schrieb Jonathan Wakely:
> >> Should the fix for PR 84073 be documented, so that users whose code is
> >> now rejected understand why, and how to fix it?
> >>
> >> https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84073
> >> https://gcc.gnu.org/gcc-8/porting_to.html#fortran
> > 
> > Sounds like a good idea.
> > 
> > Since I introduced this, I'll do it within a few days (unless, of
> > course, somebody beats me to it, which I won't mind :-)
> 
> OK, not a few days, but a few hours :-)
> 
> Here is the diff.  OK to commit to gcc-docs?
> 
> Regards
> 
>   Thomas
> 
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


Re: [PATCH] PR fortran/70870 -- Reject data object with default initialization

2018-05-10 Thread Andre Vehreschild
Hi Steve,

ok for trunk.

- Andre

On Wed, 9 May 2018 15:32:10 -0700
Steve Kargl  wrote:

> I plan to commit the attach patch on Saturday unless someone objects.
> 
> 2018-05-09  Steven G. Kargl  
> 
>   PR fortran/70870
>   * data.c (gfc_assign_data_value): Check that a data object does
>   not also have default initialization.
> 
> 2018-05-09  Steven G. Kargl  
> 
>   PR fortran/70870
>   * gfortran.dg/pr70870_1.f90: New test.
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


Re: [C++ PATCH] Fix offsetof constexpr handling (PR c++/85662, take 4)

2018-05-10 Thread Jason Merrill
OK, thanks.

On Wed, May 9, 2018 at 4:49 PM, Jakub Jelinek  wrote:
> On Wed, May 09, 2018 at 11:01:18AM -0400, Jason Merrill wrote:
>> On Wed, May 9, 2018 at 10:47 AM, Jakub Jelinek  wrote:
>> > On Wed, May 09, 2018 at 10:40:26AM -0400, Jason Merrill wrote:
>> >> On Wed, May 9, 2018 at 4:55 AM, Jakub Jelinek  wrote:
>> >> > On Tue, May 08, 2018 at 11:28:18PM -0400, Jason Merrill wrote:
>> >> >> Maybe add a type parameter that defaults to size_type_node...
>> >> >>
>> >> >> > +   ret = fold_convert_loc (loc, TREE_TYPE (expr),
>> >> >> > +   fold_offsetof_1 (TREE_TYPE (expr), 
>> >> >> > op0));
>> >> >>
>> >> >> ...and then this can be
>> >> >>
>> >> >>   fold_offsetof (op0, TREE_TYPE (exp0))
>> >> >
>> >> > Like this then?
>> >> >
>> >> > +   ret = fold_convert_loc (loc, TREE_TYPE (expr),
>> >> > +   fold_offsetof (op0, TREE_TYPE (expr)));
>> >>
>> >> I was thinking that we then wouldn't need the fold_convert at the call
>> >> sites anymore, either.
>> >
>> > The patch only converts to non-pointer types, I'm not sure if it is
>> > desirable to do the same with pointer types (and most of the other callers
>> > don't use convert, but fold_convert which is significantly different, the
>> > former is emitting diagnostics, the latter is just a conversion + 
>> > optimization).
>>
>> Is there a reason we can't use fold_convert for the non-pointer case,
>> too?  I don't think we're interested in diagnostics from this
>> particular call.
>
> This patch instead uses convert everywhere.  Bootstrapped/regtested on
> x86_64-linux and i686-linux, ok for trunk?
>
> 2018-05-09  Jakub Jelinek  
>
> PR c++/85662
> * c-common.h (fold_offsetof_1): Removed.
> (fold_offsetof): Add TYPE argument defaulted to size_type_node and
> CTX argument defaulted to ERROR_MARK.
> * c-common.c (fold_offsetof_1): Renamed to ...
> (fold_offsetof): ... this.  Remove wrapper function.  Add TYPE
> argument, convert the pointer constant to TYPE and use size_binop
> with PLUS_EXPR instead of fold_build_pointer_plus if type is not
> a pointer type.  Adjust recursive calls.
>
> * c-fold.c (c_fully_fold_internal): Use fold_offsetof rather than
> fold_offsetof_1, pass TREE_TYPE (expr) as TYPE to it and drop the
> fold_convert_loc.
> * c-typeck.c (build_unary_op): Use fold_offsetof rather than
> fold_offsetof_1, pass argtype as TYPE to it and drop the
> fold_convert_loc.
>
> * cp-gimplify.c (cp_fold): Use fold_offsetof rather than
> fold_offsetof_1, pass TREE_TYPE (x) as TYPE to it and drop the
> fold_convert.
>
> * g++.dg/ext/offsetof2.C: New test.
>
> --- gcc/c-family/c-common.h.jj  2018-05-09 20:12:25.845258371 +0200
> +++ gcc/c-family/c-common.h 2018-05-09 20:20:02.265649121 +0200
> @@ -1033,8 +1033,8 @@ extern bool c_dump_tree (void *, tree);
>
>  extern void verify_sequence_points (tree);
>
> -extern tree fold_offsetof_1 (tree, tree_code ctx = ERROR_MARK);
> -extern tree fold_offsetof (tree);
> +extern tree fold_offsetof (tree, tree = size_type_node,
> +  tree_code ctx = ERROR_MARK);
>
>  extern int complete_array_type (tree *, tree, bool);
>
> --- gcc/c-family/c-common.c.jj  2018-05-09 20:12:25.763258297 +0200
> +++ gcc/c-family/c-common.c 2018-05-09 20:21:23.770718896 +0200
> @@ -6168,10 +6168,11 @@ c_common_to_target_charset (HOST_WIDE_IN
>
>  /* Fold an offsetof-like expression.  EXPR is a nested sequence of component
> references with an INDIRECT_REF of a constant at the bottom; much like the
> -   traditional rendering of offsetof as a macro.  Return the folded result.  
> */
> +   traditional rendering of offsetof as a macro.  TYPE is the desired type of
> +   the whole expression.  Return the folded result.  */
>
>  tree
> -fold_offsetof_1 (tree expr, enum tree_code ctx)
> +fold_offsetof (tree expr, tree type, enum tree_code ctx)
>  {
>tree base, off, t;
>tree_code code = TREE_CODE (expr);
> @@ -6196,10 +6197,10 @@ fold_offsetof_1 (tree expr, enum tree_co
>   error ("cannot apply %<offsetof%> to a non constant address");
>   return error_mark_node;
> }
> -  return TREE_OPERAND (expr, 0);
> +  return convert (type, TREE_OPERAND (expr, 0));
>
>  case COMPONENT_REF:
> -  base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
> +  base = fold_offsetof (TREE_OPERAND (expr, 0), type, code);
>if (base == error_mark_node)
> return base;
>
> @@ -6216,7 +6217,7 @@ fold_offsetof_1 (tree expr, enum tree_co
>break;
>
>  case ARRAY_REF:
> -  base = fold_offsetof_1 (TREE_OPERAND (expr, 0), code);
> +  base = fold_offsetof (TREE_OPERAND (expr, 0), type, code);
>if (base == error_mark_node)
> return base;
>
> @@ -6273,23 +6274,16 @@ 

Re: Document PR 84073 change in /gcc-8/porting_to.html

2018-05-10 Thread Thomas König

Am 10.05.2018 um 14:20 schrieb Thomas Koenig:

Am 10.05.2018 um 12:33 schrieb Jonathan Wakely:

Should the fix for PR 84073 be documented, so that users whose code is
now rejected understand why, and how to fix it?

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=84073
https://gcc.gnu.org/gcc-8/porting_to.html#fortran


Sounds like a good idea.

Since I introduced this, I'll do it within a few days (unless, of
course, somebody beats me to it, which I won't mind :-)


OK, not a few days, but a few hours :-)

Here is the diff.  OK to commit to gcc-docs?

Regards

Thomas


Index: porting_to.html
===
RCS file: /cvs/gcc/wwwdocs/htdocs/gcc-8/porting_to.html,v
retrieving revision 1.6
diff -u -r1.6 porting_to.html
--- porting_to.html	4 May 2018 06:35:39 -	1.6
+++ porting_to.html	10 May 2018 15:16:57 -
@@ -196,7 +196,31 @@
   void foo_ (char*, int*, fortran_charlen_t);
   
 
+
+  Versions of gfortran prior to 8.1 wrongly accepted CHARACTER
+  variables with a length type parameter other than one as C
+  interoperable. For example, the code
+  
+  module mod
+use iso_c_binding
+type, bind(C) :: a
+  character(len=2,kind=c_char) :: b ! Wrong
+end type a
+character(len=2), bind(C) :: c ! Also wrong
+  end module mod
+  
+  was accepted.  To achieve similar functionality, an array of
+  LEN=1 characters can be used, for example
+  
+  module mod
+use iso_c_binding
+type, bind(C) :: a
+  character(kind=c_char) :: b(2)
+end type a
+character(kind=c_char), bind(C) :: c(2)
+  end module mod
+  
+
 Links
-
 
 


Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Andre Vehreschild
Hi Steve,

the patch looks OK to me. Is it possible to give a warning there, at least with
some higher warning-level? Or is there already one? I haven't tested it.

Regards,
Andre

On Thu, 10 May 2018 07:15:21 -0700
Steve Kargl  wrote:

> On Thu, May 10, 2018 at 11:48:24AM +0200, Dominique d'Humières wrote:
> > Hi Steve,
> > 
> > AFAICT the patch is missing.
> > 
> > Thanks for working on these PRs.
> >   
> 
> Whoops. Looks like attached the log instead of diff.
> 
> Index: gcc/fortran/array.c
> ===
> --- gcc/fortran/array.c   (revision 259945)
> +++ gcc/fortran/array.c   (working copy)
> @@ -2046,7 +2046,8 @@ got_charlen:
> else
>   return true;
>  
> -   gcc_assert (current_length != -1);
> +   if (current_length < 0)
> + current_length = 0;
>  
> if (found_length == -1)
>   found_length = current_length;
> Index: gcc/testsuite/gfortran.dg/pr85521_1.f90
> ===
> --- gcc/testsuite/gfortran.dg/pr85521_1.f90   (nonexistent)
> +++ gcc/testsuite/gfortran.dg/pr85521_1.f90   (working copy)
> @@ -0,0 +1,8 @@
> +! { dg-do compile }
> +! PR fortran/85521
> +program p
> +   character(3) :: c = 'abc'
> +   character(3) :: z(1)
> +   z = [ c(:-1) ]
> +   print *, z
> +end
> Index: gcc/testsuite/gfortran.dg/pr85521_2.f90
> ===
> --- gcc/testsuite/gfortran.dg/pr85521_2.f90   (nonexistent)
> +++ gcc/testsuite/gfortran.dg/pr85521_2.f90   (working copy)
> @@ -0,0 +1,8 @@
> +! { dg-do compile }
> +! PR fortran/85521
> +program p
> +   character(3) :: c = 'abc'
> +   character(3) :: z(1)
> +   z = [ c(:-2) ]
> +   print *, z
> +end
> 


-- 
Andre Vehreschild * Email: vehre ad gmx dot de 


[patch, fortran, doc, committed] Document BACK for MINLOC and MAXLOC

2018-05-10 Thread Thomas König

Hello world,

I just committed the attached patch as obvious after checking that
it passes "make info", "make dvi" and "make pdf".

Regards

Thomas

2018-05-10  Thomas Koenig  

PR fortran/54613
* intrinsic.texi: Document BACK for MINLOC and MAXLOC.
Index: intrinsic.texi
===
--- intrinsic.texi	(Revision 260022)
+++ intrinsic.texi	(Arbeitskopie)
@@ -9991,8 +9991,10 @@ locations of the maximum element along each row of
 @var{DIM} direction.  If @var{MASK} is present, only the elements for
 which @var{MASK} is @code{.TRUE.} are considered.  If more than one
 element in the array has the maximum value, the location returned is
-that of the first such element in array element order.  If the array has
-zero size, or all of the elements of @var{MASK} are @code{.FALSE.}, then
+that of the first such element in array element order if
+@var{BACK} is not present or is false; otherwise, the location
+returned is that of the last such element.  If the array has zero
+size, or all of the elements of @var{MASK} are @code{.FALSE.}, then
 the result is an array of zeroes.  Similarly, if @var{DIM} is supplied
 and all of the elements of @var{MASK} along a given row are zero, the
 result value for that row is zero.
@@ -1,6 +10002,7 @@ result value for that row is zero.
 @item @emph{Standard}:
 Fortran 95 and later; @var{ARRAY} of @code{CHARACTER} and the
 @var{KIND} argument are available in Fortran 2003 and later.
+The @var{BACK} argument is available in Fortran 2008 and later.
 
 @item @emph{Class}:
 Transformational function
@@ -10006,8 +10009,8 @@ Transformational function
 
 @item @emph{Syntax}:
 @multitable @columnfractions .80
-@item @code{RESULT = MAXLOC(ARRAY, DIM [, MASK] [,KIND])}
-@item @code{RESULT = MAXLOC(ARRAY [, MASK] [,KIND])}
+@item @code{RESULT = MAXLOC(ARRAY, DIM [, MASK] [,KIND] [,BACK])}
+@item @code{RESULT = MAXLOC(ARRAY [, MASK] [,KIND] [,BACK])}
 @end multitable
 
 @item @emph{Arguments}:
@@ -10021,6 +10024,7 @@ inclusive.  It may not be an optional dummy argume
 and conformable with @var{ARRAY}.
 @item @var{KIND} @tab (Optional) An @code{INTEGER} initialization
 expression indicating the kind parameter of the result.
+@item @var{BACK} @tab (Optional) A scalar of type @code{LOGICAL}.
 @end multitable
 
 @item @emph{Return value}:
@@ -10343,7 +10347,9 @@ locations of the minimum element along each row of
 @var{DIM} direction.  If @var{MASK} is present, only the elements for
 which @var{MASK} is @code{.TRUE.} are considered.  If more than one
 element in the array has the minimum value, the location returned is
-that of the first such element in array element order.  If the array has
+that of the first such element in array element order if
+@var{BACK} is not present or is false; otherwise, the location
+returned is that of the last such element.  If the array has
 zero size, or all of the elements of @var{MASK} are @code{.FALSE.}, then
 the result is an array of zeroes.  Similarly, if @var{DIM} is supplied
 and all of the elements of @var{MASK} along a given row are zero, the
@@ -10352,6 +10358,7 @@ result value for that row is zero.
 @item @emph{Standard}:
 Fortran 95 and later; @var{ARRAY} of @code{CHARACTER} and the
 @var{KIND} argument are available in Fortran 2003 and later.
+The @var{BACK} argument is available in Fortran 2008 and later.
 
 @item @emph{Class}:
 Transformational function
@@ -10358,8 +10365,8 @@ Transformational function
 
 @item @emph{Syntax}:
 @multitable @columnfractions .80
-@item @code{RESULT = MINLOC(ARRAY, DIM [, MASK] [,KIND])}
-@item @code{RESULT = MINLOC(ARRAY [, MASK], [,KIND])}
+@item @code{RESULT = MINLOC(ARRAY, DIM [, MASK] [,KIND] [,BACK])}
+@item @code{RESULT = MINLOC(ARRAY [, MASK] [,KIND] [,BACK])}
 @end multitable
 
 @item @emph{Arguments}:
@@ -10373,6 +10380,7 @@ inclusive.  It may not be an optional dummy argume
 and conformable with @var{ARRAY}.
 @item @var{KIND} @tab (Optional) An @code{INTEGER} initialization
 expression indicating the kind parameter of the result.
+@item @var{BACK} @tab (Optional) A scalar of type @code{LOGICAL}.
 @end multitable
 
 @item @emph{Return value}:


Quo Vadis tr1? Was: [libstdc++, PATCH] PR libstdc++/83140 - assoc_legendre returns negated value when m is odd.

2018-05-10 Thread Ed Smith-Rowland

All,


We could consider dropping the TR1 support, and just provide these
functions for ISO/IEC 29124:2010 in C++11 (or later) and for C++17.
But that decision should be taken separately, and should only happen
on trunk anyway so we need to use _Tp(+1) here.


I am in favour of splitting new versions of the special functions out of 
tr1 and into std/bits.

I personally am itching to use at least C++11 for implementation.
We have been defaulting to C++11 for, IIRC, two releases (Hence my 
_Tp{+1} slip LOL).

I have a lot of work towards this that I wanted to get into 9 anyway.

This would end the last useful thing in tr1 that's not better 
implemented elsewhere. There are certainly people using tr1. Can we 
deprecate the whole namespace?  That might be too noisy.  I think we 
should be done with maths bugs in Bugzilla pretty soon.  We should do 
whatever we decide relatively early in 9.


Ed

Also, I think 83566 should go into tr1 first because it's not a 
signature change.




Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Steve Kargl
On Thu, May 10, 2018 at 11:48:24AM +0200, Dominique d'Humières wrote:
> Hi Steve,
> 
> AFAICT the patch is missing.
> 
> Thanks for working on these PRs.
> 

Whoops. Looks like I attached the log instead of the diff.

Index: gcc/fortran/array.c
===
--- gcc/fortran/array.c (revision 259945)
+++ gcc/fortran/array.c (working copy)
@@ -2046,7 +2046,8 @@ got_charlen:
  else
return true;
 
- gcc_assert (current_length != -1);
+ if (current_length < 0)
+   current_length = 0;
 
  if (found_length == -1)
found_length = current_length;
Index: gcc/testsuite/gfortran.dg/pr85521_1.f90
===
--- gcc/testsuite/gfortran.dg/pr85521_1.f90 (nonexistent)
+++ gcc/testsuite/gfortran.dg/pr85521_1.f90 (working copy)
@@ -0,0 +1,8 @@
+! { dg-do compile }
+! PR fortran/85521
+program p
+   character(3) :: c = 'abc'
+   character(3) :: z(1)
+   z = [ c(:-1) ]
+   print *, z
+end
Index: gcc/testsuite/gfortran.dg/pr85521_2.f90
===
--- gcc/testsuite/gfortran.dg/pr85521_2.f90 (nonexistent)
+++ gcc/testsuite/gfortran.dg/pr85521_2.f90 (working copy)
@@ -0,0 +1,8 @@
+! { dg-do compile }
+! PR fortran/85521
+program p
+   character(3) :: c = 'abc'
+   character(3) :: z(1)
+   z = [ c(:-2) ]
+   print *, z
+end

-- 
Steve


Re: [libstdc++, PATCH] PR libstdc++/83140 - assoc_legendre returns negated value when m is odd.

2018-05-10 Thread Ed Smith-Rowland

On 05/09/2018 05:30 AM, Jonathan Wakely wrote:

On 07/05/18 12:39 -0400, Ed Smith-Rowland wrote:

All,

We were using a different convention for P_l^m assoc_legendre(int l, 
int m, FloatTp x)


 - the so-called Condon-Shortley convention which includes (-1)^m.  
This unfortunately is common.


This factor is taken out to match the standard.  The underlying 
__detail code has an arg that allows you to flip this


- mostly to highlight the subtle difference.

The related sph_legendre is unaffected by this (our impl and the 
standard include the C-S phase).


OK for trunk and branches?

Ed






2018-05-07  Edward Smith-Rowland  <3dw...@verizon.net>

PR libstdc++/83140 - assoc_legendre returns negated value when m 
is odd
* include/tr1/legendre_function.tcc (__assoc_legendre_p): Add 
__phase

argument defaulted to +1.  Doxy comments on same.
* testsuite/special_functions/02_assoc_legendre/
check_assoc_legendre.cc: Regen.
* testsuite/tr1/5_numerical_facilities/special_functions/
02_assoc_legendre/check_tr1_assoc_legendre.cc: Regen.




Index: include/tr1/legendre_function.tcc
===
--- include/tr1/legendre_function.tcc    (revision 259973)
+++ include/tr1/legendre_function.tcc    (working copy)
@@ -65,7 +65,7 @@
  namespace __detail
  {
    /**
- *   @brief  Return the Legendre polynomial by recursion on order
+ *   @brief  Return the Legendre polynomial by recursion on degree
 *   @f$ l @f$.
 *
 *   The Legendre function of @f$ l @f$ and @f$ x @f$,
@@ -74,7 +74,7 @@
 * P_l(x) = \frac{1}{2^l l!}\frac{d^l}{dx^l}(x^2 - 1)^{l}
 *   @f]
 *
- *   @param  l  The order of the Legendre polynomial.  @f$l >= 
0@f$.
+ *   @param  l  The degree of the Legendre polynomial. @f$l >= 
0@f$.
 *   @param  x  The argument of the Legendre polynomial. @f$|x| 
<= 1@f$.

 */
    template
@@ -127,16 +127,19 @@
 * P_l^m(x) = (1 - x^2)^{m/2}\frac{d^m}{dx^m}P_l(x)
 *   @f]
 *
- *   @param  l  The order of the associated Legendre function.
+ *   @param  l  The degree of the associated Legendre function.
 *  @f$ l >= 0 @f$.
 *   @param  m  The order of the associated Legendre function.
 *  @f$ m <= l @f$.
 *   @param  x  The argument of the associated Legendre function.
 *  @f$ |x| <= 1 @f$.
+ *   @param  phase  The phase of the associated Legendre function.
+ *  Use -1 for the Condon-Shortley phase 
convention.

 */
    template
    _Tp
-    __assoc_legendre_p(unsigned int __l, unsigned int __m, _Tp __x)
+    __assoc_legendre_p(unsigned int __l, unsigned int __m, _Tp __x,
+   _Tp __phase = _Tp{+1})


This list-init isn't valid for C++98 i.e. when used via <tr1/cmath>.
GCC seems to allow it, but Clang won't.

We could consider dropping the TR1 support, and just provide these
functions for ISO/IEC 29124:2010 in C++11 (or later) and for C++17.
But that decision should be taken separately, and should only happen
on trunk anyway so we need to use _Tp(+1) here.

OK for trunk with _Tp(+1) instead of _Tp{+1}.

Do we want to change the result of these functions on the branches?
How likely is it that changing it will affect somebody's calculations
in a way that they don't expect from a minor release on a branch?




Here are the files applied for 260115.

As to backporting...  I did a Google and found rather more activity 
around these functions - especially legendre - than I remembered last 
time I searched.  I thought these functions were languishing, but 
apparently not.  Still low pings on ellint_3.


I *would* like to change branch 8 because it's just out.

I think I should curb my enthusiasm for branches 7 and 6.

Ed.



2018-05-10  Edward Smith-Rowland  <3dw...@verizon.net>

PR libstdc++/83140 - assoc_legendre returns negated value when m is odd
* include/tr1/legendre_function.tcc (__assoc_legendre_p): Add __phase
argument defaulted to +1.  Doxy comments on same.
* testsuite/special_functions/02_assoc_legendre/
check_assoc_legendre.cc: Regen.
* testsuite/tr1/5_numerical_facilities/special_functions/
02_assoc_legendre/check_tr1_assoc_legendre.cc: Regen.

Index: include/tr1/legendre_function.tcc
===
--- include/tr1/legendre_function.tcc   (revision 260114)
+++ include/tr1/legendre_function.tcc   (working copy)
@@ -65,7 +65,7 @@
   namespace __detail
   {
 /**
- *   @brief  Return the Legendre polynomial by recursion on order
+ *   @brief  Return the Legendre polynomial by recursion on degree
  *   @f$ l @f$.
  * 
  *   The Legendre function of @f$ l @f$ and @f$ x @f$,
@@ -74,7 +74,7 @@
  * P_l(x) = \frac{1}{2^l l!}\frac{d^l}{dx^l}(x^2 - 1)^{l}
  *   @f]
  * 
- *   @param  l  The order of the Legendre polynomial.  @f$l 

[PR 85655] Check is_single_const in intersect_with_plats

2018-05-10 Thread Martin Jambor
Hi,

bug 85655 happens because intersect_with_plats encounters a BOTTOM
lattice and does not check for it.  On closer inspection, the real check
that should be performed is is_single_const, so this patch adds it.

It has passed bootstrap and testing, LTO bootstrap is underway.  OK for
trunk if it passes?  OK for gcc-7 and gcc-6 branches after testing there
too?

Thanks,

Martin


2018-05-10  Martin Jambor  

PR ipa/85655
* ipa-cp.c (intersect_with_plats): Check that the lattice contains
single const.

testsuite/
* g++.dg/lto/pr85655_0.C: New test.
---
 gcc/ipa-cp.c |  4 +++-
 gcc/testsuite/g++.dg/lto/pr85655_0.C | 42 
 2 files changed, 45 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/lto/pr85655_0.C

diff --git a/gcc/ipa-cp.c b/gcc/ipa-cp.c
index 4f28a55b862..e868b9c2623 100644
--- a/gcc/ipa-cp.c
+++ b/gcc/ipa-cp.c
@@ -4127,7 +4127,9 @@ intersect_with_plats (struct ipcp_param_lattices *plats,
  if (aglat->offset - offset == item->offset)
{
  gcc_checking_assert (item->value);
- if (values_equal_for_ipcp_p (item->value, aglat->values->value))
+ if (aglat->is_single_const ()
+ && values_equal_for_ipcp_p (item->value,
+ aglat->values->value))
found = true;
  break;
}
diff --git a/gcc/testsuite/g++.dg/lto/pr85655_0.C 
b/gcc/testsuite/g++.dg/lto/pr85655_0.C
new file mode 100644
index 000..d3bbd8f4036
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lto/pr85655_0.C
@@ -0,0 +1,42 @@
+// { dg-lto-do link }
+// { dg-require-effective-target shared }
+// { dg-require-effective-target fpic }
+// { dg-lto-options {{-O2 -fPIC -shared -flto}} }
+
+int a;
+void b(...);
+void c(int);
+enum { d, e, f, g, h, i, j, k };
+class l {
+public:
+  int ac;
+  bool m;
+  l(char *);
+  int n();
+};
+struct o {
+  int ad;
+  o(int p = 0) : ad(p) {}
+};
+class C : public l {
+public:
+  char q;
+  C(o) : l() { m |= ac & a ?: 9; }
+};
+class r : C {
+public:
+  char s;
+  r(o p, char) : C(p) {
+if (n()) {
+  b(a, s, "");
+  c(5);
+}
+  }
+};
+class t : C {
+public:
+  t(int) : C(d) {}
+};
+r ag('b', 0), ah(o(), 0), ai(e, 0), aj(f, 0), ak(g, 0), al(h, 0), am(k, 0),
+an(i, 0), ao(j, 0);
+t ap(0);
-- 
2.16.3



RE: [PATCH][i386] Adding WAITPKG instructions

2018-05-10 Thread Peryt, Sebastian
> -Original Message-
> From: Uros Bizjak [mailto:ubiz...@gmail.com]
> Sent: Thursday, May 10, 2018 3:26 PM
> To: Peryt, Sebastian 
> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
> 
> On Thu, May 10, 2018 at 2:50 PM, Peryt, Sebastian 
> wrote:
> > Hi Uros,
> >
> > Updated patch attached, please find comments below.
> >
> >> -Original Message-
> >> From: Uros Bizjak [mailto:ubiz...@gmail.com]
> >> Sent: Wednesday, May 9, 2018 1:47 PM
> >> To: Peryt, Sebastian 
> >> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
> >> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
> >>
> >> On Tue, May 8, 2018 at 1:34 PM, Peryt, Sebastian
> >> 
> >> wrote:
> >> > Hi,
> >> >
> >> > This patch adds support for WAITPKG instructions.
> >> >
> >> > Is it ok for trunk and after few day for backport to GCC-8?
> >> >
> > (Removed)
> >> >
> >> >
> >>
> >> +case IX86_BUILTIN_UMONITOR:
> >> +  arg0 = CALL_EXPR_ARG (exp, 0);
> >> +  op0 = expand_normal (arg0);
> >> +  if (!REG_P (op0))
> >> +op0 = ix86_zero_extend_to_Pmode (op0);
> >> +
> >> +  emit_insn (ix86_gen_umonitor (op0));
> >> +  return 0;
> >>
> >> Please see how movdir64b handles its address operand. Also, do not
> >> use global ix86_gen_monitor, just expand directly in the same way as
> movdir64b.
> >>
> >
> > Fixed.
> >
> >> +case IX86_BUILTIN_UMWAIT:
> >> +case IX86_BUILTIN_TPAUSE:
> >> +  rtx eax, edx, op1_lo, op1_hi;
> >> +  arg0 = CALL_EXPR_ARG (exp, 0);
> >> +  arg1 = CALL_EXPR_ARG (exp, 1);
> >> +  op0 = expand_normal (arg0);
> >> +  op1 = expand_normal (arg1);
> >> +  eax = gen_rtx_REG (SImode, AX_REG);
> >> +  edx = gen_rtx_REG (SImode, DX_REG);
> >> +  if (!REG_P (op0))
> >> +op0 = copy_to_mode_reg (SImode, op0);
> >> +  if (!REG_P (op1))
> >> +op1 = copy_to_mode_reg (DImode, op1);
> >> +  op1_lo = gen_lowpart (SImode, op1);
> >> +  op1_hi = expand_shift (RSHIFT_EXPR, DImode, op1,
> >> + GET_MODE_BITSIZE (SImode), 0, 1);
> >> +  op1_hi = convert_modes (SImode, DImode, op1_hi, 1);
> >> +  emit_move_insn (eax, op1_lo);
> >> +  emit_move_insn (edx, op1_hi);
> >> +  emit_insn (fcode == IX86_BUILTIN_UMWAIT
> >> +? gen_umwait (op0, eax, edx)
> >> +: gen_tpause (op0, eax, edx));
> >> +
> >> +  /* Return current CF value.  */
> >> +  op3 = gen_rtx_REG (CCCmode, FLAGS_REG);
> >> +  target = gen_rtx_LTU (QImode, op3, const0_rtx);
> >> +
> >> +  return target;
> >>
> >> For the above code, please see how xsetbv expansion and patterns are
> >> handling their input operands. There should be two patterns, one for
> >> 32bit and the other for 64bit targets. The patterns will need to set
> >> FLAGS_REG, otherwise the test will be removed.
> >>
> >
> > I copied what is done for xsetbv expansion and most likely I found some bug 
> > in
> GCC.
> > The problem is that when I use 3 arguments and compile as 64bit
> > version upper part of rax is not cleared. It doesn't appear when I'm using 
> > 2 or 4
> function arguments.
> > Most likely error is caused by the fact that rdx is used both as an
> > input for function and argument in instruction.
> 
> There is no need to clear upper parts of 64bit register. As specified in the 
> ISA
> (and modelled with RTX pattern), the instruction (e.g.
> tpause) reads only lower 32 bits from %rax and %rdx. Implicitly, the 
> instruction
> should ignore upper 32 bits by itself, so we can use SUBREGs. If this is not 
> the
> case, we need to use DImode input arguments in RTX pattern and explicitly emit
> zero-extension insns to clear upper 32 bits of input arguments.
> 

Ok, I agree with you regarding clearing.

But there is still one thing bothering me as explained in last email. The 
problem appears when I use 3
arguments and compile as 64bit version. Assembly generated is different from 
when I'm adding extra unused
argument or removing one function argument not related to my instruction. I'm 
talking about umonitor-1.c test, function bar.

Do you see the difference? This is the problem with clearing of registers I 
wrote previously. Why is this happening?
Is it a bug?

When using 3 operands:
bar:
.LFB5450:
.cfi_startproc
movq%rdx, %rax
umonitor%rdi
movq%rdx, %rcx
shrq$32, %rcx
movq%rcx, %rdx
umwait  %esi
setc%al
ret
.cfi_endproc

When using 4 operands:
bar:
.LFB5450:
.cfi_startproc
movl%edx, %esi
umonitor%rdi
movq%rcx, %rax
shrq$32, %rax
movq%rax, %rdx
movl%ecx, %eax
umwait  %esi
setc%al
ret
.cfi_endproc


Overall I understand that patch is ok for 

[build] Fix Solaris gty handling (PR target/84379)

2018-05-10 Thread Rainer Orth
As described in the PR, there are a couple of jit testsuite failures on
Solaris when using /bin/as.  The errors point to GC issues and indeed,
gcc/config/sol2.c lacked GTY markup.  This patch fixes that, following
what darwin.c does for machopic_indirections.  I confess I have no idea
why I had to change the code the way I did except for the fact that it
works.  While formally I don't need approval, it would be nice if
someone in the know could have a look.

Bootstrapped without regressions on i386-pc-solaris2.11 and
sparc-sun-solaris2.11 with as and gas, with and without jit.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2018-02-08  Rainer Orth  

PR target/84379
* config.gcc (*-*-solaris2*): Set target_gtfiles,
cp_target_gtfiles.
* config/sol2.c (comdat_entry): Mark as GTY((for_user)).
(solaris_comdat_htab): Mark GTY.
(solaris_elf_asm_comdat_section): Allocate solaris_comdat_htab
with create_ggc.
Allocate comdat_entry's with ggc_alloc.
Include gt-sol2.h.

# HG changeset patch
# Parent  2c72579b3945b50373c09b9c93a37f00d94eee79
Fix Solaris gty handling  (PR target/84379)

diff --git a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -880,6 +880,7 @@ case ${target} in
   fi
   tm_p_file="${tm_p_file} sol2-protos.h"
   tmake_file="${tmake_file} t-sol2 t-slibgcc"
+  target_gtfiles="$target_gtfiles \$(srcdir)/config/sol2.c"
   c_target_objs="${c_target_objs} sol2-c.o"
   cxx_target_objs="${cxx_target_objs} sol2-c.o sol2-cxx.o"
   extra_objs="${extra_objs} sol2.o sol2-stubs.o"
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -163,7 +163,7 @@ solaris_assemble_visibility (tree decl, 
 
 /* Group section information entry stored in solaris_comdat_htab.  */
 
-typedef struct comdat_entry
+typedef struct GTY((for_user)) comdat_entry
 {
   const char *name;
   unsigned int flags;
@@ -173,11 +173,10 @@ typedef struct comdat_entry
 
 /* Helpers for maintaining solaris_comdat_htab.  */
 
-struct comdat_entry_hasher : nofree_ptr_hash 
+struct comdat_entry_hasher : ggc_ptr_hash 
 {
   static inline hashval_t hash (const comdat_entry *);
   static inline bool equal (const comdat_entry *, const comdat_entry *);
-  static inline void remove (comdat_entry *);
 };
 
 inline hashval_t
@@ -195,7 +194,7 @@ comdat_entry_hasher::equal (const comdat
 
 /* Hash table of group signature symbols.  */
 
-static hash_table *solaris_comdat_htab;
+static GTY (()) hash_table *solaris_comdat_htab;
 
 /* Output assembly to switch to COMDAT group section NAME with attributes
FLAGS and group signature symbol DECL, using Sun as syntax.  */
@@ -237,14 +236,14 @@ solaris_elf_asm_comdat_section (const ch
  remember the signature symbols and emit those not marked
  TREE_SYMBOL_REFERENCED in solaris_file_end.  */
   if (!solaris_comdat_htab)
-solaris_comdat_htab = new hash_table (37);
+solaris_comdat_htab = hash_table::create_ggc (37);
 
   entry.sig = signature;
   slot = solaris_comdat_htab->find_slot (, INSERT);
 
   if (*slot == NULL)
 {
-  *slot = XCNEW (comdat_entry);
+  *slot = ggc_alloc ();
   /* Remember fragmented section name.  */
   (*slot)->name = section;
   /* Emit as regular section, .group declaration has already been done.  */
@@ -299,3 +298,5 @@ solaris_override_options (void)
   if (!HAVE_LD_EH_FRAME_CIEV3 && !global_options_set.x_dwarf_version)
 dwarf_version = 2;
 }
+
+#include "gt-sol2.h"


[build] Support SHF_EXCLUDE on non-x86 and with Solaris as

2018-05-10 Thread Rainer Orth
Prompted by PR go/85429 (gotools unconditionally using gas syntax for
setting SHF_EXCLUDE), I looked into what it takes to enable the flag
(SECTION_EXCLUDE in gcc) on Solaris with /bin/as.  Here's what I found:

* Unlike most (all?) other section flags, the Solaris/x86 as doesn't
  accept the "e" flag for SHF_EXCLUDE, but needs #exclude instead, just
  as on SPARC.

* Solaris/SPARC as does use #exclude to set the flag, completely in line
  with its section flag syntax.

* I noticed that the configure check for the "e" section flag is
  currently only run on x86, although gas on ELF targets supports it
  everywhere.

The following patch fixes all this.  There's one point to note: running
gcc_GAS_CHECK_FEATURE twice with the same cache variable for different
syntaxes of some feature won't work: as currently happens for
gcc_cv_as_shf_merge, when the first such test returns no, the second one
isn't even run, using the cached no value instead.

Bootstrapped without regressions on i386-pc-solaris2.1[01],
sparc-sun-solaris2.1[01] (each with as and gas), and
x86_64-pc-linux-gnu.  Ok for mainline?

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2018-04-18  Rainer Orth  

* configure.ac (gcc_cv_as_section_has_e): Move to common section.
Rename to...
(gcc_cv_as_section_exclude): ... this.
Try Solaris as #exclude syntax.
* configure: Regenerate.
* config.in: Regenerate.
* config/i386/i386.c (i386_solaris_elf_named_section): Handle
SECTION_EXCLUDE.
* config/sparc/sparc.c (sparc_solaris_elf_asm_named_section)
[HAVE_GAS_SECTION_EXCLUDE]: Handle SECTION_EXCLUDE.

* varasm.c (default_elf_asm_named_section): Don't check if
HAVE_GAS_SECTION_EXCLUDE is defined.

# HG changeset patch
# Parent  2c8ad65d830fdf48991fa8f278e5d0d896120b86
Support Solaris as SHF_EXCLUDE flag syntax

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -45240,6 +45240,15 @@ i386_solaris_elf_named_section (const ch
   solaris_elf_asm_comdat_section (name, flags, decl);
   return;
 }
+
+  /* Solaris/x86 as uses the same syntax for the SHF_EXCLUDE flags as the
+ SPARC assembler.  One cannot mix single-letter flags and #exclude, so
+ only emit the latter here.  */
+  if (flags & SECTION_EXCLUDE)
+{
+  fprintf (asm_out_file, "\t.section\t%s,#exclude\n", name);
+  return;
+}
 #endif
 
   default_elf_asm_named_section (name, flags, decl);
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -10502,6 +10502,10 @@ sparc_solaris_elf_asm_named_section (con
 
   if (!(flags & SECTION_DEBUG))
 fputs (",#alloc", asm_out_file);
+#if HAVE_GAS_SECTION_EXCLUDE
+  if (flags & SECTION_EXCLUDE)
+fputs (",#exclude", asm_out_file);
+#endif
   if (flags & SECTION_WRITE)
 fputs (",#write", asm_out_file);
   if (flags & SECTION_TLS)
diff --git a/gcc/configure.ac b/gcc/configure.ac
--- a/gcc/configure.ac
+++ b/gcc/configure.ac
@@ -2953,6 +2953,34 @@ if test $gcc_cv_as_eh_frame = buggy; the
   [Define if your assembler mis-optimizes .eh_frame data.])
 fi
 
+# Test if the assembler supports the section flag 'e' or #exclude for
+# specifying an excluded section.
+gcc_GAS_CHECK_FEATURE([section exclude flag], gcc_cv_as_section_exclude_e,
+ [2,22,51], [--fatal-warnings],
+ [.section foo1,"e"
+  .byte 0,0,0,0])
+if test $gcc_cv_as_section_exclude_e = no; then
+  case "${target}" in
+# Solaris as uses #exclude instead.
+*-*-solaris2*)
+  case "${target}" in
+	sparc*-*-solaris2*)
+	  conftest_s='.section "foo1", #exclude'
+	  ;;
+	i?86-*-solaris2* | x86_64-*-solaris2*)
+	  conftest_s='.section foo1, #exclude'
+	  ;;  
+  esac
+  ;;
+esac
+  gcc_GAS_CHECK_FEATURE([section exclude flag], gcc_cv_as_section_exclude_hash,,,
+[$conftest_s
+ .byte 0,0,0,0])
+fi
+AC_DEFINE_UNQUOTED(HAVE_GAS_SECTION_EXCLUDE,
+  [`if test $gcc_cv_as_section_exclude_e = yes || test $gcc_cv_as_section_exclude_hash = yes; then echo 1; else echo 0; fi`],
+[Define if your assembler supports specifying the exclude section flag.])
+
 gcc_GAS_CHECK_FEATURE(section merging support, gcc_cv_as_shf_merge,
  [elf,2,12,0], [--fatal-warnings],
  [.section .rodata.str, "aMS", @progbits, 1])
@@ -4202,16 +4230,6 @@ foo:	nop
   [AC_DEFINE(HAVE_AS_XBRACE_COMMENT_OPTION, 1,
 		[Define if your assembler supports -xbrace_comment option.])])
 
-# Test if the assembler supports the section flag 'e' for specifying
-# an excluded section.
-gcc_GAS_CHECK_FEATURE([.section with e], gcc_cv_as_section_has_e,
-  [2,22,51], [--fatal-warnings],
-[.section foo1,"e"
-.byte 0,0,0,0])
-AC_DEFINE_UNQUOTED(HAVE_GAS_SECTION_EXCLUDE,
-  [`if test 

Re: [PATCH][i386] Adding WAITPKG instructions

2018-05-10 Thread Uros Bizjak
On Thu, May 10, 2018 at 2:50 PM, Peryt, Sebastian
 wrote:
> Hi Uros,
>
> Updated patch attached, please find comments below.
>
>> -Original Message-
>> From: Uros Bizjak [mailto:ubiz...@gmail.com]
>> Sent: Wednesday, May 9, 2018 1:47 PM
>> To: Peryt, Sebastian 
>> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
>> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
>>
>> On Tue, May 8, 2018 at 1:34 PM, Peryt, Sebastian 
>> wrote:
>> > Hi,
>> >
>> > This patch adds support for WAITPKG instructions.
>> >
>> > Is it ok for trunk and after few day for backport to GCC-8?
>> >
> (Removed)
>> >
>> >
>>
>> +case IX86_BUILTIN_UMONITOR:
>> +  arg0 = CALL_EXPR_ARG (exp, 0);
>> +  op0 = expand_normal (arg0);
>> +  if (!REG_P (op0))
>> +op0 = ix86_zero_extend_to_Pmode (op0);
>> +
>> +  emit_insn (ix86_gen_umonitor (op0));
>> +  return 0;
>>
>> Please see how movdir64b handles its address operand. Also, do not use global
>> ix86_gen_monitor, just expand directly in the same way as movdir64b.
>>
>
> Fixed.
>
>> +case IX86_BUILTIN_UMWAIT:
>> +case IX86_BUILTIN_TPAUSE:
>> +  rtx eax, edx, op1_lo, op1_hi;
>> +  arg0 = CALL_EXPR_ARG (exp, 0);
>> +  arg1 = CALL_EXPR_ARG (exp, 1);
>> +  op0 = expand_normal (arg0);
>> +  op1 = expand_normal (arg1);
>> +  eax = gen_rtx_REG (SImode, AX_REG);
>> +  edx = gen_rtx_REG (SImode, DX_REG);
>> +  if (!REG_P (op0))
>> +op0 = copy_to_mode_reg (SImode, op0);
>> +  if (!REG_P (op1))
>> +op1 = copy_to_mode_reg (DImode, op1);
>> +  op1_lo = gen_lowpart (SImode, op1);
>> +  op1_hi = expand_shift (RSHIFT_EXPR, DImode, op1,
>> + GET_MODE_BITSIZE (SImode), 0, 1);
>> +  op1_hi = convert_modes (SImode, DImode, op1_hi, 1);
>> +  emit_move_insn (eax, op1_lo);
>> +  emit_move_insn (edx, op1_hi);
>> +  emit_insn (fcode == IX86_BUILTIN_UMWAIT
>> +? gen_umwait (op0, eax, edx)
>> +: gen_tpause (op0, eax, edx));
>> +
>> +  /* Return current CF value.  */
>> +  op3 = gen_rtx_REG (CCCmode, FLAGS_REG);
>> +  target = gen_rtx_LTU (QImode, op3, const0_rtx);
>> +
>> +  return target;
>>
>> For the above code, please see how xsetbv expansion and patterns are handling
>> their input operands. There should be two patterns, one for 32bit and the 
>> other
>> for 64bit targets. The patterns will need to set FLAGS_REG, otherwise the 
>> test
>> will be removed.
>>
>
> I copied what is done for xsetbv expansion and most likely I found some bug 
> in GCC.
> The problem is that when I use 3 arguments and compile as 64bit version upper 
> part
> of rax is not cleared. It doesn't appear when I'm using 2 or 4 function 
> arguments.
> Most likely error is caused by the fact that rdx is used both as an input for 
> function and
> argument in instruction.

There is no need to clear upper parts of 64bit register. As specified
in the ISA (and modelled with RTX pattern), the instruction (e.g.
tpause) reads only lower 32 bits from %rax and %rdx. Implicitly, the
instruction should ignore upper 32 bits by itself, so we can use
SUBREGs. If this is not the case, we need to use DImode input
arguments in RTX pattern and explicitly emit zero-extension insns to
clear upper 32 bits of input arguments.

Uros.


Re: Incremental LTO linking part 7: documentation

2018-05-10 Thread Martin Jambor
Hi,

just small nits:

On Tue, May 08 2018, Jan Hubicka wrote:
> Hi,
> this patch adds documentation of -flinker-output.
>
>   * doc/invoke.texi (-flinker-output): Document
> Index: doc/invoke.texi
> ===
> --- doc/invoke.texi   (revision 260042)
> +++ doc/invoke.texi   (working copy)
> @@ -12208,6 +12208,50 @@
>  object file names should not be used as arguments.  @xref{Overall
>  Options}.
>  
> +@item -flinker-output=@var{type}
> +@opindex -flinker-output
> +This option controls the code generation of the link time optimizer.  By
> +default the linker output is determined by the linker plugin automatically. 
> For
> +debugging the compiler and in the case of incremental linking to non-lto 
> object
> +file is desired, it may be useful to control the type manually.
> +
> +If @var{type} is @samp{exec} the code generation is configured to produce 
> static
> +binary. In this case @option{-fpic} and @option{-fpie} are both disabled.
> +
> +If @var{type} is @samp{dyn} the code generation is configured to produce 
> shared
> +library. In this case @option{-fpic} or @option{-fPIC} is preserved, but not
> +enabled automatically.  This makes it possible to build shared libraries 
> without
> +position independent code on architectures this is possible, i.e. on x86.

on architectures *where* this is possible?

> +
> +If @var{type} is @samp{pie} the code generation is configured to produce
> +@option{-fpie} executable. This result in similar optimizations as 
> @samp{exec}
> +except that @option{-fpie} is not disabled if specified at compilation time.
> +
> +If @var{type} is @samp{rel} the compiler assumes that incremental linking is
> +done.  The sections containing intermediate code for link-time optimization 
> are
> +merged, pre-optimized, and output to the resulting object file. In addition, 
> if
> +@option{-ffat-lto-objects} is specified the binary code is produced for 
> future
> +non-lto linking. The object file produced by incremental linking will be 
> smaller
> +than a static library produced from the same object files.  At link-time the
> +result of incremental linking will also load faster to compiler than a static
> +library assuming that majority of objects in the library are used.
> +
> +Finally @samp{nolto-rel} configure compiler to for incremental linking where
> +code generation is forced, final binary is produced and the intermediate code
> +for later link-time optimization is stripped. When multiple object files are
> +linked together the resulting code will be optimized better than with link 
> time
> +optimizations disabled (for example, the cross-module inlining will happen),
> +most of benefits of whole program optimizations are however lost. 
> +
> +During the incremental link (by @option{-r}) the linker plugin will default 
> to
> +@option{rel}. With current interfaces to GNU Binutils it is however not
> +possible to link incrementally LTO objects and non-LTO objects into a single
> +mixed object file.  In the case any of object files in incremental link can 
> not
> +be used for link-time optimization the linker plugin will output warning and
> +use @samp{nolto-rel}. To maintain the whole program optimization it is
> +recommended to link such objects into static library instead. Alternatively 
> it
> +is possible to use H.J. Lu's binutils with support for mixed objects.
> +

I wonder whether this will be still true and what will people make of
this two years from now.  Perhaps add a reference to the current
binutils version?

Martin


RE: [PATCH][i386] Adding CLDEMOTE instruction

2018-05-10 Thread Peryt, Sebastian
> -Original Message-
> From: Uros Bizjak [mailto:ubiz...@gmail.com]
> Sent: Wednesday, May 9, 2018 1:53 PM
> To: Peryt, Sebastian 
> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
> Subject: Re: [PATCH][i386] Adding CLDEMOTE instruction
> 
> On Tue, May 8, 2018 at 1:58 PM, Peryt, Sebastian 
> wrote:
> > Sorry, forgot attachment.
> >
> > Sebastian
> >
> >
> > -Original Message-
> > From: Peryt, Sebastian
> > Sent: Tuesday, May 8, 2018 1:56 PM
> > To: gcc-patches@gcc.gnu.org
> > Cc: Uros Bizjak ; Kirill Yukhin
> > ; Peryt, Sebastian
> > 
> > Subject: [PATCH][i386] Adding CLDEMOTE instruction
> >
> > Hi,
> >
> > This patch adds support for CLDEMOTE instruction.
> >
> > Is it ok for trunk and after few day for backport to GCC-8?
> >
> > 2018-05-08  Sebastian Peryt  
> >
> > gcc/
> >
> > * common/config/i386/i386-common.c
> (OPTION_MASK_ISA_CLDEMOTE_SET,
> > OPTION_MASK_ISA_CLDEMOTE_UNSET): New defines.
> > (ix86_handle_option): Handle -mcldemote.
> > * config.gcc: New header.
> > * config/i386/cldemoteintrin.h: New file.
> > * config/i386/cpuid.h (bit_CLDEMOTE): New bit.
> > * config/i386/driver-i386.c (host_detect_local_cpu): Detect
> > -mcldemote.
> > * config/i386/i386-c.c (ix86_target_macros_internal): Handle
> > OPTION_MASK_ISA_CLDEMOTE.
> > * config/i386/i386.c (ix86_target_string): Added -mcldemote.
> > (ix86_valid_target_attribute_inner_p): Ditto.
> > (enum ix86_builtins): Added IX86_BUILTIN_CLDEMOTE.
> > (ix86_init_mmx_sse_builtins): Define __builtin_ia32_cldemote.
> > (ix86_expand_builtin): Expand IX86_BUILTIN_CLDEMOTE.
> > * config/i386/i386.h (TARGET_CLDEMOTE, TARGET_CLDEMOTE_P): New.
> > * config/i386/i386.md (UNSPECV_CLDEMOTE): New.
> > (cldemote): New.
> > * config/i386/i386.opt: Added -mcldemote.
> > * config/i386/x86intrin.h: New header.
> > * doc/invoke.texi: Added -mcldemote.
> >
> > 2018-05-08  Sebastian Peryt  
> >
> > gcc/testsuite/
> >
> > * gcc.target/i386/cldemote-1.c: New test.
> 
> OK for mainline.
> 
> is there a compelling reason why we want this new feature in gcc-8 release
> branch?
>

After some additional internal discussion I figured for now it's not required 
to backport it.
I'll backport it if/when it'll be required in the future.
 
> Thanks,
> Uros.

Thanks,
Sebastian


RE: [PATCH][i386] Adding WAITPKG instructions

2018-05-10 Thread Peryt, Sebastian
Hi Uros,

Updated patch attached, please find comments below.

> -Original Message-
> From: Uros Bizjak [mailto:ubiz...@gmail.com]
> Sent: Wednesday, May 9, 2018 1:47 PM
> To: Peryt, Sebastian 
> Cc: gcc-patches@gcc.gnu.org; Kirill Yukhin 
> Subject: Re: [PATCH][i386] Adding WAITPKG instructions
> 
> On Tue, May 8, 2018 at 1:34 PM, Peryt, Sebastian 
> wrote:
> > Hi,
> >
> > This patch adds support for WAITPKG instructions.
> >
> > Is it ok for trunk and after few day for backport to GCC-8?
> >
(Removed)
> >
> >
> 
> +case IX86_BUILTIN_UMONITOR:
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  op0 = expand_normal (arg0);
> +  if (!REG_P (op0))
> +op0 = ix86_zero_extend_to_Pmode (op0);
> +
> +  emit_insn (ix86_gen_umonitor (op0));
> +  return 0;
> 
> Please see how movdir64b handles its address operand. Also, do not use global
> ix86_gen_monitor, just expand directly in the same way as movdir64b.
> 

Fixed.

> +case IX86_BUILTIN_UMWAIT:
> +case IX86_BUILTIN_TPAUSE:
> +  rtx eax, edx, op1_lo, op1_hi;
> +  arg0 = CALL_EXPR_ARG (exp, 0);
> +  arg1 = CALL_EXPR_ARG (exp, 1);
> +  op0 = expand_normal (arg0);
> +  op1 = expand_normal (arg1);
> +  eax = gen_rtx_REG (SImode, AX_REG);
> +  edx = gen_rtx_REG (SImode, DX_REG);
> +  if (!REG_P (op0))
> +op0 = copy_to_mode_reg (SImode, op0);
> +  if (!REG_P (op1))
> +op1 = copy_to_mode_reg (DImode, op1);
> +  op1_lo = gen_lowpart (SImode, op1);
> +  op1_hi = expand_shift (RSHIFT_EXPR, DImode, op1,
> + GET_MODE_BITSIZE (SImode), 0, 1);
> +  op1_hi = convert_modes (SImode, DImode, op1_hi, 1);
> +  emit_move_insn (eax, op1_lo);
> +  emit_move_insn (edx, op1_hi);
> +  emit_insn (fcode == IX86_BUILTIN_UMWAIT
> +? gen_umwait (op0, eax, edx)
> +: gen_tpause (op0, eax, edx));
> +
> +  /* Return current CF value.  */
> +  op3 = gen_rtx_REG (CCCmode, FLAGS_REG);
> +  target = gen_rtx_LTU (QImode, op3, const0_rtx);
> +
> +  return target;
> 
> For the above code, please see how xsetbv expansion and patterns are handling
> their input operands. There should be two patterns, one for 32bit and the 
> other
> for 64bit targets. The patterns will need to set FLAGS_REG, otherwise the test
> will be removed.
> 

I copied what is done for xsetbv expansion and most likely I found some bug in 
GCC.
The problem is that when I use 3 arguments and compile as 64bit version upper 
part
of rax is not cleared. It doesn't appear when I'm using 2 or 4 function 
arguments.
Most likely error is caused by the fact that rdx is used both as an input for 
function and
argument in instruction.

When using 3 operands:
bar:
.LFB5450:
.cfi_startproc
movq%rdx, %rax
umonitor%rdi
movq%rdx, %rcx
shrq$32, %rcx
movq%rcx, %rdx
umwait  %esi
setc%al
ret
.cfi_endproc

When using 4 operands:
bar:
.LFB5450:
.cfi_startproc
movl%edx, %esi
umonitor%rdi
movq%rcx, %rax
shrq$32, %rax
movq%rax, %rdx
movl%ecx, %eax
umwait  %esi
setc%al
ret
.cfi_endproc


Can you please suggest how to proceed here? I cannot open new PR without
adding this instruction first. Or maybe you know how to resolve it?

> +(define_insn "umwait"
> +  [(unspec_volatile [(match_operand:SI 0 "register_operand" "r")
> + (use (match_operand:SI 1 "register_operand" "a"))
> + (use (match_operand:SI 2 "register_operand" "d"))]
> +UNSPECV_UMWAIT)]
> +  "TARGET_WAITPKG"
> +  "umwait\t{%0}"
> +  [(set_attr "length" "3")])
> 
> No need for "use" RTX here and in other patterns. You should also remove {}
> from insn template, otherwise there will be no operand printed in some asm
> dialect.
> 

Fixed.

> Uros.

Sebastian


0001-WAITPKG-v2.patch
Description: 0001-WAITPKG-v2.patch


[PATCH] PR libstdc++/85729 add linkage specifications to headers

2018-05-10 Thread Jonathan Wakely

This works around user error where our headers get included within an
extern "C" block.

PR libstdc++/85729
* include/bits/c++config.h (__replacement_assert): Add linkage
specification.
* include/bits/std_abs.h: Add comment to closing brace of block.
* include/c_global/cstddef: Add linkage specification.
* include/c_global/cstring: Likewise.
* include/c_global/cwchar: Likewise.

Tested powerpc64le-linux, committed to trunk.


commit 72a9c99cc1fc94c6b4c1fbe2ae223d07237a817e
Author: Jonathan Wakely 
Date:   Thu May 10 12:50:55 2018 +0100

PR libstdc++/85729 add linkage specifications to headers

PR libstdc++/85729
* include/bits/c++config.h (__replacement_assert): Add linkage
specification.
* include/bits/std_abs.h: Add comment to closing brace of block.
* include/c_global/cstddef: Add linkage specification.
* include/c_global/cstring: Likewise.
* include/c_global/cwchar: Likewise.

diff --git a/libstdc++-v3/include/bits/c++config 
b/libstdc++-v3/include/bits/c++config
index bfe268da825..280f65e1ba0 100644
--- a/libstdc++-v3/include/bits/c++config
+++ b/libstdc++-v3/include/bits/c++config
@@ -438,7 +438,7 @@ namespace std
 {
   // Avoid the use of assert, because we're trying to keep the 
   // include out of the mix.
-  inline void
+  extern "C++" inline void
   __replacement_assert(const char* __file, int __line,
   const char* __function, const char* __condition)
   {
diff --git a/libstdc++-v3/include/bits/std_abs.h 
b/libstdc++-v3/include/bits/std_abs.h
index 6e4551d6597..bcea4f49203 100644
--- a/libstdc++-v3/include/bits/std_abs.h
+++ b/libstdc++-v3/include/bits/std_abs.h
@@ -105,6 +105,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
-}
+} // extern "C++"
 
 #endif // _GLIBCXX_BITS_STD_ABS_H
diff --git a/libstdc++-v3/include/c_global/cstddef 
b/libstdc++-v3/include/c_global/cstddef
index 36d7d716cc3..0ca3b82338a 100644
--- a/libstdc++-v3/include/c_global/cstddef
+++ b/libstdc++-v3/include/c_global/cstddef
@@ -49,13 +49,15 @@
 #include 
 #include 
 
+extern "C++"
+{
 #if __cplusplus >= 201103L
 namespace std
 {
   // We handle size_t, ptrdiff_t, and nullptr_t in c++config.h.
   using ::max_align_t;
 }
-#endif
+#endif // C++11
 
 #if __cplusplus >= 201703L
 namespace std
@@ -186,6 +188,7 @@ namespace std
 { return _IntegerType(__b); }
 
 } // namespace std
-#endif
+#endif // C++17
+} // extern "C++"
 
 #endif // _GLIBCXX_CSTDDEF
diff --git a/libstdc++-v3/include/c_global/cstring 
b/libstdc++-v3/include/c_global/cstring
index 399f41fb164..2bca01b7f08 100644
--- a/libstdc++-v3/include/c_global/cstring
+++ b/libstdc++-v3/include/c_global/cstring
@@ -68,6 +68,8 @@
 #undef strtok
 #undef strxfrm
 
+extern "C++"
+{
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -119,5 +121,6 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
+} // extern "C++"
 
 #endif
diff --git a/libstdc++-v3/include/c_global/cwchar 
b/libstdc++-v3/include/c_global/cwchar
index 475322ee806..7557c7768aa 100644
--- a/libstdc++-v3/include/c_global/cwchar
+++ b/libstdc++-v3/include/c_global/cwchar
@@ -132,6 +132,8 @@ namespace std
 
 #if _GLIBCXX_USE_WCHAR_T
 
+extern "C++"
+{
 namespace std _GLIBCXX_VISIBILITY(default)
 {
 _GLIBCXX_BEGIN_NAMESPACE_VERSION
@@ -231,6 +233,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
 _GLIBCXX_END_NAMESPACE_VERSION
 } // namespace
+} // extern "C++"
 
 #if _GLIBCXX_USE_C99_WCHAR
 


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Segher Boessenkool
On Thu, May 10, 2018 at 10:33:39AM +0200, Marc Glisse wrote:
> (not a review)
> 
> On Thu, 10 May 2018, Hans-Peter Nilsson wrote:
> 
> >Replacing a division feeding a division helps only when the
> >second division is the only user, and "fusing" the divisions is
> 
> Well, that's not quite true.
> int x, y;
> void f(int n){
>   int c = 3 << 20;
>   x = n / c;
>   y = x / c;
> }

[ fixed the typo ]

> Here we can optimize the last division to y = 0. After your patch, we 
> likely need VRP to do that simplification. There are probably more 
> complicated transformations this disables.

Without the replacement we have two dependent divisions; with the
replacement we have two independent divisions, and that is much faster
on some (many?) systems (that can do two fixed-point divisions in parallel),
even if things do not simplify further.


Segher


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Jakub Jelinek
On Thu, May 10, 2018 at 01:51:29PM +0200, Marc Glisse wrote:
> > > There are probably more
> > > complicated transformations this disables.
> > 
> > I'm providing an example from *real* code where the
> > transformation is bad (admittedly just for the div-div case).

Isn't the case of the posted real-world testcase that many targets have
instructions that can do division and modulo at the same time?
So perhaps don't punt just because of !single_use (), but punt if
!single_use () and one of the other uses is a modulo with the same constant
as the second division, or perhaps similarly if the first division is
accompanied by modulo with the same constant too?

Jakub


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Marc Glisse

On Thu, 10 May 2018, Hans-Peter Nilsson wrote:


Date: Thu, 10 May 2018 10:33:39 +0200 (CEST)
From: Marc Glisse 



On Thu, 10 May 2018, Hans-Peter Nilsson wrote:


Replacing a division feeding a division helps only when the
second division is the only user, and "fusing" the divisions is


Well, that's not quite true.
int x, y;
void f(int n){
   int c = 3 << 20;
   x = n / c;
   y = n / c;


Ah, this was supposed to be

  y = x / c;

sorry.


There are probably more
complicated transformations this disables.


I'm providing an example from *real* code where the
transformation is bad (admittedly just for the div-div case).
Please provide the same in your counterargument.


I don't have this kind of example at hand. Easiest would be to try some 
large benchmark (SPEC?) with and without, but that's a pain... I agree 
that the whole question is what helps most in practice, and I don't have 
the answer, I can just provide some not-so-realistic examples to show that 
the answer is not obvious in the other direction either (I am not trying 
to reject your patch).


int x,y,z;
void f(int n){
  x = n / 3;
  y = x / 5;
  z = n / 15;
}

we would like to notice that y and z are equivalent to skip one division.

int x,y,z;
void f(int n){
  x = n / 4;
  y = x / 4;
  z = y * 16 | 15;
}

we can replace the multiplication with a bit_and, which then simplifies 
with the bit_ior (surprisingly only in RTL, we may be missing something in 
gimple).



downright bad if another user of the result of first division is
a modulus of the same value as the second division, forming a
divmod pair.  See the test-case, where for the tested
architectures (which all fail the test-case before the patch)
the div and mod are implemented using the high-part of a widened
multiplication and shift, emitted separately but combined as
late as in rtl, with the multiplication and shift re-used.  That
of course does not happen if later passes see (y / 48; y % 3).
While in match.pd, I noticed the corresponding mul-mul match,
which I believe should be guarded the same way.


Did you notice bad codegen because of the multiplication? You are only
adding a test for divisions. I am asking because I know such a change will
help some cases and hurt others...


I can take that part out for lack of evidence, but first I'll
argue that e.g. a multiplication by 5 won't be helped to be
transformed into a multiplication by 15; multiplying or dividing
by a larger constant is not by itself a simplification or
canonicalization.


It is a canonicalization, because it makes the result depend on a more 
primitive variable. If you make several computations based on an int n, if 
you can express many variables in terms of n, you are more likely to 
notice duplicates or simplifications, or back-propagate ranges.



To simplify, the goal of :s is to avoid increasing the number of
instructions. Normally, the transformation output is smaller (or the same
size but cheaper, simpler, more canonical) than the input.


That may be the intent, but the second sentence is not generally
true.  Any such transformation must be carefully inspected to
have that property *on their own*; it's not true for the div-div
and mul-mul case for example.  After a quick look at uses of :s
in match.pd I'd say that's actually rare and for most codes not
true, carefully remembering we're talking about the context
where the intermediate still lives after the transformation.


Note that I was talking about the case where the intermediate results can 
be eliminated, and moved on to when this isn't the case in the next 
sentence below.



But if we can't
get rid of some of the input intermediate results, the size may still
increase. :s does test single_use, but it has a special case. If the
output (possibly after several rounds of resimplifications) is at most one
instruction, then it cannot be larger than the input (we are at least
getting rid of the instruction we called the simplification
on),


(not true in the div-div or mul-mul case)


???

We start from
y = x / 5;
z = y / 3; // calling simplification on this insn

and replace it with
y = x / 5;
z = x / 15;
(we get rid of the division by 3)

the output is not larger than the input.



so the
transformation does happen even if !single_use. Originally this was only
done when the simplification led to an SSA_NAME or a constant, IIRC.


Can you please provide an example?  I had a look at a couple of
the :s uses in match.pd imagining intermediate use of the :s-ed
operand and it didn't seem that they'd help where :s doesn't
mean single_use.


The first :s in match.pd is

 (simplify
  (rdiv (rdiv:s @0 @1) @2)
  (rdiv @0 (mult @1 @2)))

normally, with:
y=a/b; // y has multiple uses
z=y/c;

producing
y=a/b;
t=b*c;
z=a/t;

is bad because it has one extra instruction. But if for instance b and c 
are constants, then that's not the case anymore and the transformation 
does not increase the number of instructions. You are probably going to 

Re: [PATCH] Handle no_sanitize attribute values in the right way (PR sanitizer/85556).

2018-05-10 Thread Jakub Jelinek
On Thu, May 10, 2018 at 12:59:39PM +0200, Martin Liška wrote:
> >From 9e2570eee9bb160b58075f6802d6ac1bb7b77341 Mon Sep 17 00:00:00 2001
> From: marxin 
> Date: Thu, 10 May 2018 10:27:02 +0200
> Subject: [PATCH] Support LLVM style of no_sanitize attribute (PR
>  sanitizer/85556).
> 
> gcc/ChangeLog:
> 
> 2018-05-10  Martin Liska  
> 
>   * doc/extend.texi: Document LLVM style format for no_sanitize
>   attribute.
> 
> gcc/c-family/ChangeLog:
> 
> 2018-05-10  Martin Liska  
> 
> PR sanitizer/85556
>   * c-attribs.c (handle_no_sanitize_attribute): Iterate all
>   TREE_LIST values.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-05-10  Martin Liska  
> 
> PR sanitizer/85556
>   * c-c++-common/ubsan/attrib-6.c: New test.

Ok, thanks.

Jakub


[PATCH] Document Dual ABI for std::ios_base::failure

2018-05-10 Thread Jonathan Wakely

And a couple of other doc improvements, and regenerated the HTML
pages.

Please read the proposed change to using.xml and let me know if it's
clear.

* doc/xml/faq.xml: Link to C++17 status. Add note to outdated answer.
* doc/xml/manual/debug_mode.xml: Add array and forward_list to list
of C++11 containers with Debug Mode support.
* doc/xml/manual/using.xml: Document Dual ABI for ios_base::failure.
* doc/html/*: Regenerate.


commit ef4d4c52eb0073627270d7b13a5bc2a89f6cef0d
Author: Jonathan Wakely 
Date:   Thu May 10 12:07:08 2018 +0100

Document Dual ABI for std::ios_base::failure

* doc/xml/faq.xml: Link to C++17 status. Add note to outdated 
answer.
* doc/xml/manual/debug_mode.xml: Add array and forward_list to list
of C++11 containers with Debug Mode support.
* doc/xml/manual/using.xml: Document Dual ABI for ios_base::failure.
* doc/html/*: Regenerate.

diff --git a/libstdc++-v3/doc/xml/faq.xml b/libstdc++-v3/doc/xml/faq.xml
index b0b1f98e641..edc07f16acb 100644
--- a/libstdc++-v3/doc/xml/faq.xml
+++ b/libstdc++-v3/doc/xml/faq.xml
@@ -742,15 +742,16 @@
 except for some corner cases.  Support for localization
 in locale may be incomplete on some non-GNU
 platforms. Also dependent on the underlying platform is support
-for wchar_t and long
-long specializations, and details of thread support.
+for wchar_t and long long specializations,
+and details of thread support.
 
 
 Long answer: See the implementation status pages for 
 C++98,
-TR1, and 
-C++11.
-C++14.
+TR1,
+C++11,
+C++14, and
+C++17.
  
   
 
@@ -891,6 +892,9 @@
 
   
   
+
+  This answer is old and is probably no longer relevant.
+
 
 Another problem is the rel_ops namespace and the 
template
 comparison operator functions contained therein.  If they become
diff --git a/libstdc++-v3/doc/xml/manual/debug_mode.xml 
b/libstdc++-v3/doc/xml/manual/debug_mode.xml
index 5082bbfb724..570c17ba28a 100644
--- a/libstdc++-v3/doc/xml/manual/debug_mode.xml
+++ b/libstdc++-v3/doc/xml/manual/debug_mode.xml
@@ -285,7 +285,19 @@ containers have additional debug capability.
   
 
 
-
+  
+std::array
+array
+__gnu_debug::array
+debug/array
+  
+  
+std::forward_list
+forward_list
+__gnu_debug::forward_list
+debug/forward_list
+  
+  
 std::unordered_map
 unordered_map
 __gnu_debug::unordered_map
diff --git a/libstdc++-v3/doc/xml/manual/using.xml 
b/libstdc++-v3/doc/xml/manual/using.xml
index 918703a5217..67f9cf5216b 100644
--- a/libstdc++-v3/doc/xml/manual/using.xml
+++ b/libstdc++-v3/doc/xml/manual/using.xml
@@ -1036,7 +1036,7 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 
hello.cc -o test.exe
 
 
  The _GLIBCXX_USE_CXX11_ABI macro (see
-) controls whether
+  ) controls whether
   the declarations in the library headers use the old or new ABI.
   So the decision of which ABI to use can be made separately for each
   source file being compiled.
@@ -1071,12 +1071,39 @@ g++ -Winvalid-pch -I. -include stdc++.h -H -g -O2 
hello.cc -o test.exe
 
 
  Although the standard exception types defined in
-  stdexcept use strings, they
+  stdexcept use strings, most
   are not defined twice, so that a std::out_of_range
   exception thrown in one file can always be caught by a suitable handler in
   another file, even if the two files are compiled with different ABIs.
 
 
+ One exception type does change when using the new ABI, namely
+  std::ios_base::failure.
+  This is necessary because the 2011 standard changed its base class from
+  std::exception to
+  std::system_error, which causes its layout to change.
+  Exceptions due to iostream errors are thrown by a function inside
+  libstdc++.so, so whether the thrown
+  exception uses the old std::ios_base::failure type
+  or the new one depends on the ABI that was active when
+  libstdc++.so was built,
+  not the ABI active in the user code that is using
+  iostreams.
+  This means that for a given build of GCC the type thrown is fixed.
+  In current releases the library throws a special type that can be caught
+  by handlers for either the old or new type,
+  but for GCC 7.1, 7.2 and 7.3 the library throws the new
+  std::ios_base::failure type,
+  and for GCC 5.x and 6.x the library throws the old type.
+  Catch handlers of type std::ios_base::failure
+  will only catch the exceptions if using a newer release,
+  or if the handler is compiled with the same ABI as the type thrown by
+  the library.
+  Handlers for std::exception will always catch
+  iostreams exceptions, because the old and new type both inherit from
+  std::exception.
+
+
 Troubleshooting
 
  If you get linker errors about undefined references to symbols


Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Hans-Peter Nilsson
> Date: Thu, 10 May 2018 10:33:39 +0200 (CEST)
> From: Marc Glisse 

> On Thu, 10 May 2018, Hans-Peter Nilsson wrote:
> 
> > Replacing a division feeding a division helps only when the
> > second division is the only user, and "fusing" the divisions is
> 
> Well, that's not quite true.
> int x, y;
> void f(int n){
>int c = 3 << 20;
>x = n / c;
>y = n / c;
> }
> 
> Here we can optimize the last division to y = 0. After your patch, we 
> likely need VRP to do that simplification.

Incorrect; the transformation can't match anything in that code,
either before or after my patch.  (Please adjust your example
to be true, I just couldn't correct it on my own to make sense.)

> There are probably more 
> complicated transformations this disables.

I'm providing an example from *real* code where the
transformation is bad (admittedly just for the div-div case).
Please provide the same in your counterargument.

> > downright bad if another user of the result of first division is
> > a modulus of the same value as the second division, forming a
> > divmod pair.  See the test-case, where for the tested
> > architectures (which all fail the test-case before the patch)
> > the div and mod are implemented using the high-part of a widened
> > multiplication and shift, emitted separately but combined as
> > late as in rtl, with the multiplication and shift re-used.  That
> > of course does not happen if later passes see (y / 48; y % 3).
> > While in match.pd, I noticed the corresponding mul-mul match,
> > which I believe should be guarded the same way.
> 
> Did you notice bad codegen because of the multiplication? You are only 
> adding a test for divisions. I am asking because I know such a change will 
> help some cases and hurt others...

I can take that part out for lack of evidence, but first I'll
argue that e.g. a multiplication by 5 won't be helped to be
transformed into a multiplication by 15; multiplying or dividing
by a larger constant is not by itself a simplification or
canonicalization.

> To simplify, the goal of :s is to avoid increasing the number of 
> instructions. Normally, the transformation output is smaller (or the same 
> size but cheaper, simpler, more canonical) than the input.

That may be the intent, but the second sentence is not generally
true.  Any such transformation must be carefully inspected to
have that property *on their own*; it's not true for the div-div
and mul-mul case for example.  After a quick look at uses of :s
in match.pd I'd say that's actually rare and for most codes not
true, carefully remembering we're talking about the context
where the intermediate still lives after the transformation.

> But if we can't 
> get rid of some of the input intermediate results, the size may still 
> increase. :s does test single_use, but it has a special case. If the 
> output (possibly after several rounds of resimplifications) is at most one 
> instruction, then it cannot be larger than the input (we are at least 
> getting rid of the instruction we called the simplification
> on),

(not true in the div-div or mul-mul case)

> so the 
> transformation does happen even if !single_use. Originally this was only 
> done when the simplification led to an SSA_NAME or a constant, IIRC.

Can you please provide an example?  I had a look at a couple of
the :s uses in match.pd imagining intermediate use of the :s-ed
operand and it didn't seem that they'd help where :s doesn't
mean single_use.

> Then people start wanting single_use restrictions to reduce register 
> pressure, reduce the size / number of constants, etc. And not all of those 
> want exactly the same conditions.
> 
> It is useful for high-level transformations to push the canonicalization 
> as far as possible, to notice equivalent quantities or constant bounds in 
> particular. So on a case by case basis, we use :s or single_use or 
> whatever...

Again, you're speaking as if match.pd naturally contains just
canonicalizations, but that's not true.

> If we use both y=x/3 and z=x/15 in the same function, should we make an 
> effort to detect it and rewrite to z=y/5?

I see what you did there. :)  I'd say it depends on how far
those calculations are from each other and if there's
intermediate use of y.

brgds, H-P


Re: [PATCH] Handle no_sanitize attribute values in the right way (PR sanitizer/85556).

2018-05-10 Thread Martin Liška
On 05/10/2018 11:45 AM, Jakub Jelinek wrote:
> On Thu, May 10, 2018 at 11:28:15AM +0200, Martin Liška wrote:
>> Parsing of no_sanitize attribute now supports
>> __attribute__((no_sanitize("address,undefined")))
> 
> Why is that wrong?  I don't see why we shouldn't support it that way.
> It matches how we handle other similar attributes, say target attribute.

Good, let's support both formats.

> 
>> which is wrong. And on the other hand this is not recognized:
>> __attribute__((no_sanitize("address", "undefined")))
> 
> But we can certainly add support for this too for compatibility with clang.
> 
>   Jakub
> 

Done that in updated version of the patch. I've been running bootstrap and
tests.

Ready after it finishes?

Martin
>From 9e2570eee9bb160b58075f6802d6ac1bb7b77341 Mon Sep 17 00:00:00 2001
From: marxin 
Date: Thu, 10 May 2018 10:27:02 +0200
Subject: [PATCH] Support LLVM style of no_sanitize attribute (PR
 sanitizer/85556).

gcc/ChangeLog:

2018-05-10  Martin Liska  

	* doc/extend.texi: Document LLVM style format for no_sanitize
	attribute.

gcc/c-family/ChangeLog:

2018-05-10  Martin Liska  

PR sanitizer/85556
	* c-attribs.c (handle_no_sanitize_attribute): Iterate all
	TREE_LIST values.

gcc/testsuite/ChangeLog:

2018-05-10  Martin Liska  

PR sanitizer/85556
	* c-c++-common/ubsan/attrib-6.c: New test.
---
 gcc/c-family/c-attribs.c| 20 
 gcc/doc/extend.texi |  2 ++
 gcc/testsuite/c-c++-common/ubsan/attrib-6.c | 26 ++
 3 files changed, 40 insertions(+), 8 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/ubsan/attrib-6.c

diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index e0630885cca..744315eec86 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -403,7 +403,7 @@ const struct attribute_spec c_common_attribute_table[] =
 			  0, 0, true, false, false, false,
 			  handle_no_address_safety_analysis_attribute,
 			  NULL },
-  { "no_sanitize",	  1, 1, true, false, false, false,
+  { "no_sanitize",	  1, -1, true, false, false, false,
 			  handle_no_sanitize_attribute, NULL },
   { "no_sanitize_address",0, 0, true, false, false, false,
 			  handle_no_sanitize_address_attribute, NULL },
@@ -683,22 +683,26 @@ static tree
 handle_no_sanitize_attribute (tree *node, tree name, tree args, int,
 			  bool *no_add_attrs)
 {
+  unsigned int flags = 0;
   *no_add_attrs = true;
-  tree id = TREE_VALUE (args);
   if (TREE_CODE (*node) != FUNCTION_DECL)
 {
   warning (OPT_Wattributes, "%qE attribute ignored", name);
   return NULL_TREE;
 }
 
-  if (TREE_CODE (id) != STRING_CST)
+  for (; args; args = TREE_CHAIN (args))
 {
-  error ("no_sanitize argument not a string");
-  return NULL_TREE;
-}
+  tree id = TREE_VALUE (args);
+  if (TREE_CODE (id) != STRING_CST)
+	{
+	  error ("no_sanitize argument not a string");
+	  return NULL_TREE;
+	}
 
-  char *string = ASTRDUP (TREE_STRING_POINTER (id));
-  unsigned int flags = parse_no_sanitize_attribute (string);
+  char *string = ASTRDUP (TREE_STRING_POINTER (id));
+  flags |= parse_no_sanitize_attribute (string);
+}
 
   add_no_sanitize_value (*node, flags);
 
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 9d085844cfd..a4664cad819 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2977,6 +2977,8 @@ mentioned in @var{sanitize_option}.  A list of values acceptable by
 @smallexample
 void __attribute__ ((no_sanitize ("alignment", "object-size")))
 f () @{ /* @r{Do something.} */; @}
+void __attribute__ ((no_sanitize ("alignment,object-size")))
+g () @{ /* @r{Do something.} */; @}
 @end smallexample
 
 @item no_sanitize_address
diff --git a/gcc/testsuite/c-c++-common/ubsan/attrib-6.c b/gcc/testsuite/c-c++-common/ubsan/attrib-6.c
new file mode 100644
index 00000000000..2af70c8c2cf
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/attrib-6.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=undefined" } */
+
+static void __attribute__((no_sanitize("foobar")))
+foo (void) { /* { dg-warning "attribute directive ignored" } */
+}
+
+static void __attribute__((no_sanitize("address,undefined")))
+foo2 (void) {
+}
+
+static void __attribute__((no_sanitize("address", "undefined")))
+foo3 (void) {
+}
+
+static void __attribute__((no_sanitize("address", "address", "")))
+foo4 (void) {
+}
+
+static void __attribute__((no_sanitize("address", "address", "address,address")))
+foo5 (void) {
+}
+
+static void __attribute__((no_sanitize("address", "address,kernel-address,thread,leak,undefined,vptr,shift,integer-divide-by-zero,unreachable,vla-bound,null,return,signed-integer-overflow,bounds,bounds-strict,alignment,object-size,float-divide-by-zero,float-cast-overflow,nonnull-attribute,returns-nonnull-attribute,bool,enum")))
+foo6 (void) {
+}

[Patch, fortran] PR68846 - Pointer function as LValue doesn't work when the assignment regards a dummy argument.

2018-05-10 Thread Paul Richard Thomas
Committed as obvious in revision 260113.

I will commence backporting as soon as I can.

Thanks to Mirco Valentini for the initial fix, which he posted on 2017-02-28.

Cheers

Paul

2018-05-10  Paul Thomas  

PR fortran/68846
PR fortran/70864
* resolve.c (get_temp_from_expr): The temporary must not have
dummy or intent attributes.

2018-05-10  Paul Thomas  

PR fortran/68846
* gfortran.dg/temporary_3.f90 : New test.

PR fortran/70864
* gfortran.dg/temporary_2.f90 : New test.


Re: [PATCH, v2] Recognize a missed usage of a sbfiz instruction

2018-05-10 Thread Luis Machado


On 05/09/2018 10:44 AM, Kyrill Tkachov wrote:


On 09/05/18 13:30, Luis Machado wrote:

Hi Kyrill,

On 05/08/2018 11:09 AM, Kyrill Tkachov wrote:

Hi Luis,

On 07/05/18 15:28, Luis Machado wrote:

Hi,

On 02/08/2018 10:45 AM, Luis Machado wrote:

Hi Kyrill,

On 02/08/2018 09:48 AM, Kyrill Tkachov wrote:

Hi Luis,

On 06/02/18 15:04, Luis Machado wrote:
Thanks for the feedback Kyrill. I've adjusted the v2 patch based 
on your

suggestions and re-tested the changes. Everything is still sane.


Thanks! This looks pretty good to me.

Since this is ARM-specific and fairly specific, i wonder if it 
would be

reasonable to consider it for inclusion at the current stage.


It is true that the target maintainers can choose to take
such patches at any stage. However, any patch at this stage increases
the risk of regressions being introduced and these regressions
can come bite us in ways that are very hard to anticipate.

Have a look at some of the bugs in bugzilla (or a quick scan of 
the gcc-bugs list)
for examples of the ways that things can go wrong with any of the 
myriad of GCC components

and the unexpected ways in which they can interact.

For example, I am now working on what I initially thought was a 
one-liner fix for
PR 84164 but it has expanded into a 3-patch series with a midend 
component and

target-specific changes for 2 ports.

These issues are very hard to catch during review and normal 
testing, and can sometimes take months of deep testing by
fuzzing and massive codebase rebuilds to expose, so the closer the 
commit is to a release
the higher the risk is that an obscure edge case will be unnoticed 
and unfixed in the release.


So the priority at this stage is to minimise the risk of 
destabilising the codebase,
as opposed to taking in new features and desirable performance 
improvements (like your patch!)


That is the rationale for delaying committing such changes until 
the start
of GCC 9 development. But again, this is up to the aarch64 
maintainers.
I'm sure the patch will be a perfectly fine and desirable commit 
for GCC 9.

This is just my perspective as maintainer of the arm port.


Thanks. Your explanation makes the situation pretty clear and it 
sounds very reasonable. I'll put the patch on hold until 
development is open again.


Regards,
Luis


With GCC 9 development open, i take it this patch is worth 
considering again?




Yes, I believe the latest version is at:
https://gcc.gnu.org/ml/gcc-patches/2018-02/msg00239.html ?

+(define_insn "*ashift_extv_bfiz"
+  [(set (match_operand:GPI 0 "register_operand" "=r")
+    (ashift:GPI (sign_extract:GPI (match_operand:GPI 1 "register_operand" "r")
+  (match_operand 2 "aarch64_simd_shift_imm_offset_<mode>" "n")
+  (match_operand 3 "aarch64_simd_shift_imm_<mode>" "n"))
+ (match_operand 4 "aarch64_simd_shift_imm_<mode>" "n")))]
+  ""
+  "sbfiz\\t%<w>0, %<w>1, %4, %2"
+  [(set_attr "type" "bfx")]
+)
+


Indeed.



Can you give a bit more information about what are the values for 
operands 2,3 and 4 in your example testcases?


For sbfiz32 we have 3, 0 and 19 respectively. For sbfiz64 we have 6, 0 
and 38.


I'm trying to understand why the value of operand 3 (the bit position 
the sign-extract starts from) doesn't get validated

in any way and doesn't play any role in the output...


This may be an oversight. It seems operand 3 will always be 0 in this 
particular case i'm covering (it starts from 0, gets shifted x bits to 
the left and then y < x bits to the right). The operation is 
essentially an ashift of the bitfield followed by a sign-extension of 
the msb of the bitfield being extracted.


Having a non-zero operand 3 from RTL means the shift amount won't 
translate directly to operand 3 of sbfiz (the position). Then we'd 
need to do a calculation where we take into account operand 3 from RTL.


I'm wondering when such a RTL pattern, with a non-zero operand 3, 
would be generated though.


I think it's best to enforce that operand 3 is a zero. Maybe just match 
const_int 0 here directly.

Better safe than sorry with these things.


Indeed. I've updated the original patch with that change now.

Bootstrapped and regtested on aarch64-linux.

Thanks,
Luis
2018-05-10  Luis Machado  

	gcc/
	* config/aarch64/aarch64.md (*ashift_extv_bfiz): New pattern.

	gcc/testsuite/
	* gcc.target/aarch64/lsl_asr_sbfiz.c: New test.
---
 gcc/config/aarch64/aarch64.md| 13 +
 gcc/testsuite/gcc.target/aarch64/lsl_asr_sbfiz.c | 24 
 2 files changed, 37 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/aarch64/lsl_asr_sbfiz.c

diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 32a0e1f..1f943e6 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -4851,6 +4851,19 @@
   [(set_attr "type" "bfx")]
 )
 
+;; Match sbfiz pattern in a shift left + shift right operation.
+
+(define_insn 

Re: [PATCH][AARCH64][PR target/84882] Add mno-strict-align

2018-05-10 Thread Sudakshina Das

Ping!

On 27/03/18 13:58, Sudakshina Das wrote:

Hi

This patch adds the no variant to -mstrict-align and the corresponding
function attribute. To enable the function attribute, I have modified
aarch64_can_inline_p () to allow checks even when the callee function
has no attribute. The need for this is shown by the new test
target_attr_18.c.

Testing: Bootstrapped, regtested and added new tests that are copies
of earlier tests checking -mstrict-align with opposite scan directives.

Is this ok for trunk?

Sudi


*** gcc/ChangeLog ***

2018-03-27  Sudakshina Das  

 * common/config/aarch64/aarch64-common.c (aarch64_handle_option):
 Check val before adding MASK_STRICT_ALIGN to opts->x_target_flags.
 * config/aarch64/aarch64.opt (mstrict-align): Remove RejectNegative.
 * config/aarch64/aarch64.c (aarch64_attributes): Mark allow_neg
 as true for strict-align.
 (aarch64_can_inline_p): Perform checks even when callee has no
 attributes to check for strict alignment.
 * doc/extend.texi (AArch64 Function Attributes): Document
 no-strict-align.
 * doc/invoke.texi: (AArch64 Options): Likewise.

*** gcc/testsuite/ChangeLog ***

2018-03-27  Sudakshina Das  

 * gcc.target/aarch64/pr84882.c: New test.
 * gcc.target/aarch64/target_attr_18.c: Likewise.




[PATCH] rs6000: Remove -maltivec={be,le}

2018-05-10 Thread Segher Boessenkool
This removes the -maltivec=be and -maltivec=le options.  Those were
deprecated in GCC 8.

Altivec will keep working on both BE and LE; it is just the BE-vectors-
on-LE that is removed (the other way around was never supported).

The main change is replacing VECTOR_ELT_ORDER_BIG by BYTES_BIG_ENDIAN
(and then simplifying).

Tested on powerpc64-linux {-m32,-m64} (power7), and on powerpc64le-linux
(power9).  Committing to trunk.


Segher


2018-05-10  Segher Boessenkool  

* config/rs6000/altivec.md (altivec_vmrghb, altivec_vmrghh,
altivec_vmrghw, altivec_vmrglb, altivec_vmrglh, altivec_vmrglw): Remove
-maltivec=be support.
(vec_widen_umult_even_v16qi, vec_widen_smult_even_v16qi,
vec_widen_umult_even_v8hi, vec_widen_smult_even_v8hi,
vec_widen_umult_even_v4si, vec_widen_smult_even_v4si,
vec_widen_umult_odd_v16qi, vec_widen_smult_odd_v16qi,
vec_widen_umult_odd_v8hi, vec_widen_smult_odd_v8hi,
vec_widen_umult_odd_v4si, vec_widen_smult_odd_v4si, altivec_vpkpx,
altivec_vpksss, altivec_vpksus,
altivec_vpkuus, altivec_vpkuum, altivec_vsum2sws,
altivec_vsumsws): Adjust.
(altivec_vspltb *altivec_vspltb_internal, altivec_vsplth,
*altivec_vsplth_internal, altivec_vspltw, *altivec_vspltw_internal,
altivec_vspltsf, *altivec_vspltsf_internal): Remove -maltivec=be
support.
(altivec_vperm_, altivec_vperm__uns,
altivec_vupkhs, altivec_vupkls, altivec_vupkhpx,
altivec_vupklpx, altivec_lvsl, altivec_lvsr): Adjust.
(altivec_lvex): Delete expand.
(*altivec_lvex_internal): Rename to...
(altivec_lvex): ... this.
(altivec_lvxl_): Delete expand.
(*altivec_lvxl__internal): Rename to ...
(altivec_lvxl_): ... this.
(altivec_stvxl_): Delete expand.
(*altivec_stvxl__internal): Rename to ...
(altivec_stvxl_): ... this.
(altivec_stvex): Delete expand.
(*altivec_stvex_internal): Rename to ...
(altivec_stvex): ... this.
(doublee2, unsdoubleev4si2, doubleo2, unsdoubleov4si2,
doubleh2, unsdoublehv4si2, doublel2, unsdoublelv4si2,
reduc_plus_scal_): Adjust.
* config/rs6000/rs6000-c.c (rs6000_target_modify_macros): Adjust
comment.
(rs6000_cpu_cpp_builtins): Adjust.
(altivec_resolve_overloaded_builtin): Remove -maltivec=be support.
* config/rs6000/rs6000-protos.h (altivec_expand_lvx_be,
altivec_expand_stvx_be, altivec_expand_stvex_be): Delete.
* config/rs6000/rs6000.c (rs6000_option_override_internal): Remove
-maltivec=be support.
(rs6000_split_vec_extract_var): Adjust.
(rs6000_split_v4si_init): Adjust.
(swap_selector_for_mode): Delete.
(altivec_expand_lvx_be, altivec_expand_stvx_be,
altivec_expand_stvex_be): Delete.
(altivec_expand_lv_builtin, altivec_expand_stv_builtin): Remove
-maltivec=be support.
(rs6000_gimple_fold_builtin): Ditto.
(rs6000_generate_float2_double_code, rs6000_generate_float2_code):
Adjust.
* config/rs6000/rs6000.h (VECTOR_ELT_ORDER_BIG): Delete.
(TARGET_DIRECT_MOVE_64BIT): Adjust.
* config/rs6000/rs6000.md (split for extendsidi2 for vectors): Adjust.
* config/rs6000/rs6000.opt (maltivec=le, maltivec=be): Delete.
* config/rs6000/vsx.md (floate, unsfloatev2di, floato,
unsfloatov2di, vsignedo_v2df, vsignede_v2df, vunsignedo_v2df,
vunsignede_v2df, vsx_extract__p9, *vsx_extract_si,
*vsx_extract__p8, *vsx_extract_si_float_df,
*vsx_extract_si_float_, vsx_set__p9, vsx_set_v4sf_p9,
*vsx_insert_extract_v4sf_p9, *vsx_insert_extract_v4sf_p9_2, and an
anonymous split): Adjust.
(vsx_mergel_, vsx_mergeh_): Remove -maltivec=be support.
(vsx_xxspltd_, extract4b, insert4b): Adjust.

gcc/testsuite/
* gcc.dg/vmx/extract-be-order.c: Delete testcase.
* gcc.dg/vmx/extract-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/insert-be-order.c: Delete testcase.
* gcc.dg/vmx/insert-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/ld-be-order.c: Delete testcase.
* gcc.dg/vmx/ld-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/lde-be-order.c: Delete testcase.
* gcc.dg/vmx/ldl-be-order.c: Delete testcase.
* gcc.dg/vmx/ldl-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/merge-be-order.c: Delete testcase.
* gcc.dg/vmx/merge-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/mult-even-odd-be-order.c: Delete testcase.
* gcc.dg/vmx/pack-be-order.c: Delete testcase.
* gcc.dg/vmx/perm-be-order.c: Delete testcase.
* gcc.dg/vmx/splat-be-order.c: Delete testcase.
* gcc.dg/vmx/splat-vsx-be-order.c: Delete testcase.
* gcc.dg/vmx/st-be-order.c: Delete testcase.
* gcc.dg/vmx/st-vsx-be-order.c: Delete testcase.
 

[ping] Use response files from the driver in more cases

2018-05-10 Thread Eric Botcazou
https://gcc.gnu.org/ml/gcc-patches/2018-04/msg01172.html

Thanks in advance.

-- 
Eric Botcazou


Re: [PATCH] PR fortran/85521 -- Zero length substrings in array constructors

2018-05-10 Thread Dominique d'Humières
Hi Steve,

AFAICT the patch is missing.

Thanks for working on these PRs.

Dominique



Re: [PATCH 1/2] extend.texi: update Global Register Variables section

2018-05-10 Thread Alexander Monakov


On Mon, 23 Apr 2018, Alexander Monakov wrote:

> This rewrites global register vars doc to reflect that the register is no 
> longer
> reserved exclusively, but in fact is available for general allocation, and 
> also
> adds the requirement to properly inform the compiler where inline asms are
> accessing the variable.
> 
> This:
> -@item The register is not saved and restored by any functions.
> is reworded to verbosely spell out gotchas related to calls/returns.
> 
>   * extend.texi (Global Register Variables): Rewrite the bullet list.
>   Note that the register is available for allocation. Note that access
>   via inline asm must use constraints. Add note about async-signal
>   handlers. Remove paragraph about automagic register selection.
> ---
>  gcc/doc/extend.texi | 29 +++--
>  1 file changed, 19 insertions(+), 10 deletions(-)

Ping? I believe the substance of new text has been hashed out with Michael.
I'd appreciate a review for language and style issues.

Thanks.
Alexander

> diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
> index 5571d05d93b..f663741e36c 100644
> --- a/gcc/doc/extend.texi
> +++ b/gcc/doc/extend.texi
> @@ -9549,11 +9549,21 @@ After defining a global register variable, for the 
> current compilation
>  unit:
>  
>  @itemize @bullet
> -@item The register is reserved entirely for this use, and will not be 
> -allocated for any other purpose.
> -@item The register is not saved and restored by any functions.
> -@item Stores into this register are never deleted even if they appear to be 
> -dead, but references may be deleted, moved or simplified.
> +@item If the register is a call-saved register, call ABI is affected:
> +the register will not be restored in function epilogue sequences after
> +the variable has been assigned.  Therefore, functions cannot safely
> +return to callers that assume standard ABI.
> +@item Conversely, if the register is a call-clobbered register, making
> +calls to functions that use standard ABI may lose contents of the variable.
> +Such calls may be created by the compiler even if none are evident in
> +the original program, for example when libgcc functions are used to
> +make up for unavailable instructions.
> +@item Accesses to the variable may be optimized as usual and the register
> +remains available for allocation and use in any computations, provided that
> +observable values of the variable are not affected.
> +@item If the variable is referenced in inline assembly, the type of access
> +must be provided to the compiler via constraints (@pxref{Constraints}).
> +Accesses from basic asms are not supported.
>  @end itemize
>  
>  Note that these points @emph{only} apply to code that is compiled with the
> @@ -9595,7 +9605,10 @@ the comparison function unless the @code{qsort} 
> function itself is rebuilt.
>  Similarly, it is not safe to access the global register variables from signal
>  handlers or from more than one thread of control. Unless you recompile 
>  them specially for the task at hand, the system library routines may 
> -temporarily use the register for other things.
> +temporarily use the register for other things.  Furthermore, since the 
> register
> +is not reserved exclusively for the variable, accessing it from handlers of
> +asynchronous signals may observe unrelated temporary values residing in the
> +register.
>  
>  @cindex register variable after @code{longjmp}
>  @cindex global register after @code{longjmp}
> @@ -9610,10 +9623,6 @@ should make other arrangements to save the values of 
> the global register
>  variables, and to restore them in a @code{longjmp}. This way, the same
>  thing happens regardless of what @code{longjmp} does.
>  
> -Eventually there may be a way of asking the compiler to choose a register 
> -automatically, but first we need to figure out how it should choose and 
> -how to enable you to guide the choice.  No solution is evident.
> -
>  @node Local Register Variables
>  @subsubsection Specifying Registers for Local Variables
>  @anchor{Local Reg Vars}
> 


Re: [PATCH] Handle no_sanitize attribute values in the right way (PR sanitizer/85556).

2018-05-10 Thread Jakub Jelinek
On Thu, May 10, 2018 at 11:28:15AM +0200, Martin Liška wrote:
> Parsing of no_sanitize attribute now supports
> __attribute__((no_sanitize("address,undefined")))

Why is that wrong?  I don't see why we shouldn't support it that way.
It matches how we handle other similar attributes, say target attribute.

> which is wrong. And on the other hand this is not recognized:
> __attribute__((no_sanitize("address", "undefined")))

But we can certainly add support for this too for compatibility with clang.

Jakub


Fix bad interaction between sysroot and C++ include dir

2018-05-10 Thread Eric Botcazou
This fixes an annoying regression introduced in 2012 by this change:
  https://codereview.appspot.com/5394041

The idea of the change is to interpolate the value passed to --with-sysroot in 
the value passed to --with-gxx-include-dir, so that you can change the sysroot 
at run time, i.e. pass --sysroot to the compiler, and have the gxx-include-dir 
automatically translated relative to the new sysroot.

The original submission did just that:
  https://gcc.gnu.org/ml/gcc-patches/2011-11/msg01751.html
but it was probably done against a branch so the version eventually installed 
does the interpolation unconditionally, i.e. even when gxx-include-dir is the 
default C++ include path in the install tree.

The result is that, if you configure --with-sysroot with a value that happens 
to be identical to the start of the default C++ path in the install tree, the 
mechanism triggers and the C++ include path is completely broken if you pass 
--sysroot to the compiler.  IOW the same issue as the one meant to be fixed, 
but this time when --with-gxx-include-dir is not passed.

So the attached patch restores the original implementation by doing the 
interpolation only when both --with-sysroot and --with-gxx-include-dir are 
specified on the configure line.

Tested on x86-64/Linux, applied on the mainline as obvious.


2018-05-10  Eric Botcazou  

* configure.ac (gcc_gxx_include_dir_add_sysroot): Set it to 1 only
when --with-gxx-include-dir is also specified.
* configure: Regenerate.

-- 
Eric Botcazou

Index: configure.ac
===
--- configure.ac	(revision 260071)
+++ configure.ac	(working copy)
@@ -205,6 +205,11 @@ no)	;;
 *)	gcc_gxx_include_dir=$with_gxx_include_dir ;;
 esac])
 
+# If both --with-sysroot and --with-gxx-include-dir are passed, we interpolate
+# the former in the latter and, upon success, compute gcc_gxx_include_dir as
+# relative to the sysroot.
+gcc_gxx_include_dir_add_sysroot=0
+
 # This logic must match libstdc++-v3/acinclude.m4:GLIBCXX_EXPORT_INSTALL_INFO.
 if test x${gcc_gxx_include_dir} = x; then
   if test x${enable_version_specific_runtime_libs} = xyes; then
@@ -216,15 +221,10 @@ if test x${gcc_gxx_include_dir} = x; the
 fi
 gcc_gxx_include_dir="\$(libsubdir)/\$(libsubdir_to_prefix)$libstdcxx_incdir"
   fi
-fi
-
-gcc_gxx_include_dir_add_sysroot=0
-if test "${with_sysroot+set}" = set; then
+elif test "${with_sysroot+set}" = set; then
   gcc_gxx_without_sysroot=`expr "${gcc_gxx_include_dir}" : "${with_sysroot}"'\(.*\)'`
   if test "${gcc_gxx_without_sysroot}"; then
-if test x${with_sysroot} != x/; then
-  gcc_gxx_include_dir="${gcc_gxx_without_sysroot}"
-fi
+gcc_gxx_include_dir="${gcc_gxx_without_sysroot}"
 gcc_gxx_include_dir_add_sysroot=1
   fi
 fi


[PATCH] Handle no_sanitize attribute values in the right way (PR sanitizer/85556).

2018-05-10 Thread Martin Liška
Hi.

Parsing of no_sanitize attribute now supports
__attribute__((no_sanitize("address,undefined")))
which is wrong. And on the other hand this is not recognized:
__attribute__((no_sanitize("address", "undefined")))

Patch can bootstrap on x86_64-linux-gnu and survives regression tests. Then I 
would like
to backport that to GCC 8 branch.

Ready to be installed?
Martin

gcc/ChangeLog:

2018-05-10  Martin Liska  

PR sanitizer/85556
* opts.c (parse_no_sanitize_attribute): Handle only a single
option value.

gcc/c-family/ChangeLog:

2018-05-10  Martin Liska  

PR sanitizer/85556
* c-attribs.c (handle_no_sanitize_attribute): Iterate all
TREE_LIST values.

gcc/testsuite/ChangeLog:

2018-05-10  Martin Liska  

PR sanitizer/85556
* c-c++-common/ubsan/attrib-6.c: New test.
---
 gcc/c-family/c-attribs.c| 20 +++
 gcc/opts.c  | 30 +++--
 gcc/testsuite/c-c++-common/ubsan/attrib-6.c | 22 +
 3 files changed, 45 insertions(+), 27 deletions(-)
 create mode 100644 gcc/testsuite/c-c++-common/ubsan/attrib-6.c


diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index e0630885cca..744315eec86 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -403,7 +403,7 @@ const struct attribute_spec c_common_attribute_table[] =
 			  0, 0, true, false, false, false,
 			  handle_no_address_safety_analysis_attribute,
 			  NULL },
-  { "no_sanitize",	  1, 1, true, false, false, false,
+  { "no_sanitize",	  1, -1, true, false, false, false,
 			  handle_no_sanitize_attribute, NULL },
   { "no_sanitize_address",0, 0, true, false, false, false,
 			  handle_no_sanitize_address_attribute, NULL },
@@ -683,22 +683,26 @@ static tree
 handle_no_sanitize_attribute (tree *node, tree name, tree args, int,
 			  bool *no_add_attrs)
 {
+  unsigned int flags = 0;
   *no_add_attrs = true;
-  tree id = TREE_VALUE (args);
   if (TREE_CODE (*node) != FUNCTION_DECL)
 {
   warning (OPT_Wattributes, "%qE attribute ignored", name);
   return NULL_TREE;
 }
 
-  if (TREE_CODE (id) != STRING_CST)
+  for (; args; args = TREE_CHAIN (args))
 {
-  error ("no_sanitize argument not a string");
-  return NULL_TREE;
-}
+  tree id = TREE_VALUE (args);
+  if (TREE_CODE (id) != STRING_CST)
+	{
+	  error ("no_sanitize argument not a string");
+	  return NULL_TREE;
+	}
 
-  char *string = ASTRDUP (TREE_STRING_POINTER (id));
-  unsigned int flags = parse_no_sanitize_attribute (string);
+  char *string = ASTRDUP (TREE_STRING_POINTER (id));
+  flags |= parse_no_sanitize_attribute (string);
+}
 
   add_no_sanitize_value (*node, flags);
 
diff --git a/gcc/opts.c b/gcc/opts.c
index 33efcc0d6e7..f999dccd009 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1748,33 +1748,25 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
   return flags;
 }
 
-/* Parse string values of no_sanitize attribute passed in VALUE.
-   Values are separated with comma.  */
+/* Parse string value of no_sanitize attribute passed in VALUE.  */
 
 unsigned int
 parse_no_sanitize_attribute (char *value)
 {
   unsigned int flags = 0;
   unsigned int i;
-  char *q = strtok (value, ",");
 
-  while (q != NULL)
-{
-  for (i = 0; sanitizer_opts[i].name != NULL; ++i)
-	if (strcmp (sanitizer_opts[i].name, q) == 0)
-	  {
-	flags |= sanitizer_opts[i].flag;
-	if (sanitizer_opts[i].flag == SANITIZE_UNDEFINED)
-	  flags |= SANITIZE_UNDEFINED_NONDEFAULT;
-	break;
-	  }
-
-  if (sanitizer_opts[i].name == NULL)
-	warning (OPT_Wattributes,
-		 "%<%s%> attribute directive ignored", q);
+  for (i = 0; sanitizer_opts[i].name != NULL; ++i)
+if (strcmp (sanitizer_opts[i].name, value) == 0)
+  {
+	flags |= sanitizer_opts[i].flag;
+	if (sanitizer_opts[i].flag == SANITIZE_UNDEFINED)
+	  flags |= SANITIZE_UNDEFINED_NONDEFAULT;
+	break;
+  }
 
-  q = strtok (NULL, ",");
-}
+  if (sanitizer_opts[i].name == NULL)
+warning (OPT_Wattributes, "%<%s%> attribute directive ignored", value);
 
   return flags;
 }
diff --git a/gcc/testsuite/c-c++-common/ubsan/attrib-6.c b/gcc/testsuite/c-c++-common/ubsan/attrib-6.c
new file mode 100644
index 000..ac08dc219ec
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/attrib-6.c
@@ -0,0 +1,22 @@
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=undefined" } */
+
+static void __attribute__((no_sanitize("foobar")))
+foo (void) { /* { dg-warning "attribute directive ignored" } */
+}
+
+static void __attribute__((no_sanitize("address,undefined")))
+foo2 (void) { /* { dg-warning ".address,undefined. attribute directive ignored" } */
+}
+
+static void __attribute__((no_sanitize("address", "undefined")))
+foo3 (void) {
+}
+
+static void __attribute__((no_sanitize("address", "address", "")))

Re: Fix PR85726 (div-div suboptimization) and a rant on match.pd :s-flag

2018-05-10 Thread Marc Glisse

(not a review)

On Thu, 10 May 2018, Hans-Peter Nilsson wrote:


Replacing a division feeding a division helps only when the
second division is the only user, and "fusing" the divisions is


Well, that's not quite true.
int x, y;
void f(int n){
  int c = 3 << 20;
  x = n / c;
  y = n / c;
}

Here we can optimize the last division to y = 0. After your patch, we 
likely need VRP to do that simplification. There are probably more 
complicated transformations this disables.



downright bad if another user of the result of first division is
a modulus of the same value as the second division, forming a
divmod pair.  See the test-case, where for the tested
architectures (which all fail the test-case before the patch)
the div and mod are implemented using the high-part of a widened
multiplication and shift, emitted separately but combined as
late as in rtl, with the multiplication and shift re-used.  That
of course does not happen if later passes see (y / 48; y % 3).
While in match.pd, I noticed the corresponding mul-mul match,
which I believe should be guarded the same way.


Did you notice bad codegen because of the multiplication? You are only 
adding a test for divisions. I am asking because I know such a change will 
help some cases and hurt others...



Now a rant on the match.pd ":s" flag, which reasonable people
may reasonably suggest I should have used in the patch instead
of the (if (single_use ...)).

Initially, I got the match.pd-language (which deserves a proper
name) all wrong.  Then I read the documentation and still got it
wrong.  I "misunderstood" that the ":s" on an operand "O" was
supposed to have the effect of conditionalizing the replacement
"R" by wrapping it in "(if (single_use (O)) R)" as in the
suggested patch (above).  To wit, this does not work; it will
*not* stop the replacement as seen in the test-case (THIS IS NOT
A SUGGESTED PATCH):

(for div (trunc_div exact_div)
 (simplify
-  (div (div @0 INTEGER_CST@1) INTEGER_CST@2)
+  (div (div:s @0 INTEGER_CST@1) INTEGER_CST@2)
  (with {
bool overflow_p;
wide_int mul = wi::mul (wi::to_wide (@1), wi::to_wide (@2),

In PR69556, it seems other people have read the
documentation of ":s" the same way, but are corrected by other
comments there, so I guess it's not my reading that's flawed.

I suggest preferably (1) correcting the semantics of ":s" to do
as the documentation says because I don't understand the
explanation in PR69556 comment #4 that the replacement "is still
allowed if it is a single operation as that replaces at least
one other (the one we are simplifying)"; I see that as a
complete nullification of the :s flag, making it a nop.
Alternatively (2), if the :s is *not* a nop in some case add an
example of that case and sufficient explanation to
match-and-simplify.texi *and* the suggested ":S" flag
(i.e. *really* conditionalize the replacement by a single-use of
that operand).


To simplify, the goal of :s is to avoid increasing the number of 
instructions. Normally, the transformation output is smaller (or the same 
size but cheaper, simpler, more canonical) than the input. But if we can't 
get rid of some of the input intermediate results, the size may still 
increase. :s does test single_use, but it has a special case. If the 
output (possibly after several rounds of resimplifications) is at most one 
instruction, then it cannot be larger than the input (we are at least 
getting rid of the instruction we called the simplification on), so the 
transformation does happen even if !single_use. Originally this was only 
done when the simplification led to an SSA_NAME or a constant, IIRC.


Then people start wanting single_use restrictions to reduce register 
pressure, reduce the size / number of constants, etc. And not all of those 
want exactly the same conditions.


It is useful for high-level transformations to push the canonicalization 
as far as possible, to notice equivalent quantities or constant bounds in 
particular. So on a case by case basis, we use :s or single_use or 
whatever...


If we use both y=x/3 and z=x/15 in the same function, should we make an 
effort to detect it and rewrite to z=y/5?


--
Marc Glisse


[PATCH] Do not ICE for incomplete types in ICF (PR ipa/85607).

2018-05-10 Thread Martin Liška
Hi.

This removes an assert at the place where we calculate the hash of a type.
For incomplete types, let's skip the hashing instead.

Patch can bootstrap on x86_64-linux-gnu and survives regression tests.

Ready to be installed?
Martin


gcc/ChangeLog:

2018-05-09  Martin Liska  

PR ipa/85607
* ipa-icf.c (sem_item::add_type): Do not ICE for incomplete types.

gcc/testsuite/ChangeLog:

2018-05-09  Martin Liska  

PR ipa/85607
* g++.dg/ipa/pr85606.C: New test.
---
 gcc/ipa-icf.c  |  5 -
 gcc/testsuite/g++.dg/ipa/pr85606.C | 14 ++
 2 files changed, 18 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/g++.dg/ipa/pr85606.C


diff --git a/gcc/ipa-icf.c b/gcc/ipa-icf.c
index f974d9f769f..7ecd0380fb7 100644
--- a/gcc/ipa-icf.c
+++ b/gcc/ipa-icf.c
@@ -1580,7 +1580,10 @@ sem_item::add_type (const_tree type, inchash::hash )
 }
   else if (RECORD_OR_UNION_TYPE_P (type))
 {
-  gcc_checking_assert (COMPLETE_TYPE_P (type));
+  /* Incomplete types must be skipped here.  */
+  if (!COMPLETE_TYPE_P (type))
+	return;
+
   hashval_t *val = optimizer->m_type_hash_cache.get (type);
 
   if (!val)
diff --git a/gcc/testsuite/g++.dg/ipa/pr85606.C b/gcc/testsuite/g++.dg/ipa/pr85606.C
new file mode 100644
index 000..b47aba2167d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ipa/pr85606.C
@@ -0,0 +1,14 @@
+// { dg-do compile }
+/* { dg-options "-O2" } */
+
+class A;	// { dg-message "forward declaration of 'class A'" }
+
+A *a;		// { dg-warning "'a' has incomplete type" }
+
+int
+main (int argc, char **argv)
+{
+  delete a;	// { dg-warning "delete" "warn" }
+  // { dg-message "note" "note" { target *-*-* } .-1 }
+  return 0;
+}



[PATCH] Use two source permute for vector initialization (PR 85692, take 2)

2018-05-10 Thread Jakub Jelinek
On Wed, May 09, 2018 at 04:53:19PM +0200, Allan Sandfeld Jensen wrote:
> > > @@ -2022,8 +2022,9 @@ simplify_vector_constructor (gimple_stmt_iterator
> > > *gsi)> 
> > >elem_type = TREE_TYPE (type);
> > >elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
> > > 
> > > -  vec_perm_builder sel (nelts, nelts, 1);
> > > -  orig = NULL;
> > > +  vec_perm_builder sel (nelts, 2, nelts);
> > 
> > Why this change?  I admit the vec_perm_builder arguments are confusing, but
> > I think the second times third is the number of how many indices are being
> > pushed into the vector, so I think (nelts, nelts, 1) is right.
> > 
> I had the impression it was what was selected from. In any case, I changed it 
> because without I get crash when vec_perm_indices is created later with a 
> possible nparms of 2.

The documentation is apparently in vector-builder.h:
   This class is a wrapper around auto_vec for building vectors of T.
   It aims to encode each vector as npatterns interleaved patterns,
   where each pattern represents a sequence:

 { BASE0, BASE1, BASE1 + STEP, BASE1 + STEP*2, BASE1 + STEP*3, ... }

   The first three elements in each pattern provide enough information
   to derive the other elements.  If all patterns have a STEP of zero,
   we only need to encode the first two elements in each pattern.
   If BASE1 is also equal to BASE0 for all patterns, we only need to
   encode the first element in each pattern.  The number of encoded
   elements per pattern is given by nelts_per_pattern.

   The class can be used in two ways:

   1. It can be used to build a full image of the vector, which is then
  canonicalized by finalize ().  In this case npatterns is initially
  the number of elements in the vector and nelts_per_pattern is
  initially 1.

   2. It can be used to build a vector that already has a known encoding.
  This is preferred since it is more efficient and copes with
  variable-length vectors.  finalize () then canonicalizes the encoding
  to a simpler form if possible.

As the vector is constant width and we are building the full image of the
vector, the right arguments are (nelts, nelts, 1) as per 1. above, and the
finalization can perhaps change it to something more compact.

> > (and sorry for missing your patch first, the PR wasn't ASSIGNED and there
> > was no link to gcc-patches for it).
> > 
> It is okay. You are welcome to take it over. I am not a regular gcc 
> contributor and thus not well-versed in the details, only the basic logic of 
> how things work.

Ok, here is my version of the patch.  Bootstrapped/regtested on x86_64-linux
and i686-linux, ok for trunk?

2018-05-10  Allan Sandfeld Jensen  
Jakub Jelinek  

PR tree-optimization/85692
* tree-ssa-forwprop.c (simplify_vector_constructor): Try two
source permute as well.

* gcc.target/i386/pr85692.c: New test.

--- gcc/tree-ssa-forwprop.c.jj  2018-05-08 18:16:36.866614130 +0200
+++ gcc/tree-ssa-forwprop.c 2018-05-09 20:44:32.621900540 +0200
@@ -2004,7 +2004,7 @@ simplify_vector_constructor (gimple_stmt
 {
   gimple *stmt = gsi_stmt (*gsi);
   gimple *def_stmt;
-  tree op, op2, orig, type, elem_type;
+  tree op, op2, orig[2], type, elem_type;
   unsigned elem_size, i;
   unsigned HOST_WIDE_INT nelts;
   enum tree_code code, conv_code;
@@ -2023,7 +2023,8 @@ simplify_vector_constructor (gimple_stmt
   elem_size = TREE_INT_CST_LOW (TYPE_SIZE (elem_type));
 
   vec_perm_builder sel (nelts, nelts, 1);
-  orig = NULL;
+  orig[0] = NULL;
+  orig[1] = NULL;
   conv_code = ERROR_MARK;
   maybe_ident = true;
   FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (op), i, elt)
@@ -2063,25 +2064,35 @@ simplify_vector_constructor (gimple_stmt
return false;
   op1 = gimple_assign_rhs1 (def_stmt);
   ref = TREE_OPERAND (op1, 0);
-  if (orig)
+  unsigned int j;
+  for (j = 0; j < 2; ++j)
{
- if (ref != orig)
-   return false;
-   }
-  else
-   {
- if (TREE_CODE (ref) != SSA_NAME)
-   return false;
- if (! VECTOR_TYPE_P (TREE_TYPE (ref))
- || ! useless_type_conversion_p (TREE_TYPE (op1),
- TREE_TYPE (TREE_TYPE (ref
-   return false;
- orig = ref;
+ if (!orig[j])
+   {
+ if (TREE_CODE (ref) != SSA_NAME)
+   return false;
+ if (! VECTOR_TYPE_P (TREE_TYPE (ref))
+ || ! useless_type_conversion_p (TREE_TYPE (op1),
+ TREE_TYPE (TREE_TYPE (ref
+   return false;
+ if (j && !useless_type_conversion_p (TREE_TYPE (orig[0]),
+  TREE_TYPE (ref)))
+   return false;
+ orig[j] = ref;
+ break;
+   }
+ else if (ref == orig[j])
+   break;
}
+  

Re: [PATCH] Workaround glibc <= 2.23 nextafterl/nexttowardl bug (PR tree-optimization/85699)

2018-05-10 Thread Jakub Jelinek
On Thu, May 10, 2018 at 07:53:15AM +0200, Richard Biener wrote:
> On May 9, 2018 10:52:05 PM GMT+02:00, Jakub Jelinek  wrote:
> >Hi!
> >
> >glibc <= 2.23 has buggy nextafterl/nexttowardl as can be seen on the
> >nextafter-2.c testcase.
> >
> >Do we want to workaround this bug, e.g. with the following patch?
> 
> Works for me. Was the reason to test the target libc to test the compare 
> against arithmetic? 

Yes, to verify that the GCC implementation matches the libc one.

Jakub


Re: Handle vector boolean types when calculating the SLP unroll factor

2018-05-10 Thread Richard Sandiford
Richard Biener  writes:
> On Wed, May 9, 2018 at 1:29 PM, Richard Sandiford
>  wrote:
>> Richard Biener  writes:
>>> On Wed, May 9, 2018 at 12:34 PM, Richard Sandiford
>>>  wrote:
 The SLP unrolling factor is calculated by finding the smallest
 scalar type for each SLP statement and taking the number of required
 lanes from the vector versions of those scalar types.  E.g. for an
 int32->int64 conversion, it's the vector of int32s rather than the
 vector of int64s that determines the unroll factor.

 We rely on tree-vect-patterns.c to replace boolean operations like:

bool a, b, c;
a = b & c;

 with integer operations of whatever the best size is in context.
 E.g. if b and c are fed by comparisons of ints, a, b and c will become
 the appropriate size for an int comparison.  For most targets this means
 that a, b and c will end up as int-sized themselves, but on targets like
 SVE and AVX512 with packed vector booleans, they'll instead become a
 small bitfield like :1, padded to a byte for memory purposes.
 The SLP code would then take these scalar types and try to calculate
 the vector type for them, causing the unroll factor to be much higher
 than necessary.

 This patch makes SLP use the cached vector boolean type if that's
 appropriate.  Tested on aarch64-linux-gnu (with and without SVE),
 aarch64_be-none-elf and x86_64-linux-gnu.  OK to install?

 Richard


 2018-05-09  Richard Sandiford  

 gcc/
 * tree-vect-slp.c (get_vectype_for_smallest_scalar_type): New 
 function.
 (vect_build_slp_tree_1): Use it when calculating the unroll factor.

 gcc/testsuite/
 * gcc.target/aarch64/sve/vcond_10.c: New test.
 * gcc.target/aarch64/sve/vcond_10_run.c: Likewise.
 * gcc.target/aarch64/sve/vcond_11.c: Likewise.
 * gcc.target/aarch64/sve/vcond_11_run.c: Likewise.

 Index: gcc/tree-vect-slp.c
 ===
 --- gcc/tree-vect-slp.c 2018-05-08 09:42:03.526648115 +0100
 +++ gcc/tree-vect-slp.c 2018-05-09 11:30:41.061096063 +0100
 @@ -608,6 +608,41 @@ vect_record_max_nunits (vec_info *vinfo,
return true;
  }

 +/* Return the vector type associated with the smallest scalar type in 
 STMT.  */
 +
 +static tree
 +get_vectype_for_smallest_scalar_type (gimple *stmt)
 +{
 +  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
 +  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
 +  if (vectype != NULL_TREE
 +  && VECTOR_BOOLEAN_TYPE_P (vectype))
>>>
>>> Hum.  At this point you can't really rely on vector types being set...
>>
>> Not for everything, but here we only care about the result of the
>> pattern replacements, and pattern replacements do set the vector type
>> up-front.  vect_determine_vectorization_factor (which runs earlier
>> for loop vectorisation) also relies on this.
>>
 +{
 +  /* The result of a vector boolean operation has the smallest scalar
 +type unless the statement is extending an even narrower boolean.  
 */
 +  if (!gimple_assign_cast_p (stmt))
 +   return vectype;
 +
 +  tree src = gimple_assign_rhs1 (stmt);
 +  gimple *def_stmt;
 +  enum vect_def_type dt;
 +  tree src_vectype = NULL_TREE;
 +  if (vect_is_simple_use (src, stmt_info->vinfo, _stmt, ,
 + _vectype)
 + && src_vectype
 + && VECTOR_BOOLEAN_TYPE_P (src_vectype))
 +   {
 + if (TYPE_PRECISION (TREE_TYPE (src_vectype))
 + < TYPE_PRECISION (TREE_TYPE (vectype)))
 +   return src_vectype;
 + return vectype;
 +   }
 +}
 +  HOST_WIDE_INT dummy;
 +  tree scalar_type = vect_get_smallest_scalar_type (stmt, , );
 +  return get_vectype_for_scalar_type (scalar_type);
 +}
 +
  /* Verify if the scalar stmts STMTS are isomorphic, require data
 permutation or are of unsupported types of operation.  Return
 true if they are, otherwise return false and indicate in *MATCHES
 @@ -636,12 +671,11 @@ vect_build_slp_tree_1 (vec_info *vinfo,
enum tree_code first_cond_code = ERROR_MARK;
tree lhs;
bool need_same_oprnds = false;
 -  tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE;
 +  tree vectype = NULL_TREE, first_op1 = NULL_TREE;
optab optab;
int icode;
machine_mode optab_op2_mode;
machine_mode vec_mode;
 -  HOST_WIDE_INT dummy;
gimple *first_load = NULL, *prev_first_load = NULL;

/* For every stmt in NODE find its def