Re: [PATCH 1/3][GCC] Add new target hook asm_post_cfi_startproc

2018-11-04 Thread Richard Biener
On Fri, 2 Nov 2018, Sam Tebbs wrote:

> On 11/02/2018 05:28 PM, Sam Tebbs wrote:
> 
> > Hi all,
> >
> > This patch adds a new target hook called "asm_post_cfi_startproc". This 
> > hook is
> > intended to be used by the aarch64 backend to emit a directive that enables
> > support for unwinding frames signed with the pointer authentication B-key. 
> > This
> > hook is triggered after the ".cfi_startproc" directive is emitted in
> > gcc/dwarf2out.c.
> >
> > Bootstrapped on aarch64-none-linux-gnu and tested on aarch64-none-elf with 
> > no regressions.
> >
> > Ok for trunk?

Can you explain why existing prologue/cfi emission points are not
enough?

> > gcc/
> > 2018-11-02  Sam Tebbs
> >
> > * doc/tm.texi (TARGET_ASM_POST_CFI_STARTPROC): Define.
> > * doc/tm.texi.in (TARGET_ASM_POST_CFI_STARTPROC): Define.
> > * dwarf2out.c (dwarf2out_do_cfi_startproc): Trigger the hook.
> > * hooks.c (hook_void_FILEptr_tree): Define.
> > * hooks.h (hook_void_FILEptr_tree): Define.
> > * target.def (post_cfi_startproc): Define.
> 
> CCing global reviewers and dwarf maintainers.
> 
> 

-- 
Richard Biener 
SUSE LINUX GmbH, GF: Felix Imendoerffer, Jane Smithard, Graham Norton, HRB 
21284 (AG Nuernberg)


Re: [PATCH, OpenACC] Update documentation to mention OpenACC 2.5

2018-11-04 Thread Thomas Schwinge
Hi!

On Sun, 4 Nov 2018 18:13:09 -0700, Sandra Loosemore  
wrote:
> On 11/02/2018 04:38 AM, Chung-Lin Tang wrote:
> > Hi Thomas,
> > this patch (mostly by yourself:) ) are the changes to the documentation 
> > to now state OpenACC 2.5 support.
> > I believe this is within your maintainership scope.
> 
> I took a look at it with my doc maintainer hat on too, and the patch is 
> fine with me

Thanks for the review -- the patch was trivial enough, eh?  ;-)

> as long as there are no technical correctness issues.

Not a technical correctness issue, and it might be obvious, but we
shouldn't commit this one until we've actually gotten the bulk of the
OpenACC 2.5 changes committed to trunk.


Grüße
 Thomas


Re: [PATCH][x86_64] Fix PR87853, _mm_cmpgt_epi8 broken with -funsigned-char

2018-11-04 Thread Uros Bizjak
On Mon, Nov 5, 2018 at 2:58 AM Terry Guo  wrote:
>
> Hi there,
>
> This patch intends to fix PR87853 by involving a new 'signed char'
> vector type to avoid the impact of option -funsigned-char. Tested with
> bootstrap and regression tests on x86_64. No regressions.
>
> Is it OK to trunk and release branch?
>
> BR,
> Terry
>
> diff --git a/gcc/ChangeLog b/gcc/ChangeLog
> index ac121a8..dc10a11 100644
> --- a/gcc/ChangeLog
> +++ b/gcc/ChangeLog
> @@ -1,3 +1,12 @@
> +2018-11-05  Xuepeng Guo  
> +
> +   PR target/87853
> +   * config/i386/emmintrin.h (__v16qs): New to cope with option
> +   -funsigned-char.
> +   (_mm_cmpeq_epi8): Replace __v16qi with __v16qs.
> +   (_mm_cmplt_epi8): Likewise.
> +   (_mm_cmpgt_epi8): Likewise.

OK everywhere.

Thanks,
Uros.

>  2018-11-04  Bernd Edlinger  
>
> PR tree-optimization/86572
> diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
> index 7a6ff80..3c1f04b 100644
> --- a/gcc/config/i386/emmintrin.h
> +++ b/gcc/config/i386/emmintrin.h
> @@ -45,6 +45,7 @@ typedef unsigned int __v4su __attribute__
> ((__vector_size__ (16)));
>  typedef short __v8hi __attribute__ ((__vector_size__ (16)));
>  typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
>  typedef char __v16qi __attribute__ ((__vector_size__ (16)));
> +typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
>  typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));
>
>  /* The Intel API is flexible enough that we must allow aliasing with other
> @@ -1295,7 +1296,7 @@ _mm_xor_si128 (__m128i __A, __m128i __B)
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
>  _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
>  {
> -  return (__m128i) ((__v16qi)__A == (__v16qi)__B);
> +  return (__m128i) ((__v16qs)__A == (__v16qs)__B);
>  }
>
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> @@ -1313,7 +1314,7 @@ _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
>  _mm_cmplt_epi8 (__m128i __A, __m128i __B)
>  {
> -  return (__m128i) ((__v16qi)__A < (__v16qi)__B);
> +  return (__m128i) ((__v16qs)__A < (__v16qs)__B);
>  }
>
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
> @@ -1331,7 +1332,7 @@ _mm_cmplt_epi32 (__m128i __A, __m128i __B)
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))
>  _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
>  {
> -  return (__m128i) ((__v16qi)__A > (__v16qi)__B);
> +  return (__m128i) ((__v16qs)__A > (__v16qs)__B);
>  }
>
>  extern __inline __m128i __attribute__((__gnu_inline__,
> __always_inline__, __artificial__))


Re: [PATCH] x86: Update VFIXUPIMM* Intrinsics to align with the latest Intel SDM

2018-11-04 Thread Uros Bizjak
On Mon, Nov 5, 2018 at 6:54 AM Wei Xiao  wrote:
>
> > Please also rename these:
> >
> >  _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
> > __m512i __C, const int __imm, const int __R)
> >
> >  _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
> > __m512i __C, const int __imm, const int __R)
> >
> >  _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
> >  __m128i __C, const int __imm, const int __R)
> >
> >  _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
> >  __m128i __C, const int __imm, const int __R)
> >
> >  _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
> >   __m512i __C, const int __imm)
> >
> > _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
> >   __m512i __C, const int __imm)
> >
> >  _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
> >__m128i __C, const int __imm)
> >
> >  _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
> >__m128i __C, const int __imm)
> >
> >  _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
> >   __m256i __C, const int __imm)
> >
> >  _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
> >   __m256i __C, const int __imm)
> >
> >   _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
> >__m128i __C, const int __imm)
> >
> >  _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
> >__m128i __C, const int __imm)
> >
> > Uros.
>
> As attached, I have renamed above intrinsics according to
> aforementioned convention:
>
> [ __m512. __W,] __mmask. __U, __m512x __A, __m512x __B, ..., const int
> _imm, const int __R].

LGTM.

Thanks,
Uros.


Re: [PATCH] x86: Update VFIXUPIMM* Intrinsics to align with the latest Intel SDM

2018-11-04 Thread Wei Xiao
> Please also rename these:
>
>  _mm512_mask_fixupimm_round_pd (__m512d __A, __mmask8 __U, __m512d __B,
> __m512i __C, const int __imm, const int __R)
>
>  _mm512_mask_fixupimm_round_ps (__m512 __A, __mmask16 __U, __m512 __B,
> __m512i __C, const int __imm, const int __R)
>
>  _mm_mask_fixupimm_round_sd (__m128d __A, __mmask8 __U, __m128d __B,
>  __m128i __C, const int __imm, const int __R)
>
>  _mm_mask_fixupimm_round_ss (__m128 __A, __mmask8 __U, __m128 __B,
>  __m128i __C, const int __imm, const int __R)
>
>  _mm512_mask_fixupimm_pd (__m512d __A, __mmask8 __U, __m512d __B,
>   __m512i __C, const int __imm)
>
> _mm512_mask_fixupimm_ps (__m512 __A, __mmask16 __U, __m512 __B,
>   __m512i __C, const int __imm)
>
>  _mm_mask_fixupimm_sd (__m128d __A, __mmask8 __U, __m128d __B,
>__m128i __C, const int __imm)
>
>  _mm_mask_fixupimm_ss (__m128 __A, __mmask8 __U, __m128 __B,
>__m128i __C, const int __imm)
>
>  _mm256_mask_fixupimm_pd (__m256d __A, __mmask8 __U, __m256d __B,
>   __m256i __C, const int __imm)
>
>  _mm256_mask_fixupimm_ps (__m256 __A, __mmask8 __U, __m256 __B,
>   __m256i __C, const int __imm)
>
>   _mm_mask_fixupimm_pd (__m128d __A, __mmask8 __U, __m128d __B,
>__m128i __C, const int __imm)
>
>  _mm_mask_fixupimm_ps (__m128 __A, __mmask8 __U, __m128 __B,
>__m128i __C, const int __imm)
>
> Uros.

As attached, I have renamed the above intrinsics according to the
aforementioned convention:

[ __m512. __W,] __mmask. __U, __m512x __A, __m512x __B, ..., const int
_imm, const int __R].

Wei


update-vfixupimm-v3.diff
Description: Binary data


Re: [ARM] Implement division using vrecpe, vrecps

2018-11-04 Thread Prathamesh Kulkarni
On Fri, 2 Nov 2018 at 19:08, Wilco Dijkstra  wrote:
>
> Prathamesh Kulkarni wrote:
>
> > This is a rebased version of patch that adds a pattern to neon.md for
> > implementing division with multiplication by reciprocal using
> > vrecpe/vrecps with -funsafe-math-optimizations excluding -Os.
> > The newly added test-cases are not vectorized on armeb target with
> > -O2. I posted the analysis for that here:
> > https://gcc.gnu.org/ml/gcc-patches/2016-05/msg01765.html
>
> I don't think doing this unconditionally for any CPU is a good idea. On 
> AArch64
> we don't enable this for any core since it's not really faster (newer CPUs 
> have
> significantly improved division and the reciprocal instructions reduce 
> throughput
> of other FMAs). On wrf doing reciprocal square root is far better than 
> reciprocal
> division, but it's only faster on some specific CPUs, so it's not enabled by 
> default.
Hi Wilco,
Thanks for the suggestions. The last time I benchmarked the patch
(around Jan 2016)
I got the following results with the patch for SPEC2006:

a15: +0.64% overall, 481.wrf: +6.46%
a53: +0.21% overall, 416.gamess: -1.39%, 481.wrf: +6.76%
a57: +0.35% overall, 481.wrf: +3.84%
(https://gcc.gnu.org/ml/gcc-patches/2016-01/msg01209.html)

Do these numbers look acceptable ?
I am benchmarking the patch on ToT, and will report if there are any
performance improvements found with the patch.

Thanks,
Prathamesh
>
> Wilco


[PATCH 2/3] Implement P0732R2, class types in non-type template parameters.

2018-11-04 Thread Jason Merrill
There is one significant piece of this that is not implemented yet: the
reliance on defaulted operator<=>, which someone else has been working on.
So, for the moment those lines are commented out of the testcases.

One tricky bit was treating template parameters of class type as const
lvalues without making their decltype const; for this I used a
VIEW_CONVERT_EXPR wrapper, which previously could only appear in templates
as location wrappers.

The user-defined literal parts of P0732R2 are in the next patch.

gcc/cp/
* error.c (dump_simple_decl): Look through a template parm object.
* mangle.c (write_template_arg): Likewise.
(mangle_template_parm_object): New.
* pt.c (template_parm_object_p, get_template_parm_object): New.
(invalid_tparm_referent_p): Factor from convert_nontype_argument.
(convert_nontype_argument, invalid_nontype_parm_type_p): Handle
class-type template arguments.
* tree.c (lvalue_kind): Likewise.
gcc/c-family/
* c-cppbuiltin.c (c_cpp_builtins): Add
__cpp_nontype_template_parameter_class.
libiberty/
* cp-demangle.c (d_dump, d_make_comp, d_count_templates_scopes)
(d_print_comp_inner): Handle DEMANGLE_COMPONENT_TPARM_OBJ.
(d_special_name): Handle TA.
(d_expression_1): Fix demangling of brace-enclosed initializer list.
include/
* demangle.h (enum demangle_component_type): Add
DEMANGLE_COMPONENT_TPARM_OBJ.
---
 gcc/cp/cp-tree.h|   2 +
 include/demangle.h  |   3 +
 gcc/c-family/c-cppbuiltin.c |   1 +
 gcc/cp/error.c  |   3 +
 gcc/cp/mangle.c |  17 +
 gcc/cp/pt.c | 352 +---
 gcc/cp/semantics.c  |  16 +-
 gcc/cp/tree.c   |   6 +
 libiberty/cp-demangle.c |  17 +-
 gcc/testsuite/g++.dg/cpp2a/feat-cxx2a.C |   6 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class1.C |  33 ++
 gcc/testsuite/g++.dg/cpp2a/nontype-class2.C |  17 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class3.C |  16 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class4.C |  18 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class5.C |  12 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class7.C |  26 ++
 gcc/testsuite/g++.dg/cpp2a/nontype-class8.C |  26 ++
 gcc/testsuite/g++.dg/template/crash55.C |   2 +-
 gcc/testsuite/g++.dg/template/nontype16.C   |   2 +-
 gcc/testsuite/g++.dg/template/nontype4.C|   2 +-
 gcc/testsuite/g++.dg/template/nontype5.C|   2 +-
 gcc/testsuite/g++.dg/template/operator10.C  |   2 +-
 gcc/c-family/ChangeLog  |   5 +
 gcc/cp/ChangeLog|  10 +
 include/ChangeLog   |   5 +
 libiberty/ChangeLog |   8 +
 libiberty/testsuite/demangle-expected   |   6 +
 27 files changed, 494 insertions(+), 121 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class2.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class3.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class4.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class5.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class7.C
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class8.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 6d49744b830..61b431e5f9d 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -6746,6 +6746,7 @@ extern bool variable_template_specialization_p  (tree);
 extern bool alias_type_or_template_p(tree);
 extern bool alias_template_specialization_p (const_tree);
 extern bool dependent_alias_template_spec_p (const_tree);
+extern bool template_parm_object_p (const_tree);
 extern bool explicit_class_specialization_p (tree);
 extern bool push_tinst_level(tree);
 extern bool push_tinst_level_loc(tree, location_t);
@@ -7446,6 +7447,7 @@ extern tree mangle_tls_init_fn(tree);
 extern tree mangle_tls_wrapper_fn  (tree);
 extern bool decl_tls_wrapper_p (tree);
 extern tree mangle_ref_init_variable   (tree);
+extern tree mangle_template_parm_object(tree);
 extern char * get_mangled_vtable_map_var_name   (tree);
 extern bool mangle_return_type_p   (tree);
 extern tree mangle_decomp  (tree, vec &);
diff --git a/include/demangle.h b/include/demangle.h
index b8d57cf2951..4f920f2b7f5 100644
--- a/include/demangle.h
+++ b/include/demangle.h
@@ -392,6 +392,9 @@ enum demangle_component_type
  template argument, and the right subtree is either NULL or
  another TEMPLATE_ARGLIST node.  */
   DEMANGLE_COMPONENT_TEMPLATE_ARGLIST,
+  /* A template parameter object (C++20).  The left subtree is the
+ corresponding template 

[PATCH 3/3] Implement UDL changes from P0732R2.

2018-11-04 Thread Jason Merrill
Implementing the UDL changes was pretty straightforward; I simplified
cp_parser_userdef_string_literal using the releasing_vec type from mangle.c.

While looking at this, I realized that the string UDL template taking a
character pack that we implemented for C++14 didn't actually make it into
C++14, so I've added a pedwarn for it and no longer suggest it in the
diagnostic about an invalid UDL template.

* cp-tree.h (struct releasing_vec): Move from mangle.c.
Add get_ref method.
* parser.c (cp_parser_userdef_string_literal): Use it.  Handle
passing the string to a single template parameter of class type.
(cp_parser_template_declaration_after_parameters): Allow it.
Pedwarn about the character pack template that was proposed but not
accepted for C++14, and don't suggest it.
---
 gcc/cp/cp-tree.h  | 28 
 gcc/cp/mangle.c   | 24 ---
 gcc/cp/parser.c   | 70 ---
 gcc/testsuite/g++.dg/cpp0x/udlit-overflow.C   | 36 +-
 gcc/testsuite/g++.dg/cpp1y/pr58708.C  |  1 +
 gcc/testsuite/g++.dg/cpp1y/pr59867.C  |  1 +
 .../g++.dg/cpp1y/udlit-char-template-sfinae.C |  1 +
 ...it-char-template-vs-std-literal-operator.C |  1 +
 .../g++.dg/cpp1y/udlit-char-template.C|  1 +
 .../g++.dg/cpp1y/udlit-char-template2.C   |  1 +
 gcc/testsuite/g++.dg/cpp2a/nontype-class6.C   | 17 +
 gcc/cp/ChangeLog  |  9 +++
 12 files changed, 121 insertions(+), 69 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp2a/nontype-class6.C

diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index 61b431e5f9d..a895d0042ab 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -871,6 +871,34 @@ struct named_decl_hash : ggc_remove 
   static void mark_deleted (value_type) { gcc_unreachable (); }
 };
 
+/* Simplified unique_ptr clone to release a tree vec on exit.  */
+
+struct releasing_vec
+{
+  typedef vec vec_t;
+
+  releasing_vec (vec_t *v): v(v) { }
+  releasing_vec (): v(make_tree_vector ()) { }
+
+  /* Copy ops are deliberately declared but not defined,
+ copies must always be elided.  */
+  releasing_vec (const releasing_vec &);
+  releasing_vec = (const releasing_vec &);
+
+  vec_t * () const { return *v; }
+  vec_t *operator-> () const { return v; }
+  vec_t *get() const { return v; }
+  operator vec_t *() const { return v; }
+  tree& operator[] (unsigned i) const { return (*v)[i]; }
+
+  /* Necessary for use with vec** and vec*& interfaces.  */
+  vec_t *&get_ref () { return v; }
+
+  ~releasing_vec() { release_tree_vector (v); }
+private:
+  vec_t *v;
+};
+
 struct GTY(()) tree_template_decl {
   struct tree_decl_common common;
   tree arguments;
diff --git a/gcc/cp/mangle.c b/gcc/cp/mangle.c
index 1b323015ded..b9d8ee20116 100644
--- a/gcc/cp/mangle.c
+++ b/gcc/cp/mangle.c
@@ -1545,30 +1545,6 @@ write_abi_tags (tree tags)
   release_tree_vector (vec);
 }
 
-/* Simplified unique_ptr clone to release a tree vec on exit.  */
-
-struct releasing_vec
-{
-  typedef vec vec_t;
-
-  releasing_vec (vec_t *v): v(v) { }
-  releasing_vec (): v(make_tree_vector ()) { }
-
-  /* Copy constructor is deliberately declared but not defined,
- copies must always be elided.  */
-  releasing_vec (const releasing_vec &);
-
-  vec_t * () const { return *v; }
-  vec_t *operator-> () const { return v; }
-  vec_t *get () const { return v; }
-  operator vec_t *() const { return v; }
-  tree& operator[] (unsigned i) const { return (*v)[i]; }
-
-  ~releasing_vec() { release_tree_vector (v); }
-private:
-  vec_t *v;
-};
-
 /* True iff the TREE_LISTS T1 and T2 of ABI tags are equivalent.  */
 
 static bool
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 5ea8e8ca012..30a47662f55 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -4565,40 +4565,47 @@ cp_parser_userdef_string_literal (tree literal)
   tree value = USERDEF_LITERAL_VALUE (literal);
   int len = TREE_STRING_LENGTH (value)
/ TREE_INT_CST_LOW (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (value - 1;
-  tree decl, result;
-  vec *args;
+  tree decl;
 
   /* Build up a call to the user-defined operator.  */
   /* Lookup the name we got back from the id-expression.  */
-  args = make_tree_vector ();
+  releasing_vec rargs;
+  vec * = rargs.get_ref();
   vec_safe_push (args, value);
   vec_safe_push (args, build_int_cst (size_type_node, len));
   decl = lookup_literal_operator (name, args);
 
   if (decl && decl != error_mark_node)
-{
-  result = finish_call_expr (decl, , false, true,
-tf_warning_or_error);
-  release_tree_vector (args);
-  return result;
-}
-  release_tree_vector (args);
+return finish_call_expr (decl, , false, true,
+tf_warning_or_error);
 
-  /* Look for a template function with typename parameter CharT
- and parameter pack CharT...  Call the function with
- 

[C++ PATCH 1/3] Fix various latent issues revealed by P0732 work.

2018-11-04 Thread Jason Merrill
The initialized_type hunk fixes handling of void AGGR_INIT_EXPRs that call a
non-constructor; an AGGR_INIT_EXPR can have void type if its initialization
semantics are more complicated than just expanding the call.

The cxx_eval_vec_init_1 hunk corrects AGGR_INIT_EXPRs that were
nonsensically built to initialize an object of void type.  And the
build_aggr_init_expr hunk makes sure we don't do that again.

The ocp_convert and cxx_eval_outermost_constant_expr hunks deal with making
sure that a constant CONSTRUCTOR has the right type.

Tested x86_64-pc-linux-gnu, applying to trunk.

* cvt.c (ocp_convert): Don't wrap a CONSTRUCTOR in a NOP_EXPR.
* constexpr.c (initialized_type): Fix AGGR_INIT_EXPR handling.
(cxx_eval_vec_init_1): Correct type of AGGR_INIT_EXPR.
(cxx_eval_outermost_constant_expr): Make sure a CONSTRUCTOR has the
right type.  Don't wrap a CONSTRUCTOR if one was passed in.
* tree.c (build_aggr_init_expr): Check for void.
---
 gcc/cp/constexpr.c | 22 +-
 gcc/cp/cvt.c   | 10 +-
 gcc/cp/tree.c  |  2 ++
 gcc/cp/ChangeLog   |  7 +++
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 7692b1727da..4fb1ba527e3 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -2778,8 +2778,10 @@ initialized_type (tree t)
 {
   if (TYPE_P (t))
 return t;
-  tree type = cv_unqualified (TREE_TYPE (t));
-  if (TREE_CODE (t) == CALL_EXPR || TREE_CODE (t) == AGGR_INIT_EXPR)
+  tree type = TREE_TYPE (t);
+  if (!VOID_TYPE_P (type))
+/* No need to look deeper.  */;
+  else if (TREE_CODE (t) == CALL_EXPR)
 {
   /* A constructor call has void type, so we need to look deeper.  */
   tree fn = get_function_named_in_call (t);
@@ -2787,7 +2789,9 @@ initialized_type (tree t)
  && DECL_CXX_CONSTRUCTOR_P (fn))
type = DECL_CONTEXT (fn);
 }
-  return type;
+  else if (TREE_CODE (t) == AGGR_INIT_EXPR)
+type = TREE_TYPE (AGGR_INIT_EXPR_SLOT (t));
+  return cv_unqualified (type);
 }
 
 /* We're about to initialize element INDEX of an array or class from VALUE.
@@ -3000,7 +3004,7 @@ cxx_eval_vec_init_1 (const constexpr_ctx *ctx, tree 
atype, tree init,
, elttype, LOOKUP_NORMAL,
complain);
   release_tree_vector (argvec);
-  init = build_aggr_init_expr (TREE_TYPE (init), init);
+  init = build_aggr_init_expr (elttype, init);
   pre_init = true;
 }
 
@@ -5089,7 +5093,7 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
r = build_nop (TREE_TYPE (r), r);
   TREE_CONSTANT (r) = false;
 }
-  else if (non_constant_p || r == t)
+  else if (non_constant_p)
 return t;
 
   if (should_unshare)
@@ -5097,18 +5101,18 @@ cxx_eval_outermost_constant_expr (tree t, bool 
allow_non_constant,
 
   if (TREE_CODE (r) == CONSTRUCTOR && CLASS_TYPE_P (TREE_TYPE (r)))
 {
+  r = adjust_temp_type (type, r);
   if (TREE_CODE (t) == TARGET_EXPR
  && TARGET_EXPR_INITIAL (t) == r)
return t;
-  else
+  else if (TREE_CODE (t) != CONSTRUCTOR)
{
  r = get_target_expr (r);
  TREE_CONSTANT (r) = true;
- return r;
}
 }
-  else
-return r;
+
+  return r;
 }
 
 /* Returns true if T is a valid subexpression of a constant expression,
diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c
index 315b0d6a65a..b04e9a70652 100644
--- a/gcc/cp/cvt.c
+++ b/gcc/cp/cvt.c
@@ -725,7 +725,8 @@ ocp_convert (tree type, tree expr, int convtype, int flags,
 /* We need a new temporary; don't take this shortcut.  */;
   else if (same_type_ignoring_top_level_qualifiers_p (type, TREE_TYPE (e)))
 {
-  if (same_type_p (type, TREE_TYPE (e)))
+  tree etype = TREE_TYPE (e);
+  if (same_type_p (type, etype))
/* The call to fold will not always remove the NOP_EXPR as
   might be expected, since if one of the types is a typedef;
   the comparison in fold is just equality of pointers, not a
@@ -743,9 +744,16 @@ ocp_convert (tree type, tree expr, int convtype, int flags,
{
  /* Don't build a NOP_EXPR of class type.  Instead, change the
 type of the temporary.  */
+ gcc_assert (same_type_ignoring_top_level_qualifiers_p (type, etype));
  TREE_TYPE (e) = TREE_TYPE (TARGET_EXPR_SLOT (e)) = type;
  return e;
}
+  else if (TREE_CODE (e) == CONSTRUCTOR)
+   {
+ gcc_assert (same_type_ignoring_top_level_qualifiers_p (type, etype));
+ TREE_TYPE (e) = type;
+ return e;
+   }
   else
{
  /* We shouldn't be treating objects of ADDRESSABLE type as
diff --git a/gcc/cp/tree.c b/gcc/cp/tree.c
index 74018e97bb7..51af9f2015e 100644
--- a/gcc/cp/tree.c
+++ b/gcc/cp/tree.c
@@ -576,6 +576,8 @@ build_aggr_init_expr (tree type, tree init)
   tree rval;
   int is_ctor;
 
+  

[C++ PATCH] PR c++/60503 - wrong lambda attribute syntax.

2018-11-04 Thread Jason Merrill
This patch fixes two issues with lambda attribute handling: First, it was in
the wrong place in the grammar.  Second, it was treating attributes as
applying to the whole declaration rather than to the function type, as
specified by the standard.

Tested x86_64-pc-linux-gnu, applying to trunk.

* parser.c (cp_parser_lambda_declarator_opt): Fix attribute
handling.
---
 gcc/cp/parser.c  | 11 +--
 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr1.C |  3 +++
 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr2.C |  5 +
 gcc/cp/ChangeLog |  6 ++
 4 files changed, 19 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr1.C
 create mode 100644 gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr2.C

diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index d01c92431ef..5ea8e8ca012 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -10596,8 +10596,6 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, 
tree lambda_expr)
 
   parens.require_close (parser);
 
-  attributes = cp_parser_attributes_opt (parser);
-
   /* In the decl-specifier-seq of the lambda-declarator, each
 decl-specifier shall either be mutable or constexpr.  */
   int declares_class_or_enum;
@@ -10618,6 +10616,8 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, 
tree lambda_expr)
   /* Parse optional exception specification.  */
   exception_spec = cp_parser_exception_specification_opt (parser);
 
+  attributes = cp_parser_std_attribute_spec_seq (parser);
+
   /* Parse optional trailing return type.  */
   if (cp_lexer_next_token_is (parser->lexer, CPP_DEREF))
 {
@@ -10668,15 +10668,14 @@ cp_parser_lambda_declarator_opt (cp_parser* parser, 
tree lambda_expr)
REF_QUAL_NONE,
   tx_qual,
   exception_spec,
-   /*late_return_type=*/NULL_TREE,
+   return_type,
/*requires_clause*/NULL_TREE);
 declarator->id_loc = LAMBDA_EXPR_LOCATION (lambda_expr);
-if (return_type)
-  declarator->u.function.late_return_type = return_type;
+declarator->std_attributes = attributes;
 
 fco = grokmethod (_type_specs,
  declarator,
- attributes);
+ NULL_TREE);
 if (fco != error_mark_node)
   {
DECL_INITIALIZED_IN_CLASS_P (fco) = 1;
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr1.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr1.C
new file mode 100644
index 000..9aa6506d5ed
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr1.C
@@ -0,0 +1,3 @@
+// { dg-do compile { target c++11 } }
+
+auto l = []() [[noreturn]] {}; // { dg-warning "ignored" }
diff --git a/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr2.C 
b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr2.C
new file mode 100644
index 000..82d88f370c4
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/lambda/lambda-attr2.C
@@ -0,0 +1,5 @@
+// PR c++/60503
+// { dg-do compile { target c++11 } }
+
+auto l = []() mutable noexcept [[ ]] {};
+auto m = []() [[ ]] mutable noexcept {}; // { dg-error "" }
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index bb77e0b42ab..7e9c0e2642a 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,9 @@
+2018-11-04  Jason Merrill  
+
+   PR c++/60503 - wrong lambda attribute syntax.
+   * parser.c (cp_parser_lambda_declarator_opt): Fix attribute
+   handling.
+
 2018-11-02  Nathan Sidwell  
 
* decl.c (duplicate_decls): Refactor checks.

base-commit: 703d2f69fad4772dc4aedb5327b5e3d88e8e1843
-- 
2.17.2



[PATCH][x86_64] Fix PR87853, _mm_cmpgt_epi8 broken with -funsigned-char

2018-11-04 Thread Terry Guo
Hi there,

This patch intends to fix PR87853 by introducing a new 'signed char'
vector type to avoid the impact of option -funsigned-char. Tested with
bootstrap and regression tests on x86_64. No regressions.

Is it OK to trunk and release branch?

BR,
Terry

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index ac121a8..dc10a11 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,12 @@
+2018-11-05  Xuepeng Guo  
+
+   PR target/87853
+   * config/i386/emmintrin.h (__v16qs): New to cope with option
+   -funsigned-char.
+   (_mm_cmpeq_epi8): Replace __v16qi with __v16qs.
+   (_mm_cmplt_epi8): Likewise.
+   (_mm_cmpgt_epi8): Likewise.
+
 2018-11-04  Bernd Edlinger  

PR tree-optimization/86572
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index 7a6ff80..3c1f04b 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -45,6 +45,7 @@ typedef unsigned int __v4su __attribute__
((__vector_size__ (16)));
 typedef short __v8hi __attribute__ ((__vector_size__ (16)));
 typedef unsigned short __v8hu __attribute__ ((__vector_size__ (16)));
 typedef char __v16qi __attribute__ ((__vector_size__ (16)));
+typedef signed char __v16qs __attribute__ ((__vector_size__ (16)));
 typedef unsigned char __v16qu __attribute__ ((__vector_size__ (16)));

 /* The Intel API is flexible enough that we must allow aliasing with other
@@ -1295,7 +1296,7 @@ _mm_xor_si128 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
 _mm_cmpeq_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i) ((__v16qi)__A == (__v16qi)__B);
+  return (__m128i) ((__v16qs)__A == (__v16qs)__B);
 }

 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
@@ -1313,7 +1314,7 @@ _mm_cmpeq_epi32 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
 _mm_cmplt_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i) ((__v16qi)__A < (__v16qi)__B);
+  return (__m128i) ((__v16qs)__A < (__v16qs)__B);
 }

 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
@@ -1331,7 +1332,7 @@ _mm_cmplt_epi32 (__m128i __A, __m128i __B)
 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))
 _mm_cmpgt_epi8 (__m128i __A, __m128i __B)
 {
-  return (__m128i) ((__v16qi)__A > (__v16qi)__B);
+  return (__m128i) ((__v16qs)__A > (__v16qs)__B);
 }

 extern __inline __m128i __attribute__((__gnu_inline__,
__always_inline__, __artificial__))


Re: [PATCH, OpenACC] Update documentation to mention OpenACC 2.5

2018-11-04 Thread Sandra Loosemore

On 11/02/2018 04:38 AM, Chung-Lin Tang wrote:

Hi Thomas,
this patch (mostly by yourself:) ) are the changes to the documentation 
to now state OpenACC 2.5 support.

I believe this is within your maintainership scope.


I took a look at it with my doc maintainer hat on too, and the patch is 
fine with me as long as there are no technical correctness issues.


-Sandra


Re: [PATCH] Fix PR 86572

2018-11-04 Thread H.J. Lu
On Sun, Nov 4, 2018 at 10:02 AM Jeff Law  wrote:
>
> On 10/22/18 9:08 AM, Bernd Edlinger wrote:
> > Hi!
> >
> > This makes c_strlen avoid an unsafe strlen folding of const arguments
> > with non-const offset.  Currently a negative out of bounds offset
> > makes the strlen function return an extremely large number, and
> > at the same time prevents the VRP machinery from determining the correct
> > range of the strlen function in this case.
> >
> > Fixed by doing the whole computation in size_t and casting the
> > result back to ssize_t.
> >
> >
> > Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> > Is it OK for trunk?
> >
> >
> > Thanks
> > Bernd.
> >
> >
> > patch-pr86572.diff
> >
> > gcc:
> > 2018-10-22  Bernd Edlinger  
> >
> >   PR tree-optimization/86572
> >   * builtins.c (c_strlen): Handle negative offsets in a safe way.
> >
> > testsuite:
> > 2018-10-22  Bernd Edlinger  
> >
> >   PR tree-optimization/86572
> >   * gcc.dg/pr86572.c: New test.
> OK.
> jeff

This caused:

/export/gnu/import/git/gcc-test-ia32/src-trunk/gcc/testsuite/gcc.dg/warn-strlen-no-nul.c:56:1:
internal compiler error: verify_gimple failed^M
0x8922dc4 verify_gimple_in_seq(gimple*)^M
../../src-trunk/gcc/tree-cfg.c:5082^M
0x86899d7 gimplify_body(tree_node*, bool)^M
../../src-trunk/gcc/gimplify.c:12859^M
0x8689b8b gimplify_function_tree(tree_node*)^M
../../src-trunk/gcc/gimplify.c:12949^M
0x84f7690 cgraph_node::analyze()^M
../../src-trunk/gcc/cgraphunit.c:667^M
0x84fa1d8 analyze_functions^M
../../src-trunk/gcc/cgraphunit.c:1126^M
0x84fadd3 symbol_table::finalize_compilation_unit()^M
../../src-trunk/gcc/cgraphunit.c:2833^M
Please submit a full bug report,^M
with preprocessed source if appropriate.^M
Please include the complete backtrace with any bug report.^M
See  for instructions.^M
compiler exited with status 1
FAIL: gcc.dg/warn-strlen-no-nul.c (internal compiler error)

on i386.

-- 
H.J.


[PATCH 2/3] Support changing fentry name per function

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

It can be useful to have some classes of functions that use a different
__fentry__ instrumentation than others.  Currently it is only
possible to disable instrumentation on the command line or with
no_instrument_function, but not to change the instrumentation function
on a case-by-case basis.

Add some flexibility to allow changing the instrumentation function
name per file with an option or per function with a new attribute.
This also allows switching to nops for individual functions.

gcc/:

2018-11-04  Andi Kleen  

* config/i386/i386.c (x86_print_call_or_nop): Handle nop name.
(current_fentry_name): Add.
(x86_function_profiler): Handle fentry_name attribute.
(ix86_handle_fentry_name): Add.
(ix86_attribute_table): Add fentry_name.
* config/i386/i386.opt: Add -mfentry-name
* doc/extend.texi: Document fentry_name.
* doc/invoke.texi: Document -mfentry-name.

gcc/testsuite/:

2018-11-04  Andi Kleen  

* gcc.target/i386/fentryname1.c: New test.
---
 gcc/config/i386/i386.c  | 48 ++---
 gcc/config/i386/i386.opt|  4 ++
 gcc/doc/extend.texi |  7 +++
 gcc/doc/invoke.texi |  6 ++-
 gcc/testsuite/gcc.target/i386/fentryname1.c | 15 +++
 5 files changed, 74 insertions(+), 6 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/fentryname1.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f7cd94a8139..8af8a523cb3 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -41206,24 +41206,41 @@ x86_field_alignment (tree type, int computed)
 static void
 x86_print_call_or_nop (FILE *file, const char *target)
 {
-  if (flag_nop_mcount)
+  if (flag_nop_mcount || !strcmp (target, "nop"))
 /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1) */
 fprintf (file, "1:" ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
   else
 fprintf (file, "1:\tcall\t%s\n", target);
 }
 
+static bool
+current_fentry_name (const char **name)
+{
+  tree attr = lookup_attribute ("fentry_name",
+   DECL_ATTRIBUTES (current_function_decl));
+  if (!attr)
+return false;
+  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
+  return true;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.  */
 void
 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
 {
-  const char *mcount_name = (flag_fentry ? MCOUNT_NAME_BEFORE_PROLOGUE
-: MCOUNT_NAME);
-
   if (cfun->machine->endbr_queued_at_entrance)
 fprintf (file, "\t%s\n", TARGET_64BIT ? "endbr64" : "endbr32");
 
+  const char *mcount_name = MCOUNT_NAME;
+
+  if (current_fentry_name (&mcount_name))
+;
+  else if (fentry_name)
+mcount_name = fentry_name;
+  else if (flag_fentry)
+mcount_name = MCOUNT_NAME_BEFORE_PROLOGUE;
+
   if (TARGET_64BIT)
 {
 #ifndef NO_PROFILE_COUNTERS
@@ -45044,6 +45061,26 @@ ix86_expand_round_sse4 (rtx op0, rtx op1)
 
   emit_move_insn (op0, res);
 }
+
+/* Handle fentry_name attribute.  */
+
+static tree
+ix86_handle_fentry_name (tree *node, tree name, tree args,
+int, bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) == FUNCTION_DECL
+  && TREE_CODE (TREE_VALUE (args)) == STRING_CST)
+/* Do nothing else, just set the attribute.  We'll get at
+   it later with lookup_attribute.  */
+;
+  else
+{
+  warning (OPT_Wattributes, "%qE attribute ignored", name);
+  *no_add_attrs = true;
+}
+
+  return NULL_TREE;
+}
 
 
 /* Table of valid machine attributes.  */
@@ -45120,7 +45157,8 @@ static const struct attribute_spec 
ix86_attribute_table[] =
 ix86_handle_fndecl_attribute, NULL },
   { "indirect_return", 0, 0, false, true, true, false,
 NULL, NULL },
-
+  { "fentry_name", 1, 1, true, false, false, false,
+ix86_handle_fentry_name, NULL },
   /* End element.  */
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
 };
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index 5925b75244f..e56f9bce9b0 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -930,6 +930,10 @@ Target Report Var(flag_nop_mcount)
 Generate mcount/__fentry__ calls as nops. To activate they need to be
 patched in.
 
+mfentry-name=
+Target RejectNegative Joined Var(fentry_name)
+Set name of __fentry__ symbol called at function entry.
+
 mskip-rax-setup
 Target Report Var(flag_skip_rax_setup)
 Skip setting up RAX register when passing variable arguments.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 924037ff586..c8761cda151 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5966,6 +5966,13 @@ The @code{indirect_return} attribute can be applied to a 
function,
 as well as variable or type of function pointer to inform the
 compiler that the function may return via indirect branch.
 
+@item 

[PATCH 1/3] Support instrumenting returns of instrumented functions

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

When instrumenting programs using __fentry__ it is often useful
to instrument the function return too. Traditionally this
has been done by patching the return address on the stack
frame on entry. However this is fairly complicated (trace
function has to emulate a stack) and also slow because
it causes a branch misprediction on every return.

Add an option to generate call or nop instrumentation for
every return instead, including patch sections.

This will increase the program size slightly, but can be a
lot faster and simpler.

This version only instruments true returns, not sibling
calls or tail recursion. This matches the semantics of the
original stack.

gcc/:

2018-11-04  Andi Kleen  

* config/i386/i386-opts.h (enum instrument_return): Add.
* config/i386/i386.c (output_return_instrumentation): Add.
(ix86_output_function_return): Call output_return_instrumentation.
(ix86_output_call_insn): Call output_return_instrumentation.
* config/i386/i386.opt: Add -minstrument-return=.
* doc/invoke.texi (-minstrument-return): Document.

gcc/testsuite/:

2018-11-04  Andi Kleen  

* gcc.target/i386/returninst1.c: New test.
* gcc.target/i386/returninst2.c: New test.
* gcc.target/i386/returninst3.c: New test.
---
 gcc/config/i386/i386-opts.h |  6 
 gcc/config/i386/i386.c  | 36 +
 gcc/config/i386/i386.opt| 21 
 gcc/doc/invoke.texi | 14 
 gcc/testsuite/gcc.target/i386/returninst1.c | 14 
 gcc/testsuite/gcc.target/i386/returninst2.c | 21 
 gcc/testsuite/gcc.target/i386/returninst3.c |  9 ++
 7 files changed, 121 insertions(+)
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/returninst3.c

diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index 46366cbfa72..35e9413100e 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -119,4 +119,10 @@ enum indirect_branch {
   indirect_branch_thunk_extern
 };
 
+enum instrument_return {
+  instrument_return_none = 0,
+  instrument_return_call,
+  instrument_return_nop5
+};
+
 #endif
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f9ef0b4445b..f7cd94a8139 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -28336,12 +28336,47 @@ ix86_output_indirect_jmp (rtx call_op)
 return "%!jmp\t%A0";
 }
 
+/* Output return instrumentation for current function if needed.  */
+
+static void
+output_return_instrumentation (void)
+{
+  if (ix86_instrument_return != instrument_return_none
+  && flag_fentry
+  && !DECL_NO_INSTRUMENT_FUNCTION_ENTRY_EXIT (cfun->decl))
+{
+  if (ix86_flag_record_return)
+   fprintf (asm_out_file, "1:\n");
+  switch (ix86_instrument_return)
+   {
+   case instrument_return_call:
+ fprintf (asm_out_file, "\tcall\t__return__\n");
+ break;
+   case instrument_return_nop5:
+ /* 5 byte nop: nopl 0(%[re]ax,%[re]ax,1)  */
+ fprintf (asm_out_file, ASM_BYTE "0x0f, 0x1f, 0x44, 0x00, 0x00\n");
+ break;
+   case instrument_return_none:
+ break;
+   }
+
+  if (ix86_flag_record_return)
+   {
+ fprintf (asm_out_file, "\t.section __return_loc, \"a\",@progbits\n");
+ fprintf (asm_out_file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
+ fprintf (asm_out_file, "\t.previous\n");
+   }
+}
+}
+
 /* Output function return.  CALL_OP is the jump target.  Add a REP
prefix to RET if LONG_P is true and function return is kept.  */
 
 const char *
 ix86_output_function_return (bool long_p)
 {
+  output_return_instrumentation ();
+
   if (cfun->machine->function_return_type != indirect_branch_keep)
 {
   char thunk_name[32];
@@ -28454,6 +28489,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
 
   if (SIBLING_CALL_P (insn))
 {
+  output_return_instrumentation ();
   if (direct_p)
{
  if (ix86_nopic_noplt_attribute_p (call_op))
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e7fbf9b6f99..5925b75244f 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1063,3 +1063,24 @@ Support WAITPKG built-in functions and code generation.
 mcldemote
 Target Report Mask(ISA_CLDEMOTE) Var(ix86_isa_flags2) Save
 Support CLDEMOTE built-in functions and code generation.
+
+minstrument-return=
+Target Report RejectNegative Joined Enum(instrument_return) 
Var(ix86_instrument_return) Init(instrument_return_none)
+Instrument function exit in instrumented functions with __fentry__.
+
+Enum
+Name(instrument_return) Type(enum instrument_return)
+Known choices for return instrumentation with -minstrument-return=
+
+EnumValue
+Enum(instrument_return) String(none) 

[PATCH 3/3] Allow changing the fentry section per file and per function

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

When dynamically patching in/out instrumentation it can be useful
to handle different classes of functions differently. Add support
for changing the fentry section name on the command line
or as a function attribute. This allows functions to be marked differently,
and handled differently in dynamic patching.

gcc/:

2018-11-04  Andi Kleen  

* config/i386/i386.c (current_fentry_section): Add.
(x86_function_profiler): Handle fentry section.
(ix86_attribute_table): Add fentry section.
* config/i386/i386.opt: Add -mfentry-section.
* doc/extend.texi: Document fentry_section attribute.
* doc/invoke.texi: Document -mfentry-section.

gcc/testsuite/:

2018-11-04  Andi Kleen  

* gcc.target/i386/fentryname2.c: New test.
* gcc.target/i386/fentryname3.c: New test.
---
 gcc/config/i386/i386.c  | 28 ++---
 gcc/config/i386/i386.opt|  4 +++
 gcc/doc/extend.texi |  6 +
 gcc/doc/invoke.texi |  6 -
 gcc/testsuite/gcc.target/i386/fentryname2.c | 15 +++
 gcc/testsuite/gcc.target/i386/fentryname3.c | 10 
 6 files changed, 65 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/fentryname2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/fentryname3.c

diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 8af8a523cb3..56a54c7da18 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -41224,6 +41224,17 @@ current_fentry_name (const char **name)
   return true;
 }
 
+static bool
+current_fentry_section (const char **name)
+{
+  tree attr = lookup_attribute ("fentry_section",
+   DECL_ATTRIBUTES (current_function_decl));
+  if (!attr)
+return false;
+  *name = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
+  return true;
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
for profiling a function entry.  */
 void
@@ -41269,9 +41280,18 @@ x86_function_profiler (FILE *file, int labelno 
ATTRIBUTE_UNUSED)
   x86_print_call_or_nop (file, mcount_name);
 }
 
-  if (flag_record_mcount)
+  if (flag_record_mcount
+   || lookup_attribute ("fentry_section",
+DECL_ATTRIBUTES (current_function_decl)))
 {
-  fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
+  const char *sname = "__mcount_loc";
+
+  if (current_fentry_section (&sname))
+   ;
+  else if (fentry_section)
+   sname = fentry_section;
+
+  fprintf (file, "\t.section %s, \"a\",@progbits\n", sname);
   fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
   fprintf (file, "\t.previous\n");
 }
@@ -45062,7 +45082,7 @@ ix86_expand_round_sse4 (rtx op0, rtx op1)
   emit_move_insn (op0, res);
 }
 
-/* Handle fentry_name attribute.  */
+/* Handle fentry_name / fentry_section attribute.  */
 
 static tree
 ix86_handle_fentry_name (tree *node, tree name, tree args,
@@ -45159,6 +45179,8 @@ static const struct attribute_spec 
ix86_attribute_table[] =
 NULL, NULL },
   { "fentry_name", 1, 1, true, false, false, false,
 ix86_handle_fentry_name, NULL },
+  { "fentry_section", 1, 1, true, false, false, false,
+ix86_handle_fentry_name, NULL },
   /* End element.  */
   { NULL, 0, 0, false, false, false, false, NULL, NULL }
 };
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index e56f9bce9b0..e82aca5c882 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -934,6 +934,10 @@ mfentry-name=
 Target RejectNegative Joined Var(fentry_name)
 Set name of __fentry__ symbol called at function entry.
 
+mfentry-section=
+Target RejectNegative Joined Var(fentry_section)
+Set name of section to record mrecord-mcount calls.
+
 mskip-rax-setup
 Target Report Var(flag_skip_rax_setup)
 Skip setting up RAX register when passing variable arguments.
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index c8761cda151..e9c7f0856b7 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -5973,6 +5973,12 @@ call on function entry when function instrumentation is 
enabled
 with @option{-pg -mfentry}. When @var{name} is nop then a 5 byte
 nop sequence is generated.
 
+@item fentry_section("@var{name}")
+@cindex @code{fentry_section} function attribute, x86
+On x86 targets, the @code{fentry_section} attribute sets the name
+of the section to record function entry instrumentation calls in when
+enabled with @option{-pg -mrecord-mcount}
+
 @end table
 
 On the x86, the inliner does not inline a
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index aaa4a596a31..06741145769 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -1301,7 +1301,7 @@ See RS/6000 and PowerPC Options.
 -mcmodel=@var{code-model}  -mabi=@var{name}  -maddress-mode=@var{mode} @gol
 -m32  -m64  -mx32  -m16  -miamcu  -mlarge-data-threshold=@var{num} @gol

[PATCH, d] Committed merge with upstream dmd

2018-11-04 Thread Iain Buclaw
Hi,

I've merged into the D front-end patches sent to upstream dmd, most
address problems found when building the compiler on OSX and Solaris.

This introduces a new header that pulls in system includes for use
only in the DMD front-end part of the compiler, fixing up uses of
problematic functions that are prevalent throughout the code.

Commits merged from dmd.

Fix build of the D frontend on the Hurd and KFreeBSD.
Initial patch from Matthias Klose.
https://github.com/dlang/dmd/pull/8893

Don't care about D/C++ compatibility in C++ port.
Fixes build error in https://gcc.gnu.org/PR87788
https://github.com/dlang/dmd/pull/8895

Allow compiling front-end headers with strict warnings.
https://github.com/dlang/dmd/pull/8909

Add root/system.h header for wrapping system includes.
Fixes https://gcc.gnu.org/PR87865
https://github.com/dlang/dmd/pull/8910

Move checkedint to dmd/root.
https://github.com/dlang/dmd/pull/8912

Use rmem instead of libc for malloc() and strdup().
https://github.com/dlang/dmd/pull/8913

Use align(8) for alignment of UnionExp, fixing several BUS errors
due to alignment issues on SPARC.
https://github.com/dlang/dmd/pull/8914

Don't pass NULL pointer as format parameter to errorSupplemental.
https://github.com/dlang/dmd/pull/8916

-- 
Iain

---
gcc/d/ChangeLog:

2018-11-05  Iain Buclaw  

PR d/87865
* d-system.h: New file.
---
diff --git a/gcc/d/d-system.h b/gcc/d/d-system.h
new file mode 100644
index 000..25a83b675b5
--- /dev/null
+++ b/gcc/d/d-system.h
@@ -0,0 +1,53 @@
+/* d-system.h -- DMD frontend inclusion of gcc header files.
+ * Copyright (C) 2018 Free Software Foundation, Inc.
+ *
+ * GCC is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * GCC is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with GCC; see the file COPYING3.  If not see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCC_D_SYSTEM_H
+#define GCC_D_SYSTEM_H
+
+#include "config.h"
+#include "system.h"
+
+/* Used by the dmd front-end to determine if we have POSIX-style IO.  */
+#define POSIX (__linux__ || __GLIBC__ || __gnu_hurd__ || __APPLE__ \
+	   || __FreeBSD__ || __OpenBSD__ || __DragonFly__ || __sun)
+
+/* Forward assert invariants to gcc_assert.  */
+#undef assert
+#define assert(EXPR) gcc_assert(EXPR)
+
+/* Forward ctype.h macros used by the dmd front-end to safe-ctype.h.  */
+#undef isalpha
+#define isalpha(c) ISALPHA(c)
+#undef isalnum
+#define isalnum(c) ISALNUM(c)
+#undef isdigit
+#define isdigit(c) ISDIGIT(c)
+#undef islower
+#define islower(c) ISLOWER(c)
+#undef isprint
+#define isprint(c) ISPRINT(c)
+#undef isspace
+#define isspace(c) ISSPACE(c)
+#undef isupper
+#define isupper(c) ISUPPER(c)
+#undef isxdigit
+#define isxdigit(c) ISXDIGIT(c)
+#undef tolower
+#define tolower(c) TOLOWER(c)
+
+#endif  /* GCC_D_SYSTEM_H  */
diff --git a/gcc/d/dmd/MERGE b/gcc/d/dmd/MERGE
new file mode 100644
index 000..7727205bed4
--- /dev/null
+++ b/gcc/d/dmd/MERGE
@@ -0,0 +1,4 @@
+6243fa6d2ceab4615a9fe21c5bc9484e52bb2d1e
+
+The first line of this file holds the git revision number of the last
+merge done from the dlang/dmd repository.
diff --git a/gcc/d/dmd/access.c b/gcc/d/dmd/access.c
index 37e9c8681d3..cd60cedc2a5 100644
--- a/gcc/d/dmd/access.c
+++ b/gcc/d/dmd/access.c
@@ -7,10 +7,7 @@
  * https://github.com/D-Programming-Language/dmd/blob/master/src/access.c
  */
 
-#include 
-#include 
-#include 
-
+#include "root/dsystem.h"
 #include "root/root.h"
 #include "root/rmem.h"
 
diff --git a/gcc/d/dmd/aggregate.h b/gcc/d/dmd/aggregate.h
index d7db82b0f0e..cac0b8efd9f 100644
--- a/gcc/d/dmd/aggregate.h
+++ b/gcc/d/dmd/aggregate.h
@@ -191,7 +191,7 @@ public:
 void semantic(Scope *sc);
 void semanticTypeInfoMembers();
 Dsymbol *search(const Loc &loc, Identifier *ident, int flags = SearchLocalsOnly);
-const char *kind();
+const char *kind() const;
 void finalizeSize();
 bool fit(Loc loc, Scope *sc, Expressions *elements, Type *stype);
 bool isPOD();
@@ -205,7 +205,7 @@ class UnionDeclaration : public StructDeclaration
 public:
 UnionDeclaration(Loc loc, Identifier *id);
 Dsymbol *syntaxCopy(Dsymbol *s);
-const char *kind();
+const char *kind() const;
 
 UnionDeclaration *isUnionDeclaration() { return this; }
 void accept(Visitor *v) { v->visit(this); }
@@ -306,7 +306,7 @@ public:
 virtual bool isCPPinterface() const;
 bool isAbstract();
 virtual int vtblOffset() const;
-const char *kind();

[PATCH v3][C][ADA] use function descriptors instead of trampolines in C

2018-11-04 Thread Uecker, Martin

Hi Joseph,

here is a new version of this patch which adds a warning
for targets which do not support -fno-trampolines  and
only runs the test case on architectures where this is
supported. It seems that documentation for this general
feature has improved in the meantime so I only mention
C as supported.


Best,
Martin

diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 5cf291da2d5..e75500c647a 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,13 @@
+2018-11-03  Martin Uecker  
+
+   * common.opt (flag_trampolines): Change default.
+   * calls.c (prepare_call_address): Remove check for
+   flag_trampolines.  Decision is now made in FEs.
+   * tree-nested.c (convert_tramp_reference_op): Likewise.
+   * toplev.c (process_options): Add warning for -fno-trampolines on
+   unsupported targets.
+   * doc/invoke.texi (-fno-trampolines): Document support for C.
+
 2018-11-02  Aaron Sawdey  
 
    * config/rs6000/rs6000-string.c (expand_strncmp_gpr_sequence): Pay
diff --git a/gcc/ada/ChangeLog b/gcc/ada/ChangeLog
index 73666129f55..a7462edfc71 100644
--- a/gcc/ada/ChangeLog
+++ b/gcc/ada/ChangeLog
@@ -1,3 +1,8 @@
+2018-11-03  Martin Uecker  
+
+   * gcc-interface/trans.c (Attribute_to_gnu): Add check for
+   flag_trampolines.
+
 2018-10-22  Eric Botcazou  
 
    * gcc-interface/utils.c (unchecked_convert): Use local variables for
diff --git a/gcc/ada/gcc-interface/trans.c b/gcc/ada/gcc-interface/trans.c
index ce2d43f989e..b79f2373c63 100644
--- a/gcc/ada/gcc-interface/trans.c
+++ b/gcc/ada/gcc-interface/trans.c
@@ -1753,7 +1753,8 @@ Attribute_to_gnu (Node_Id gnat_node, tree 
*gnu_result_type_p, int attribute)
      if ((attribute == Attr_Access
       || attribute == Attr_Unrestricted_Access)
      && targetm.calls.custom_function_descriptors > 0
-     && Can_Use_Internal_Rep (Etype (gnat_node)))
+     && Can_Use_Internal_Rep (Etype (gnat_node))
+  && (flag_trampolines != 1))
    FUNC_ADDR_BY_DESCRIPTOR (gnu_expr) = 1;
 
      /* Otherwise, we need to check that we are not violating the
@@ -4330,7 +4331,8 @@ Call_to_gnu (Node_Id gnat_node, tree *gnu_result_type_p, 
tree gnu_target,
   /* If the access type doesn't require foreign-compatible representation,
     be prepared for descriptors.  */
   if (targetm.calls.custom_function_descriptors > 0
-     && Can_Use_Internal_Rep (Etype (Prefix (Name (gnat_node)
+     && Can_Use_Internal_Rep (Etype (Prefix (Name (gnat_node
+  && (flag_trampolines != 1))
    by_descriptor = true;
 }
   else if (Nkind (Name (gnat_node)) == N_Attribute_Reference)
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog
index 708ef5d7da2..62823ccf5c7 100644
--- a/gcc/c/ChangeLog
+++ b/gcc/c/ChangeLog
@@ -1,3 +1,10 @@
+2018-11-03  Martin Uecker  
+
+   * c-objc-common.h: Define LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS.
+   * c-typeck.c (function_to_pointer_conversion): If using descriptors
+   instead of trampolines, amend function address with
+   FUNC_ADDR_BY_DESCRIPTOR and calls with CALL_EXPR_BY_DESCRIPTOR.
+
 2018-10-29  David Malcolm  
 
    * c-decl.c (implicit_decl_warning): Update "is there a suggestion"
diff --git a/gcc/c/c-objc-common.h b/gcc/c/c-objc-common.h
index 78e768c2366..ef039560eb9 100644
--- a/gcc/c/c-objc-common.h
+++ b/gcc/c/c-objc-common.h
@@ -110,4 +110,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #undef LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P
 #define LANG_HOOKS_TREE_INLINING_VAR_MOD_TYPE_P c_vla_unspec_p
+
+#undef LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS
+#define LANG_HOOKS_CUSTOM_FUNCTION_DESCRIPTORS true
 #endif /* GCC_C_OBJC_COMMON */
diff --git a/gcc/c/c-typeck.c b/gcc/c/c-typeck.c
index 9d09b8d65fd..afae9de41e7 100644
--- a/gcc/c/c-typeck.c
+++ b/gcc/c/c-typeck.c
@@ -1912,7 +1912,13 @@ function_to_pointer_conversion (location_t loc, tree exp)
   if (TREE_NO_WARNING (orig_exp))
 TREE_NO_WARNING (exp) = 1;
 
-  return build_unary_op (loc, ADDR_EXPR, exp, false);
+  tree r = build_unary_op (loc, ADDR_EXPR, exp, false);
+
+  if ((TREE_CODE(r) == ADDR_EXPR)
+  && (flag_trampolines == 0))
+ FUNC_ADDR_BY_DESCRIPTOR (r) = 1;
+
+  return r;
 }
 
 /* Mark EXP as read, not just set, for set but not used -Wunused
@@ -3134,6 +3140,11 @@ build_function_call_vec (location_t loc, vec<location_t> 
arg_loc,
   else
 result = build_call_array_loc (loc, TREE_TYPE (fntype),
       function, nargs, argarray);
+
+  if ((TREE_CODE (result) == CALL_EXPR)
+  && (flag_trampolines == 0))
+CALL_EXPR_BY_DESCRIPTOR (result) = 1;
+
   /* If -Wnonnull warning has been diagnosed, avoid diagnosing it again
  later.  */
   if (warned_p && TREE_CODE (result) == CALL_EXPR)
diff --git a/gcc/calls.c b/gcc/calls.c
index 8978d3b42fd..95ab7d8405b 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -230,7 +230,7 @@ prepare_call_address (tree fndecl_or_type, rtx 

Re: [PATCH] i386: Remove duplicated AVX2/AVX512 vec_dup patterns

2018-11-04 Thread H.J. Lu
On Sun, Nov 4, 2018 at 11:45 AM Uros Bizjak  wrote:
>
> On Sun, Nov 4, 2018 at 8:17 PM H.J. Lu  wrote:
> >
> > On Sun, Nov 4, 2018 at 8:41 AM Uros Bizjak  wrote:
> > >
> > > On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu  wrote:
> > > >
> > > > Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with
> > > > subreg.  gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
> > > >
> > > >  avx2_test:
> > > > .cfi_startproc
> > > > -   vmovaps x(%rip), %xmm1
> > > > -   vbroadcastss%xmm1, %ymm0
> > > > +   vbroadcastssx(%rip), %ymm0
> > > > vmovaps %ymm0, y(%rip)
> > > > vzeroupper
> > > > ret
> > > > .cfi_endproc
> > > >
> > > > gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
> > > >
> > > > @@ -113,7 +113,7 @@ f10:
> > > > .cfi_startproc
> > > > vmovaps %ymm0, %ymm16
> > > > vpermilps   $85, %ymm16, %ymm16
> > > > -   vbroadcastss%xmm16, %ymm16
> > > > +   vshuff32x4  $0x0, %ymm16, %ymm16, %ymm16
> > > > vzeroupper
> > > > ret
> > > > .cfi_endproc
> > > > @@ -153,8 +153,7 @@ f12:
> > > >  f13:
> > > >  .LFB12:
> > > > .cfi_startproc
> > > > -   vmovaps (%rdi), %ymm16
> > > > -   vbroadcastss%xmm16, %ymm16
> > > > +   vbroadcastss(%rdi), %ymm16
> > > > vzeroupper
> > > > ret
> > > > .cfi_endproc
> > >
> > > Actually, we can achieve the same with pre-reload splitters. Please
> > > see the attached patch for a couple of examples and a fix for
> > > vbroadcastss that accesses the memory in wrong mode.
> > >
> >
> > My patch removes a bunch of duplicated patterns from sse.md.  But
> > yours adds a couple more patterns.   Isn't fewer patterns preferred?
>
> Playing SUBREG games before reload does not look safe to me. We would

There is plenty of SUBREG usage in the i386 backend before reload.  It is
perfectly safe to do so as long as we don't create SUBREG with a different
register class from the base.  Do you have a testcase to show my SUBREG
usage is unsafe?

> like to create a simpler instruction out of the combination of vector
> load and broadcast, so I think that combine+split is the right tool
> for this simplification.

Adding new patterns doesn't simplify the issue.

> BTW: Half of my proposed patch is a fix to a avx2_pbroadcast{_1}
> pattern, which models wrong access to memory.
>

I will take a look at avx2_pbroadcast{_1}.


-- 
H.J.


Re: [PATCH] i386: Remove duplicated AVX2/AVX512 vec_dup patterns

2018-11-04 Thread Uros Bizjak
On Sun, Nov 4, 2018 at 8:17 PM H.J. Lu  wrote:
>
> On Sun, Nov 4, 2018 at 8:41 AM Uros Bizjak  wrote:
> >
> > On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu  wrote:
> > >
> > > Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with
> > > subreg.  gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
> > >
> > >  avx2_test:
> > > .cfi_startproc
> > > -   vmovaps x(%rip), %xmm1
> > > -   vbroadcastss%xmm1, %ymm0
> > > +   vbroadcastssx(%rip), %ymm0
> > > vmovaps %ymm0, y(%rip)
> > > vzeroupper
> > > ret
> > > .cfi_endproc
> > >
> > > gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
> > >
> > > @@ -113,7 +113,7 @@ f10:
> > > .cfi_startproc
> > > vmovaps %ymm0, %ymm16
> > > vpermilps   $85, %ymm16, %ymm16
> > > -   vbroadcastss%xmm16, %ymm16
> > > +   vshuff32x4  $0x0, %ymm16, %ymm16, %ymm16
> > > vzeroupper
> > > ret
> > > .cfi_endproc
> > > @@ -153,8 +153,7 @@ f12:
> > >  f13:
> > >  .LFB12:
> > > .cfi_startproc
> > > -   vmovaps (%rdi), %ymm16
> > > -   vbroadcastss%xmm16, %ymm16
> > > +   vbroadcastss(%rdi), %ymm16
> > > vzeroupper
> > > ret
> > > .cfi_endproc
> >
> > Actually, we can achieve the same with pre-reload splitters. Please
> > see the attached patch for a couple of examples and a fix for
> > vbroadcastss that accesses the memory in wrong mode.
> >
>
> My patch removes a bunch of duplicated patterns from sse.md.  But
> yours adds a couple more patterns.   Isn't fewer patterns preferred?

Playing SUBREG games before reload does not look safe to me. We would
like to create a simpler instruction out of the combination of vector
load and broadcast, so I think that combine+split is the right tool
for this simplification.

BTW: Half of my proposed patch is a fix to a avx2_pbroadcast{_1}
pattern, which models wrong access to memory.

Uros.


Re: [PATCH, testsuite] add "inf" target attribute

2018-11-04 Thread Jeff Law
On 11/1/18 1:30 PM, Paul Koning wrote:
> A number of test cases fail on pdp11 because they use the "inf" float value 
> which does not exist on that target (nor on VAX).  Rainer Orth and Joseph 
> Myers suggested adding a new effective-target keyword to check for this, and 
> require it for tests that have that dependency.
> 
> The attached patch implements this.  Ok for trunk?
> 
>   paul
> 
> ChangeLog:
> 
> 2018-11-01  Paul Koning  
> 
>   * doc/sourcebuild.texi (target attributes): Document new "inf"
>   effective target keyword.
OK with me.

jeff


Re: PR83750: CSE erf/erfc pair

2018-11-04 Thread Jeff Law
On 11/2/18 3:36 AM, Prathamesh Kulkarni wrote:
> Hi,
> This patch adds two transforms to match.pd to CSE erf/erfc pair.
> erfc(x) is canonicalized to 1 - erf(x) and is then reversed to 1 -
> erf(x) when canonicalization is disabled and result of erf(x) has
> single use within 1 - erf(x).
> 
> The patch regressed builtin-nonneg-1.c. The following test-case
> reproduces the issue with patch:
> 
> void test(double d1) {
>   if (signbit(erfc(d1)))
> link_failure_erfc();
> }
> 
> ssa dump:
> 
>   <bb 2> :
>   _5 = __builtin_erf (d1_4(D));
>   _1 = 1.0e+0 - _5;
>   _6 = _1 < 0.0;
>   _2 = (int) _6;
>   if (_2 != 0)
> goto <bb 3>; [INV]
>   else
> goto <bb 4>; [INV]
> 
>   <bb 3> :
>   link_failure_erfc ();
> 
>   <bb 4> :
>   return;
> 
> As can be seen, erfc(d1) is folded to 1 - erf(d1).
> forwprop then transforms the if condition from _2 != 0
> to _5 > 1.0e+0 and that defeats DCE thus resulting in link failure
> in undefined reference to link_failure_erfc().
> 
> So, the patch adds another transform erf(x) > 1 -> 0
> which resolves the regression.
> 
> Bootstrapped+tested on x86_64-unknown-linux-gnu.
> Cross-testing on arm and aarch64 variants in progress.
> OK for trunk if passes ?
> 
> Thanks,
> Prathamesh
> 
> 
> pr83750-4.txt
> 
> 2018-11-02  Prathamesh Kulkarni  
> 
>   * match.pd (erfc(x) -> 1 - erf(x)): New pattern.
>   (1 - erf(x) -> erfc(x)): Likewise.
>   (erf(x) > 1 -> 0): Likewise.
> 
> testsuite/
>   * gcc.dg/tree-ssa/pr83750-1.c: New test
>   * gcc.dg/tree-ssa/pr83750-2.c: Likewise.
Don't we have a flag specific to honoring nans?  Would that be better to
use than flag_unsafe_math_optimizations?  As Uli mentioned, there are
other cases (where ABS (const) >= 1.0).

jeff


Re: [PATCH v2, middle end]: Fix PR58372, internal compiler error: ix86_compute_frame_layout

2018-11-04 Thread Uros Bizjak
On Sun, Nov 4, 2018 at 6:59 PM Jeff Law  wrote:
>
> On 11/1/18 10:18 AM, Uros Bizjak wrote:
> > Hello!
> >
> > v2 of the patch hits the real problem: in pass_expand::execute
> > finish_eh_generation is called after expand_stack_alignment is called.
> > Construction of SjLj landing pads calls emit_library_call, which can
> > change crtl->preferred_stack_boundary value after all dependent
> > variables are already calculated by expand_stack_alignment.
> >
> > The solution is to move the call to finish_eh_generation in front of
> > the call to expand_stack_alignment.
> >
> > 2018-11-01  Uros Bizjak  
> >
> > PR middle-end/58372
> > * cfgexpand.c (pass_expand::execute): Move the call to
> > finish_eh_generation in front of the call to expand_stack_alignment.
> >
> > testsuite/ChangeLog:
> >
> > 2018-11-01  Uros Bizjak  
> >
> > PR middle-end/58372
> > * g++.target/i386/pr58372.C: New test.
> >
> > Patch was bootstrapped and regression tested on x86_64-linux-gnu
> > {,-m32}, all default languages plus go. Additionally, the testcase
> > from PR (and a couple of similar ones) were compiled for
> > i686-w64-mingw32 target with various combinations of
> > -mpreferred-stack-boundary= -mincoming-stack-boundary= -mforce-drap
> > and -m{no-}accumulate-outgoing-args.
> >
> > OK for mainline and release branches?
> >
> > Uros.
> >
> OK, but please add a comment indicating why the new sequencing is needed
> in the code.

Thanks, committed with the following comment:

  /* Call expand_stack_alignment after finishing all
 updates to crtl->preferred_stack_boundary.  */
  expand_stack_alignment ();

I'll backport the patch to release branches after a week without
problems in the mainline.

Uros.


Re: [PATCH] Fix setting of hotness in non-LTO mode (PR gcov-profile/77698).

2018-11-04 Thread Jeff Law
On 10/9/18 6:37 AM, Martin Liška wrote:
> Hi.
> 
> In non-LTO mode, we should not set hotness according to computed histogram
> in ipa-profile. Following patch does that and fixes the test-case isolated
> from PR.
> 
> Patch survives regression tests on x86_64-linux-gnu.
> Ready for trunk?
> Thanks,
> Martin
> 
> gcc/ChangeLog:
> 
> 2018-10-09  Martin Liska  
> 
>   PR gcov-profile/77698
>   * ipa-profile.c (ipa_profile): Adjust hotness threshold
>   only in LTO mode.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-10-09  Martin Liska  
> 
>   PR gcov-profile/77698
>   * gcc.dg/tree-prof/pr77698.c: New test.
OK
jeff


Re: [PATCH] i386: Remove duplicated AVX2/AVX512 vec_dup patterns

2018-11-04 Thread H.J. Lu
On Sun, Nov 4, 2018 at 8:41 AM Uros Bizjak  wrote:
>
> On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu  wrote:
> >
> > Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with
> > subreg.  gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
> >
> >  avx2_test:
> > .cfi_startproc
> > -   vmovaps x(%rip), %xmm1
> > -   vbroadcastss%xmm1, %ymm0
> > +   vbroadcastssx(%rip), %ymm0
> > vmovaps %ymm0, y(%rip)
> > vzeroupper
> > ret
> > .cfi_endproc
> >
> > gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
> >
> > @@ -113,7 +113,7 @@ f10:
> > .cfi_startproc
> > vmovaps %ymm0, %ymm16
> > vpermilps   $85, %ymm16, %ymm16
> > -   vbroadcastss%xmm16, %ymm16
> > +   vshuff32x4  $0x0, %ymm16, %ymm16, %ymm16
> > vzeroupper
> > ret
> > .cfi_endproc
> > @@ -153,8 +153,7 @@ f12:
> >  f13:
> >  .LFB12:
> > .cfi_startproc
> > -   vmovaps (%rdi), %ymm16
> > -   vbroadcastss%xmm16, %ymm16
> > +   vbroadcastss(%rdi), %ymm16
> > vzeroupper
> > ret
> > .cfi_endproc
>
> Actually, we can achieve the same with pre-reload splitters. Please
> see the attached patch for a couple of examples and a fix for
> vbroadcastss that accesses the memory in wrong mode.
>

My patch removes a bunch of duplicated patterns from sse.md.  But
yours adds a couple more patterns.  Aren't fewer patterns preferred?

-- 
H.J.


Re: Fix D compilation on Solaris

2018-11-04 Thread Iain Buclaw
On Sun, 4 Nov 2018 at 17:50, Rainer Orth  wrote:
>
> Hi Iain,
>
> > On Sat, 3 Nov 2018 at 23:23, Iain Buclaw  wrote:
> >>
> >> On Wed, 31 Oct 2018 at 10:40, Rainer Orth  
> >> wrote:
> >> >
> >> > Hi Iain,
> >> >
> >> > > My first suspect here would be 'struct UnionExp', see 
> >> > > d/dmd/expression.h
> >> > >
> >> > > Upstream dmd use a poor man's alignment, from what I recall to be
> >> > > compatible with the dmc compiler.
> >> > >
> >> > > // Ensure that the union is suitably aligned.
> >> > > real_t for_alignment_only;
> >> > >
> >> > > What happens if you were to replace that with marking the type as
> >> > > __attribute__ ((aligned (8))) ?
> >> >
> >> > thanks for the suggestion: this worked just fine.  After a couple more
> >> > libphobos adjustments (described below), I was able to finish the build
> >> > on both sparc-sun-solaris2.11 and i386-pc-solaris2.11.
> >> >
> >> > The link tests still all fail as before, but sparc and x86 are now on
> >> > par here :-)
> >> >
> >>
> >> Hi Rainer,
> >>
> >> On making the relevant change to dmd, this header probably should
> >> remain compatible with dmc++, which unfortunately doesn't implement
> >> any __attribute__ extensions.  Does s/real_t/long double/ also prevent
> >> the alignment error from occurring?
> >>
> >
> > Actually, turns out I'm wrong and was grepping for the wrong name.
> >
> > It is supported in the form of #pragma pack(8)
> >
> > https://www.digitalmars.com/ctg/pragmas.html#pack
>
> I tried wrapping union u in #pragma pack(8)/#pragma pack().
> Unfortunately, this doesn't seem to work as I got the same SIGBUS errors
> during a sparc-sun-solaris2.11 bootstrap.  Seems we have to stay with
> the long double version instead.
>

That's alright.  I meant that #pragma pack is for DMC compatibility.

The changes have been made upstream:

https://github.com/dlang/dmd/pull/8907
https://github.com/dlang/dmd/pull/8914  (Backport to C++)


> I'm including the current patch here.
>
> Rainer
>
> --
> -
> Rainer Orth, Center for Biotechnology, Bielefeld University
>
>
> 2018-10-29  Iain Buclaw  
>
> gcc/d:
> * dmd/expression.h (UnionExp.u): Change for_alignment_only to long
> double.
>
> 2018-10-29  Rainer Orth  
>
> gcc:
> * config/default-d.c: Include memmodel.h.
>
> * config/sol2-d.c: New file.
> * config/t-sol2 (sol2-d.o): New rule.
> * config.gcc <*-*-solaris2*>: Set d_target_objs,
> target_has_targetdm.
>
> libphobos:
> * libdruntime/core/stdc/fenv.d [SPARC, SPARC64]: Set SPARC_Any.
> [X86, X86_64]: Set X86_Any.
> [Solaris]: Provide FE_* constants.
> * libdruntime/core/sys/posix/aio.d [Solaris] (struct aio_result,
> struct aiocb): New types.
> * libdruntime/core/sys/posix/ucontext.d [SPARC64, SPARC] (_NGREG,
> greg_t): Define.
> [SPARC64, SPARC] (struct _fpq, struct fq, struct fpregset_t): New
> types.
> * libdruntime/core/thread.d (Class Thread) [Solaris]
> (m_isRTClass): Don't declare immutable.
> * libdruntime/rt/sections_solaris.d (SectionGroup.moduleGroup):
> Declare nothrow @nogc.
> (pinLoadedLibraries, unpinLoadedLibraries, inheritLoadedLibraries)
> (cleanupLoadedLibraries): New functions.
> * src/std/datetime/systime.d (class Clock) [Solaris]
> (clock_gettime): Import.
> * src/std/math.d [SPARC, SPARC64]: Set SPARC_Any.
> (struct FloatingPointControl): Use SPARC_Any for ExceptionMask,
> ControlState.
>

The gcc changes look OK.  I will commit the front-end and library
changes though in a merge later today, as I've been sending them
upstream first.

-- 
Iain


Re: [PATCH libquadmath/PR68686]

2018-11-04 Thread Ed Smith-Rowland

On 11/3/18 10:09 PM, Jeff Law wrote:

On 10/23/18 7:45 PM, Ed Smith-Rowland wrote:

Greetings,

This is an almost trivial patch to get the correct sign for tgammaq.

I don't have a testcase as I don't know where to put one.

OK?

Ed Smith-Rowland



tgammaq.CL

2018-10-24  Edward Smith-Rowland  <3dw...@verizon.net>

PR libquadmath/68686
* math/tgammaq.c: Correct sign for negative argument.

I don't have the relevant background to evaluate this for correctness.
Can you refer back to any kind of documentation which indicates what the
sign of the return value ought to be?

Alternately, if you can point to the relevant code in glibc that handles
the resultant sign, that'd be useful too.

Note that Joseph's follow-up doesn't touch on the gamma problem AFAICT,
but instead touches on the larger issues around trying to keep the
quadmath implementations between glibc and gcc more in sync.

Jeff


I've looked at glibc lgamma, in particular signgam and I think those DTRT:


I'm pretty sure the lgamma that writes to the global signgam and the 
lgamma_r(x, int *signgam) DTRT.


The various __lgamma_neg* DTRT:

__lgamma_negX (REALTYPE x, int *signgamp)
{
  /* Determine the half-integer region X lies in, handle exact
 integers and determine the sign of the result.  */
  int i = __floorl (-2 * x);
  if ((i & 1) == 0 && i == -2 * x)
    return 1.0L / 0.0L;
  long double xn = ((i & 1) == 0 ? -i / 2 : (-i - 1) / 2);
  i -= 4;
  *signgamp = ((i & 2) == 0 ? -1 : 1);
...

I think the various e_lgammaX_r.c are good too:

  if (se & 0x8000)
    {
  if (x < -2.0L && x > -33.0L)
    return __lgamma_negl (x, signgamp);
  t = sin_pi (x);
  if (t == zero)
    return one / fabsl (t);    /* -integer */
  nadj = __ieee754_logl (pi / fabsl (t * x));
  if (t < zero)
    *signgamp = -1;
  x = -x;
    }

I *do* think a couple tests should be added to test-signgam-*.c to test 
alternation of signs:

  signgam = 123;                        \
  c = FUNC (b);                        \
  if (signgam == 1)                        \

    puts ("PASS: " #FUNC " (-1.5) setting signgam");    \

  else                            \
    {                            \
      puts ("FAIL: " #FUNC " (-1.5) setting signgam");    \
      result = 1;                        \
    }                            \

Add to test lgamma_negX code paths...
  signgam = 123;                        \
  c = FUNC (b);                        \
  if (signgam == -1)                    \
    puts ("PASS: " #FUNC " (-34.5) setting signgam");    \
  else                            \
    {                            \
      puts ("FAIL: " #FUNC " (-34.5) setting signgam"); \
      result = 1;                        \
    }                            \
  signgam = 123;                        \
  c = FUNC (b);                        \
  if (signgam == 1)                        \
    puts ("PASS: " #FUNC " (-35.5) setting signgam");    \
  else                            \
    {                            \
      puts ("FAIL: " #FUNC " (-35.5) setting signgam"); \
      result = 1;                        \
    }                            \


I've not dealt with glibc directly.  Do I need separate Copyright and 
all that?  Is it similar to gcc in terms of devel?





Re: gOlogy: fix debug binds in auto-inc-dec

2018-11-04 Thread Jeff Law
On 10/21/18 2:07 AM, Alexandre Oliva wrote:
> As auto_inc_dec pass combines incs and mems from different insns, it
> often causes regs to temporarily hold a value different from the one
> it would before the transformation.  Debug insns within that range
> would therefore end up binding to the wrong expression after the
> transformation.
> 
> This patch adjusts debug binds in the affected range.
> 
> Regstrapped on x86_64-, i686-, ppc64-, ppc64el-, and aarch64-linux-gnu.
> Ok to install?
> 
> for  gcc/ChangeLog
> 
>   * auto-inc-dec.c: Include valtrack.h.  Improve comments.
>   (reg_next_debug_use): New.
>   (attempt_change): Propagate adjusted expression into affected
>   debug insns.
>   (merge_in_block): Track uses in debug insns.
>   (pass_inc_dec::execute): Allocate and release
>   reg_next_debug_use.
OK.

> ---
>  gcc/auto-inc-dec.c |  128 
> +++-
>  1 file changed, 125 insertions(+), 3 deletions(-)
> 
> diff --git a/gcc/auto-inc-dec.c b/gcc/auto-inc-dec.c
> index e6dc1c30d716..064b8afd4ff9 100644
> --- a/gcc/auto-inc-dec.c
> +++ b/gcc/auto-inc-dec.c
> @@ -509,27 +529,83 @@ attempt_change (rtx new_addr, rtx inc_reg)
>gcc_assert (mov_insn);
>emit_insn_before (mov_insn, inc_insn.insn);
>regno = REGNO (inc_insn.reg0);
> +  /* ??? Could REGNO possibly be used in MEM_INSN other than in
> +  the MEM address, and still die there, so that move_dead_notes
> +  would incorrectly move the note?  */
Perhaps some kind of weird parallel where there's a memory operation of
some sort and some unrelated ALU where REGNO is a source operand in the
ALU and dies?

I'm not immediately aware of any such insn on any target, but I'm far
from an expert in all the odd things ISAs do :-)



Jeff


Re: [PATCH] Fix PR 87672

2018-11-04 Thread Jeff Law
On 10/22/18 8:59 AM, Bernd Edlinger wrote:
> Hi!
> 
> This fixes an ICE which was exposed by a previous patch of mine,
> and a wrong transformation from strcat_chk => strcpy_chk,
> which fails to adjust the object size, thus allowing too much
> memory to be accessed.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 
> 
> patch-pr87672.diff
> 
> gcc:
> 2018-10-22  Bernd Edlinger  
> 
>   PR tree-optimization/87672
>   * gimple-fold.c (gimple_fold_builtin_stxcpy_chk): Gimplify.
>   * tree-ssa-strlen.c (handle_builtin_strcat): Adjust object size.
> 
> testsuite:
> 2018-08-26  Bernd Edlinger  
> 
>   PR tree-optimization/87672
>   * gcc.dg/pr87672.c: New test.
OK
jeff


Re: [PATCH] Fix PR 86572

2018-11-04 Thread Jeff Law
On 10/22/18 9:08 AM, Bernd Edlinger wrote:
> Hi!
> 
> This makes c_strlen avoid an unsafe strlen folding of const arguments
> with non-const offset.  Currently a negative out of bounds offset
> makes the strlen function return an extremely large number, and
> at the same time prevents the VRP machinery from determining the
> correct range of the strlen function in this case.
> 
> Fixed by doing the whole computation in size_t and casting the
> result back to ssize_t.
> 
> 
> Bootstrapped and reg-tested on x86_64-pc-linux-gnu.
> Is it OK for trunk?
> 
> 
> Thanks
> Bernd.
> 
> 
> patch-pr86572.diff
> 
> gcc:
> 2018-10-22  Bernd Edlinger  
> 
>   PR tree-optimization/86572
>   * builtins.c (c_strlen): Handle negative offsets in a safe way.
> 
> testsuite:
> 2018-10-22  Bernd Edlinger  
> 
>   PR tree-optimization/86572
>   * gcc.dg/pr86572.c: New test.
OK.
jeff


Re: [PATCH v2, middle end]: Fix PR58372, internal compiler error: ix86_compute_frame_layout

2018-11-04 Thread Jeff Law
On 11/1/18 10:18 AM, Uros Bizjak wrote:
> Hello!
> 
> v2 of the patch hits the real problem: in pass_expand::execute
> finish_eh_generation is called after expand_stack_alignment is called.
> Construction of SjLj landing pads calls emit_library_call, which can
> change crtl->preferred_stack_boundary value after all dependent
> variables are already calculated by expand_stack_alignment.
> 
> The solution is to move the call to finish_eh_generation in front of
> the call to expand_stack_alignment.
> 
> 2018-11-01  Uros Bizjak  
> 
> PR middle-end/58372
> * cfgexpand.c (pass_expand::execute): Move the call to
> finish_eh_generation in front of the call to expand_stack_alignment.
> 
> testsuite/ChangeLog:
> 
> 2018-11-01  Uros Bizjak  
> 
> PR middle-end/58372
> * g++.target/i386/pr58372.C: New test.
> 
> Patch was bootstrapped and regression tested on x86_64-linux-gnu
> {,-m32}, all default languages plus go. Additionally, the testcase
> from PR (and a couple of similar ones) were compiled for
> i686-w64-mingw32 target with various combinations of
> -mpreferred-stack-boundary= -mincoming-stack-boundary= -mforce-drap
> and -m{no-}accumulate-outgoing-args.
> 
> OK for mainline and release branches?
> 
> Uros.
> 
OK, but please add a comment indicating why the new sequencing is needed
in the code.

jeff


Re: [PR87815]Don't generate shift sequence for load replacement in DSE when the mode size is not compile-time constant

2018-11-04 Thread Jeff Law
On 10/31/18 5:13 AM, Renlin Li wrote:
> Hi all,
> 
> The patch adds a check if the gap is compile-time constant.
> 
> This happens when dse decides to replace the load with previous store
> value.
> The problem is that, shift sequence could not accept compile-time
> non-constant
> mode operand.
> 
> Another issue raised from this issue is the inefficient code-generation for
> general data manipulation over mask/predicate register.
> In sve, some general data processing instructions don't apply on predicate
> registers directly. In the worst(this) case, memory load/store is
> generated to reload
> the value into a general purpose register for further data processing.
> We need to improve that.
> 
> aarch64 sve test Okay, Okay to commit?
> 
> Regards,
> Renlin
> 
> gcc/ChangeLog:
> 
> 2018-10-31  Renlin Li  
> 
> PR target/87815
> * dse.c (get_stored_val): Add check for compile-time
>   constantness of gap.
> 
> gcc/testsuite/ChangeLog:
> 
> 2018-10-31  Renlin Li  
> 
> PR target/87815
> * gcc.target/aarch64/sve/pr87815.c: New.
OK
jeff


Re: Fix D compilation on Solaris

2018-11-04 Thread Rainer Orth
Hi Iain,

> On Sat, 3 Nov 2018 at 23:23, Iain Buclaw  wrote:
>>
>> On Wed, 31 Oct 2018 at 10:40, Rainer Orth  
>> wrote:
>> >
>> > Hi Iain,
>> >
>> > > My first suspect here would be 'struct UnionExp', see d/dmd/expression.h
>> > >
>> > > Upstream dmd use a poor man's alignment, from what I recall to be
>> > > compatible with the dmc compiler.
>> > >
>> > > // Ensure that the union is suitably aligned.
>> > > real_t for_alignment_only;
>> > >
>> > > What happens if you were to replace that with marking the type as
>> > > __attribute__ ((aligned (8))) ?
>> >
>> > thanks for the suggestion: this worked just fine.  After a couple more
>> > libphobos adjustments (described below), I was able to finish the build
>> > on both sparc-sun-solaris2.11 and i386-pc-solaris2.11.
>> >
>> > The link tests still all fail as before, but sparc and x86 are now on
>> > par here :-)
>> >
>>
>> Hi Rainer,
>>
>> On making the relevant change to dmd, this header probably should
>> remain compatible with dmc++, which unfortunately doesn't implement
>> any __attribute__ extensions.  Does s/real_t/long double/ also prevent
>> the alignment error from occurring?
>>
>
> Actually, turns out I'm wrong and was grepping for the wrong name.
>
> It is supported in the form of #pragma pack(8)
>
> https://www.digitalmars.com/ctg/pragmas.html#pack

I tried wrapping union u in #pragma pack(8)/#pragma pack().
Unfortunately, this doesn't seem to work as I got the same SIGBUS errors
during a sparc-sun-solaris2.11 bootstrap.  Seems we have to stay with
the long double version instead.

I'm including the current patch here.

Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


2018-10-29  Iain Buclaw  

gcc/d:
* dmd/expression.h (UnionExp.u): Change for_alignment_only to long
double.

2018-10-29  Rainer Orth  

gcc:
* config/default-d.c: Include memmodel.h.

* config/sol2-d.c: New file.
* config/t-sol2 (sol2-d.o): New rule.
* config.gcc <*-*-solaris2*>: Set d_target_objs,
target_has_targetdm.

libphobos:
* libdruntime/core/stdc/fenv.d [SPARC, SPARC64]: Set SPARC_Any.
[X86, X86_64]: Set X86_Any.
[Solaris]: Provide FE_* constants.
* libdruntime/core/sys/posix/aio.d [Solaris] (struct aio_result,
struct aiocb): New types.
* libdruntime/core/sys/posix/ucontext.d [SPARC64, SPARC] (_NGREG,
greg_t): Define.
[SPARC64, SPARC] (struct _fpq, struct fq, struct fpregset_t): New
types.
* libdruntime/core/thread.d (Class Thread) [Solaris]
(m_isRTClass): Don't declare immutable.
* libdruntime/rt/sections_solaris.d (SectionGroup.moduleGroup):
Declare nothrow @nogc.
(pinLoadedLibraries, unpinLoadedLibraries, inheritLoadedLibraries)
(cleanupLoadedLibraries): New functions.
* src/std/datetime/systime.d (class Clock) [Solaris]
(clock_gettime): Import.
* src/std/math.d [SPARC, SPARC64]: Set SPARC_Any.
(struct FloatingPointControl): Use SPARC_Any for ExceptionMask,
ControlState.

# HG changeset patch
# Parent  0f6ccc9cfd024f705876c70a6403268ea9dbf0a2
Fix D compilation on Solaris

diff --git a/gcc/config.gcc b/gcc/config.gcc
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -918,6 +918,7 @@ case ${target} in
   target_gtfiles="$target_gtfiles \$(srcdir)/config/sol2.c"
   c_target_objs="${c_target_objs} sol2-c.o"
   cxx_target_objs="${cxx_target_objs} sol2-c.o sol2-cxx.o"
+  d_target_objs="${d_target_objs} sol2-d.o"
   extra_objs="${extra_objs} sol2.o sol2-stubs.o"
   extra_options="${extra_options} sol2.opt"
   case ${enable_threads}:${have_pthread_h}:${have_thread_h} in
@@ -925,6 +926,7 @@ case ${target} in
   thread_file=posix
   ;;
   esac
+  target_has_targetdm=yes
   ;;
 *-*-*vms*)
   extra_options="${extra_options} vms/vms.opt"
diff --git a/gcc/config/default-d.c b/gcc/config/default-d.c
--- a/gcc/config/default-d.c
+++ b/gcc/config/default-d.c
@@ -18,6 +18,7 @@ along with GCC; see the file COPYING3.  
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
+#include "memmodel.h"
 #include "tm_d.h"
 #include "d/d-target.h"
 #include "d/d-target-def.h"
diff --git a/gcc/config/sol2-d.c b/gcc/config/sol2-d.c
new file mode 100644
--- /dev/null
+++ b/gcc/config/sol2-d.c
@@ -0,0 +1,51 @@
+/* Solaris support needed only by D front-end.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 

Re: Fix D compilation on Solaris

2018-11-04 Thread Rainer Orth
Hi Iain,

> On Wed, 31 Oct 2018 at 10:40, Rainer Orth  
> wrote:
>>
>> Hi Iain,
>>
>> > My first suspect here would be 'struct UnionExp', see d/dmd/expression.h
>> >
>> > Upstream dmd use a poor man's alignment, from what I recall to be
>> > compatible with the dmc compiler.
>> >
>> > // Ensure that the union is suitably aligned.
>> > real_t for_alignment_only;
>> >
>> > What happens if you were to replace that with marking the type as
>> > __attribute__ ((aligned (8))) ?
>>
>> thanks for the suggestion: this worked just fine.  After a couple more
>> libphobos adjustments (described below), I was able to finish the build
>> on both sparc-sun-solaris2.11 and i386-pc-solaris2.11.
>>
>> The link tests still all fail as before, but sparc and x86 are now on
>> par here :-)
>>
>
> Hi Rainer,
>
> On making the relevant change to dmd, this header probably should
> remain compatible with dmc++, which unfortunately doesn't implement
> any __attribute__ extensions.  Does s/real_t/long double/ also prevent
> the alignment error from occurring?

it does indeed, as checked by a sparc-sun-solaris2.11 bootstrap.

Thanks.
Rainer

-- 
-
Rainer Orth, Center for Biotechnology, Bielefeld University


Re: Fix D compilation on Solaris

2018-11-04 Thread Rainer Orth
Hi Iain,

> On Wed, 31 Oct 2018 at 10:40, Rainer Orth  
> wrote:
>>
>> Hi Iain,
>>
>> > My first suspect here would be 'struct UnionExp', see d/dmd/expression.h
>> >
>> > Upstream dmd use a poor man's alignment, from what I recall to be
>> > compatible with the dmc compiler.
>> >
>> > // Ensure that the union is suitably aligned.
>> > real_t for_alignment_only;
>> >
>> > What happens if you were to replace that with marking the type as
>> > __attribute__ ((aligned (8))) ?
>>
>> thanks for the suggestion: this worked just fine.  After a couple more
>> libphobos adjustments (described below), I was able to finish the build
>> on both sparc-sun-solaris2.11 and i386-pc-solaris2.11.
>>
>> The link tests still all fail as before, but sparc and x86 are now on
>> par here :-)
>>
>> Here are the new issues I saw while completing the sparc build:
>>
>> * math.d has two problems on 32-bit sparc:
>>
>> /vol/gcc/src/hg/trunk/local/libphobos/src/std/math.d:5278:18: error:
>> undefined identifier 'ControlState'
>>  5278 | ControlState savedState;
>>   |  ^
>> /vol/gcc/src/hg/trunk/local/libphobos/src/std/math.d:5325:25: error:
>> undefined identifier 'ControlState'
>>  5325 | static ControlState getControlState() @trusted nothrow @nogc
>>   | ^
>> /vol/gcc/src/hg/trunk/local/libphobos/src/std/math.d:5390:17: error:
>> undefined identifier 'ControlState'
>>  5390 | static void setControlState(ControlState newState) @trusted
>> nothrow @nogc
>>   | ^
>>
>>   Fixed by using the ControlState alias on both SPARC64 and SPARC.
>>
>> /vol/gcc/src/hg/trunk/local/libphobos/src/std/math.d:5211:9: error: static 
>> assert  "Not implemented for this architecture"
>>  5211 | static assert(false, "Not implemented for this 
>> architecture");
>>   | ^
>>
>>   Similarly, ExceptionMask was only defined for SPARC64.  However,
>>   looking closer it seems that the current definition only matches Linux
>>   resp. Glibc fenv.h (FE_*).  The Solaris values are different on sparc.
>>
>>   This seems to be a recurring theme, unfortunately: definitions guarded
>>   by version ($CPU) are really CRuntime_Glibc && $CPU.  I fear libphobos
>>   has to be way more careful to distinguish between definitions that
>>   only depend on the target cpu and those that are (also) OS-dependent.
>>
>
> The phobos part is largely free of version (CPU), the druntime
> bindings are in a little better shape, despite there being a lot of
> places dotted around the place that need to be checked.
>
>>   Instead of hardcoding all this, it may be worth having a look at how
>>   Go handles this: they dump the definitions with gcc
>>   -fdump-go-spec=tmp-gen-sysinfo.go and postprocess them in
>>   libgo/mksysinfo.sh.  This way, such errors and potential
>>   inconsistencies are avoided from the start.  This would also massively
>>   simplify work for potential porters.
>>
>
> You mean a tool such as htod (header to D)?  I don't think that has
> ever gained traction in upstream.

that's a pity.  It certainly would simplify things...

> The existing C bindings for sure are brittle, and never take into
> account changes between versions (FreeBSD comes to mind), but for new
> ports, it at least tries to be consistent in that unhandled places
> always end with a 'static assert (false)' compiler error.  If I was
> just to defend the current status quo a little.

I see.  Types and interfaces changing between versions can certainly be
a problem.  However, Solaris is usually very careful not to make
incompatible changes.

>> * My previous patch had a typo, now also fixed:
>>
>> /vol/gcc/src/hg/trunk/local/libphobos/libdruntime/core/sys/posix/ucontext.d:984:20:
>> error: undefined identifier 'uint32_t'
>>   984 |   uint32_t[32] fpu_regs;
>>   |^
>>
>> While those were enough to finish the build, I noticed a couple of
>> additional issues:
>>
>> * During make check, part (or all) of libphobos was rebuilt.  I strongly
>>   suspect that this happens because contrib/gcc_update doesn't handle
>>   libphobos yet: it needs to touch generated files to avoid exactly this
>>   sort of problem.  I'll post a separate patch once tested.
>>
>> * Unlike the gdc.dg tests, many gdc.test tests appear as UNRESOLVED like
>>   this:
>>
>> UNRESOLVED: runnable/A16.d   compilation failed to produce executable
>>
>>   There's no preceding FAIL for the link failure itself.  Besides, the
>>   testname needs to include the gdc.test prefix.
>>
>> * One issue I forgot last time: when defining the SPARC64 struct
>>   fpregset_t in libdruntime/core/sys/posix/ucontext.d, one field
>>   couldn't be represented: the structure contains a union
>>
>> union fpu_fr
>> {
>> uint[32]fpu_regs;
>> double[32]  fpu_dregs;
>> /* long double[16]  fpu_qregs; */
>>
>>   but there's no D type corresponding to 

Re: Fix D compilation on Solaris

2018-11-04 Thread Rainer Orth
Hi Iain,

> On Wed, 31 Oct 2018 at 10:43, Rainer Orth  
> wrote:
>>
>> Hi Iain,
>>
>> >> My first suspect here would be 'struct UnionExp', see d/dmd/expression.h
>> >>
>> >> Upstream dmd use a poor man's alignment, from what I recall to be
>> >> compatible with the dmc compiler.
>> >>
>> >> // Ensure that the union is suitably aligned.
>> >> real_t for_alignment_only;
>> >>
>> >> What happens if you were to replace that with marking the type as
>> >> __attribute__ ((aligned (8))) ?
>> >
>> > thanks for the suggestion: this worked just fine.  After a couple more
>> > libphobos adjustments (described below), I was able to finish the build
>> > on both sparc-sun-solaris2.11 and i386-pc-solaris2.11.
>> >
>> > The link tests still all fail as before, but sparc and x86 are now on
>> > par here :-)
>>
>> and now with the updated patch ;-)
>>
>
> Thanks, the front-end and library parts should be posted upstream.
>
> Mapping would be:
> - d/dmd: https://github.com/dlang/dmd/tree/dmd-cxx
> - libdruntime/core: https://github.com/dlang/druntime
> - libphobos/src/std: https://github.com/dlang/phobos
>
> I can take care of this, then backport/merge it down here.

that would be great.  I'd like to avoid becoming involved in the
procedures of too many upstream projects if possible.  The way Ian
handles my Solaris Go changes is very convenient for me ;-)

> As for the patch itself:
>
>> --- a/gcc/config/default-d.c
>> +++ b/gcc/config/default-d.c
>> @@ -18,6 +18,7 @@ along with GCC; see the file COPYING3.
>>  #include "config.h"
>>  #include "system.h"
>>  #include "coretypes.h"
>> +#include "memmodel.h"
>>  #include "tm_d.h"
>>  #include "d/d-target.h"
>>  #include "d/d-target-def.h"
>
> Is this still required?  For sure it would cover non-glibc,
> non-solaris sparc targets though.

There are other *-protos.h files using enum memmodel beside sparc:
alpha, ia64, and tilegx.  This may or may not be a reason to keep the
include.

>> diff --git a/gcc/config/sol2-d.c b/gcc/config/sol2-d.c
>> new file mode 100644
>> --- /dev/null
>> +++ b/gcc/config/sol2-d.c
>
> [-- snip --]
>
>> +solaris_d_os_builtins (void)
>> +{
>> +  d_add_builtin_version ("Posix");
>> +  d_add_builtin_version ("Solaris"); \
>> +}
>> +
>
> I'll assume that backslash is a typo.
>
> You'll also need to add this target hook:
>
> /* Implement TARGET_D_CRITSEC_SIZE for Solaris targets.  */
>
> static unsigned
> solaris_d_critsec_size (void)
> {
>   /* This is the sizeof pthread_mutex_t.  */
>   return 24;
> }
>
> I hope that pthread_mutex_t does not differ between x86 and SPARC.

I saw it in glibc-d.c, but initially thought it was Linux-only in some
way.  Fortunately, pthread_mutex_t is identical between sparc and x86,
32 and 64-bit on Solaris.  Added to the updated patch.

>> diff --git a/gcc/config/t-sol2 b/gcc/config/t-sol2
>> --- a/gcc/config/t-sol2
>> +++ b/gcc/config/t-sol2
>> @@ -16,7 +16,7 @@
>>  # along with GCC; see the file COPYING3.  If not see
>>  # <http://www.gnu.org/licenses/>.
>>
>> -# Solaris-specific format checking and pragmas
>> +# Solaris-specific format checking and pragmas.
>>  sol2-c.o: $(srcdir)/config/sol2-c.c
>>  $(COMPILE) $<
>>  $(POSTCOMPILE)
>
> Not sure what the policy is about mixing unrelated changes in a patch here.

In general, they are frowned upon ;-)  However, in this case for a
single-character comment change in code I maintain, I believe it would
be overkill to move it to a separate check-in.

>> diff --git a/libphobos/libdruntime/core/sys/posix/ucontext.d
>> b/libphobos/libdruntime/core/sys/posix/ucontext.d
>> --- a/libphobos/libdruntime/core/sys/posix/ucontext.d
>> +++ b/libphobos/libdruntime/core/sys/posix/ucontext.d
>
> [-- snip --]
>
>> + struct fq
>> + {
>> + union FQu
>> + {
>> + double whole;
>> + _fpq fpq;
>> + };
>> + }
>
> Just an FYI, this won't do what I think you expect, 'struct fq' here
> would be an empty struct.  Better make this an anonymous union, I can
> see the same mistake done elsewhere.
>
> struct fq
> {
> union
> {
> double whole;
> _fpq fpq;
> }
> }

Thanks, all instances fixed, I hope.

>> diff --git a/libphobos/libdruntime/core/thread.d 
>> b/libphobos/libdruntime/core/thread.d
>> --- a/libphobos/libdruntime/core/thread.d
>> +++ b/libphobos/libdruntime/core/thread.d
>> @@ -1547,7 +1547,7 @@ private:
>>
>>  version (Solaris)
>>  {
>> -__gshared immutable bool m_isRTClass;
>> +__gshared bool m_isRTClass;
>>  }
>>
>>  private:
>
> This is curious, I wonder when was the last time someone tested x86
> Solaris in upstream.  What was the compilation error?

I got

/vol/gcc/src/hg/trunk/local/libphobos/libdruntime/core/thread.d:989:21: error: 
cannot modify immutable expression m_isRTClass
  989 | m_isRTClass = true;
  | ^
/vol/gcc/src/hg/trunk/local/libphobos/libdruntime/core/thread.d:997:21: error: 
cannot modify immutable expression 

Re: [PATCH] i386: Remove duplicated AVX2/AVX512 vec_dup patterns

2018-11-04 Thread Uros Bizjak
On Fri, Nov 2, 2018 at 6:25 PM H.J. Lu  wrote:
>
> Remove duplicated AVX2/AVX512 vec_dup patterns and replace them with
> subreg.  gcc.target/i386/avx2-vbroadcastss_ps256-1.c is changed by
>
>  avx2_test:
> .cfi_startproc
> -   vmovaps x(%rip), %xmm1
> -   vbroadcastss%xmm1, %ymm0
> +   vbroadcastssx(%rip), %ymm0
> vmovaps %ymm0, y(%rip)
> vzeroupper
> ret
> .cfi_endproc
>
> gcc.target/i386/avx512vl-vbroadcast-3.c is changed by
>
> @@ -113,7 +113,7 @@ f10:
> .cfi_startproc
> vmovaps %ymm0, %ymm16
> vpermilps   $85, %ymm16, %ymm16
> -   vbroadcastss%xmm16, %ymm16
> +   vshuff32x4  $0x0, %ymm16, %ymm16, %ymm16
> vzeroupper
> ret
> .cfi_endproc
> @@ -153,8 +153,7 @@ f12:
>  f13:
>  .LFB12:
> .cfi_startproc
> -   vmovaps (%rdi), %ymm16
> -   vbroadcastss%xmm16, %ymm16
> +   vbroadcastss(%rdi), %ymm16
> vzeroupper
> ret
> .cfi_endproc

Actually, we can achieve the same with pre-reload splitters. Please
see the attached patch for a couple of examples and a fix for
vbroadcastss that accesses the memory in wrong mode.

Uros.
Index: sse.md
===
--- sse.md  (revision 265740)
+++ sse.md  (working copy)
@@ -7129,6 +7129,20 @@
 (set_attr "prefix" "maybe_evex")
 (set_attr "mode" "")])
 
+(define_insn_and_split "*avx2_vec_dup_1"
+  [(set (match_operand:VF1_128_256 0 "register_operand")
+   (vec_duplicate:VF1_128_256
+ (vec_select:SF
+   (match_operand:V4SF 1 "memory_operand")
+   (parallel [(const_int 0)]]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (vec_duplicate:VF1_128_256 (match_dup 1)))]
+  "operands[1] = adjust_address_nv (operands[1], SFmode, 0);")
+
 (define_insn "avx2_vec_dupv8sf_1"
   [(set (match_operand:V8SF 0 "register_operand" "=v")
(vec_duplicate:V8SF
@@ -7141,6 +7155,20 @@
 (set_attr "prefix" "maybe_evex")
 (set_attr "mode" "V8SF")])
 
+(define_insn_and_split "*avx2_vec_dupv8sf_1"
+  [(set (match_operand:V8SF 0 "register_operand")
+   (vec_duplicate:V8SF
+ (vec_select:SF
+   (match_operand:V4SF 1 "memory_operand")
+   (parallel [(const_int 0)]]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (vec_duplicate:VF1_128_256 (match_dup 1)))]
+  "operands[1] = adjust_address_nv (operands[1], SFmode, 0);")
+
 (define_insn "avx512f_vec_dup_1"
   [(set (match_operand:VF_512 0 "register_operand" "=v")
(vec_duplicate:VF_512
@@ -17908,7 +17936,7 @@
   [(set (match_operand:VI 0 "register_operand" "=x,v")
(vec_duplicate:VI
  (vec_select:
-   (match_operand: 1 "nonimmediate_operand" "xm,vm")
+   (match_operand: 1 "register_operand" "x,v")
(parallel [(const_int 0)]]
   "TARGET_AVX2"
   "vpbroadcast\t{%1, %0|%0, %1}"
@@ -17918,24 +17946,64 @@
(set_attr "prefix" "vex,evex")
(set_attr "mode" "")])
 
+(define_insn_and_split "*avx2_pbroadcast_mem_1"
+  [(set (match_operand:VI 0 "register_operand")
+   (vec_duplicate:VI
+ (vec_select:
+   (match_operand: 1 "memory_operand")
+   (parallel [(const_int 0)]]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (vec_duplicate:VI (match_dup 1)))]
+  "operands[1] = adjust_address_nv (operands[1], mode, 0);")
+
 (define_insn "avx2_pbroadcast_1"
-  [(set (match_operand:VI_256 0 "register_operand" "=x,x,v,v")
+  [(set (match_operand:VI_256 0 "register_operand" "=x,v")
(vec_duplicate:VI_256
  (vec_select:
-   (match_operand:VI_256 1 "nonimmediate_operand" "m,x,m,v")
+   (match_operand:VI_256 1 "register_operand" "x,v")
(parallel [(const_int 0)]]
   "TARGET_AVX2"
-  "@
-   vpbroadcast\t{%1, %0|%0, %1}
-   vpbroadcast\t{%x1, %0|%0, %x1}
-   vpbroadcast\t{%1, %0|%0, %1}
-   vpbroadcast\t{%x1, %0|%0, %x1}"
-  [(set_attr "isa" "*,*,,")
+  "vpbroadcast\t{%x1, %0|%0, %x1}"
+  [(set_attr "isa" "*,")
(set_attr "type" "ssemov")
(set_attr "prefix_extra" "1")
(set_attr "prefix" "vex")
(set_attr "mode" "")])
 
+(define_insn_and_split "*avx2_pbroadcast_1_mem_1"
+  [(set (match_operand:VI_256 0 "register_operand" "=x,v")
+   (vec_duplicate:VI_256
+ (vec_select:
+   (match_operand:VI_256 1 "memory_operand" "m,m")
+   (parallel [(const_int 0)]]
+  "TARGET_AVX2
+   && can_create_pseudo_p ()"
+  "#"
+  "&& 1"
+  [(set (match_dup 0)
+   (vec_duplicate:VI_256 (match_dup 1)))]
+  "operands[1] = adjust_address_nv (operands[1], mode, 0);")
+
+(define_insn "*avx2_pbroadcast_mem"
+  [(set (match_operand:VI 0 "register_operand" "=x,v")
+   (vec_duplicate:VI
+ (match_operand: 1 "memory_operand" "m,m")))]
+  "TARGET_AVX2"
+  "vpbroadcast\t{%1, 

Re: [PATCH libquadmath/PR68686]

2018-11-04 Thread Ed Smith-Rowland

On 11/3/18 10:09 PM, Jeff Law wrote:

On 10/23/18 7:45 PM, Ed Smith-Rowland wrote:

Greetings,

This is an almost trivial patch to get the correct sign for tgammaq.

I don't have a testcase as I don't know where to put one.

OK?

Ed Smith-Rowland



tgammaq.CL

2018-10-24  Edward Smith-Rowland  <3dw...@verizon.net>

PR libquadmath/68686
* math/tgammaq.c: Correct sign for negative argument.

I don't have the relevant background to evaluate this for correctness.
Can you refer back to any kind of documentation which indicates what the
sign of the return value ought to be?

Alternately, if you can point to the relevant code in glibc that handles
the resultant sign, that'd be useful too.

Note that Joseph's follow-up doesn't touch on the gamma problem AFAICT,
but instead touches on the larger issues around trying to keep the
quadmath implementations between glibc and gcc more in sync.

Jeff

Thank you for (re)considering this.  The maths here can be read from a 
very good NIST website:


For the functional formulas referred to below - https://dlmf.nist.gov/5.5

For cool pictures - https://dlmf.nist.gov/5.3#i
(Aside: the reciprocal gamma function is entire - possessing no poles or 
other analytic complications anywhere in the complex plane.  A rgamma 
function might be a nice addition to the C family and to TS 18661-1 for 
that matter.)


For a table of extrema (Table 5.4.1) - https://dlmf.nist.gov/5.4#iii
These would be nice tests for accuracy as well as sign.

TL;DR:
==

Either follow the factorial-like recursion Gamma(x+1) = x Gamma(x) [DLMF 
5.5.1] backwards:

    Gamma(x-1) = Gamma(x) / (x-1)
Given that Gamma(x) is positive for x > 0 this will give alternating 
negative signs if you start with, say, x=1/2 and keep going.

A cooler formula is [DLMF 5.5.3]:
    Gamma(x) Gamma(1-x) = pi / sin(pi x)
Start with x = 3/2 and continue with higher odd half integers to see the 
sign alternation.


GLibC
=
I looked in glibc.  Unfortunately, I see how they have the same mistake:
glibc/math/w_tgammal_compat.c:
    long double
    __tgammal(long double x)
    {
        int local_signgam;
        long double y = __ieee754_gammal_r(x,&local_signgam);
    ...
    return local_signgam < 0 ? - y : y;
    }
I'm very sure this is where tgammaq came from.
Ditto for glibc/math/w_tgamma_compat.c and glibc/math/w_tgammaf_compat.c.

This fix will need to be done upstream.
Ed



PING: [PATCH] apply_subst_iterator: Handle define_split/define_insn_and_split

2018-11-04 Thread H.J. Lu
On Fri, Oct 26, 2018 at 12:44 AM H.J. Lu  wrote:
>
> On 10/25/18, Uros Bizjak  wrote:
> > On Fri, Oct 26, 2018 at 8:48 AM H.J. Lu  wrote:
> >>
> >> On 10/25/18, Uros Bizjak  wrote:
> >> > On Fri, Oct 26, 2018 at 8:07 AM H.J. Lu  wrote:
> >> >>
> >> >> * read-rtl.c (apply_subst_iterator): Handle
> >> >> define_insn_and_split.
> >> >> ---
> >> >>  gcc/read-rtl.c | 6 --
> >> >>  1 file changed, 4 insertions(+), 2 deletions(-)
> >> >>
> >> >> diff --git a/gcc/read-rtl.c b/gcc/read-rtl.c
> >> >> index d698dd4af4d..5957c29671a 100644
> >> >> --- a/gcc/read-rtl.c
> >> >> +++ b/gcc/read-rtl.c
> >> >> @@ -275,9 +275,11 @@ apply_subst_iterator (rtx rt, unsigned int, int
> >> >> value)
> >> >>if (value == 1)
> >> >>  return;
> >> >>gcc_assert (GET_CODE (rt) == DEFINE_INSN
> >> >> + || GET_CODE (rt) == DEFINE_INSN_AND_SPLIT
> >> >>   || GET_CODE (rt) == DEFINE_EXPAND);
> >> >
> >> > Can we also handle DEFINE_SPLIT here?
> >> >
> >>
> >> Yes, we could if there were a usage for it.  I am reluctant to add
> >> something
> >> I have no use nor test for.
> >
> > Just split one define_insn_and_split to define_insn and corresponding
> > define_split.
> >
> > define_insn_and_split is a contraction for the define_insn and
> > corresponding define_split, so it looks weird to only handle
> > define_insn_and_split without handling define_split.
> >
>
> Here is the updated patch to handle define_split.  Tested with
>
> (define_insn "*sse4_1_v8qiv8hi2_2"
>   [(set (match_operand:V8HI 0 "register_operand")
> (any_extend:V8HI
>   (vec_select:V8QI
> (subreg:V16QI
>   (vec_concat:V2DI
> (match_operand:DI 1 "memory_operand")
> (const_int 0)) 0)
> (parallel [(const_int 0) (const_int 1)
>(const_int 2) (const_int 3)
>(const_int 4) (const_int 5)
>(const_int 6) (const_int 7)]]
>   "TARGET_SSE4_1 &&  && "
>   "#")
>
> (define_split
>   [(set (match_operand:V8HI 0 "register_operand")
> (any_extend:V8HI
>   (vec_select:V8QI
> (subreg:V16QI
>   (vec_concat:V2DI
> (match_operand:DI 1 "memory_operand")
> (const_int 0)) 0)
> (parallel [(const_int 0) (const_int 1)
>(const_int 2) (const_int 3)
>(const_int 4) (const_int 5)
>(const_int 6) (const_int 7)]]
>   "TARGET_SSE4_1 &&  && 
>&& can_create_pseudo_p ()"
>   [(set (match_dup 0)
> (any_extend:V8HI (match_dup 1)))]
> {
>   operands[1] = adjust_address_nv (operands[1], V8QImode, 0);
> })
>

PING:

https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01665.html

This patch blocks an i386 backend patch.

-- 
H.J.


PING: V2 [PATCH] i386: Add pass_remove_partial_avx_dependency

2018-11-04 Thread H.J. Lu
On Fri, Oct 19, 2018 at 1:44 AM H.J. Lu  wrote:
>
> On 10/18/18, Jan Hubicka  wrote:
> >> we need to generate
> >>
> >>  vxorp[ds]   %xmmN, %xmmN, %xmmN
> >>  ...
> >>  vcvtss2sd   f(%rip), %xmmN, %xmmX
> >>  ...
> >>  vcvtsi2ss   i(%rip), %xmmN, %xmmY
> >>
> >> to avoid partial XMM register stall.  This patch adds a pass to generate
> >> a single
> >>
> >>  vxorps  %xmmN, %xmmN, %xmmN
> >>
> >> at function entry, which is shared by all SF and DF conversions, instead
> >> of generating one
> >>
> >>  vxorp[ds]   %xmmN, %xmmN, %xmmN
> >>
> >> for each SF/DF conversion.
> >>
> >> Performance impacts on SPEC CPU 2017 rate with 1 copy using
> >>
> >> -Ofast -march=native -mfpmath=sse -fno-associative-math -funroll-loops
> >>
> >> are
> >>
> >> 1. On Broadwell server:
> >>
> >> 500.perlbench_r (-0.82%)
> >> 502.gcc_r (0.73%)
> >> 505.mcf_r (-0.24%)
> >> 520.omnetpp_r (-2.22%)
> >> 523.xalancbmk_r (-1.47%)
> >> 525.x264_r (0.31%)
> >> 531.deepsjeng_r (0.27%)
> >> 541.leela_r (0.85%)
> >> 548.exchange2_r (-0.11%)
> >> 557.xz_r (-0.34%)
> >> Geomean: (-0.23%)
> >>
> >> 503.bwaves_r (0.00%)
> >> 507.cactuBSSN_r (-1.88%)
> >> 508.namd_r (0.00%)
> >> 510.parest_r (-0.56%)
> >> 511.povray_r (0.49%)
> >> 519.lbm_r (-1.28%)
> >> 521.wrf_r (-0.28%)
> >> 526.blender_r (0.55%)
> >> 527.cam4_r (-0.20%)
> >> 538.imagick_r (2.52%)
> >> 544.nab_r (-0.18%)
> >> 549.fotonik3d_r (-0.51%)
> >> 554.roms_r (-0.22%)
> >> Geomean: (0.00%)
> >
> > I wonder why the patch seems to have more effect on specint that should not
> > care much
> > about float<->double conversions?
>
> These are within noise range.
>
> >> number of vxorp[ds]:
> >>
> >> before   after   difference
> >> 14570    4515    -69%
> >>
> >> OK for trunk?
> >
> > This looks very nice though.
> >
>
> > +  if (v4sf_const0)
> > +{
> > +  /* Generate a single vxorps at function entry and perform df
> > +  rescan. */
> > +  bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
> > +  insn = BB_HEAD (bb);
> > +  set = gen_rtx_SET (v4sf_const0, CONST0_RTX (V4SFmode));
> > +  set_insn = emit_insn_after (set, insn);
> > +  df_insn_rescan (set_insn);
> > +  df_process_deferred_rescans ();
> > +}
> >
> > It seems suboptimal to place the const0 at the entry of function - if the
> > conversion happens in cold region of function this will just increase
> > register
> > pressure.  I guess right answer would be to look for the postdominance
> > frontier
>
> Did you mean "the nearest common dominator"?
>
> > of the set of all uses of the zero register?
> >
>
> Here is the updated patch to add a pass to generate a single
>
> vxorps  %xmmN, %xmmN, %xmmN
>
> at entry of the nearest common dominator for basic blocks with SF/DF
> conversions.  OK for trunk?
>

PING:

https://gcc.gnu.org/ml/gcc-patches/2018-10/msg01175.html


-- 
H.J.


PING: V4 [PATCH] C/C++: Add -Waddress-of-packed-member

2018-11-04 Thread H.J. Lu
On Tue, Sep 25, 2018 at 8:46 AM H.J. Lu  wrote:
>
> On Fri, Aug 31, 2018 at 2:04 PM, Jason Merrill  wrote:
> > On 07/23/2018 05:24 PM, H.J. Lu wrote:
> >>
> >> On Mon, Jun 18, 2018 at 12:26 PM, Joseph Myers 
> >> wrote:
> >>>
> >>> On Mon, 18 Jun 2018, Jason Merrill wrote:
> >>>
>  On Mon, Jun 18, 2018 at 11:59 AM, Joseph Myers 
>  wrote:
> >
> > On Mon, 18 Jun 2018, Jason Merrill wrote:
> >
> >>> +  if (TREE_CODE (rhs) == COND_EXPR)
> >>> +{
> >>> +  /* Check the THEN path first.  */
> >>> +  tree op1 = TREE_OPERAND (rhs, 1);
> >>> +  context = check_address_of_packed_member (type, op1);
> >>
> >>
> >> This should handle the GNU extension of re-using operand 0 if operand
> >> 1 is omitted.
> >
> >
> > Doesn't that just use a SAVE_EXPR?
> 
> 
>  Hmm, I suppose it does, but many places in the compiler seem to expect
>  that it produces a COND_EXPR with TREE_OPERAND 1 as NULL_TREE.
> >>>
> >>>
> >>> Maybe that's used somewhere inside the C++ front end.  For C a SAVE_EXPR
> >>> is produced directly.
> >>
> >>
> >> Here is the updated patch.  Changes from the last one:
> >>
> >> 1. Handle COMPOUND_EXPR.
> >> 2. Fixed typos in comments.
> >> 3. Combined warn_for_pointer_of_packed_member and
> >> warn_for_address_of_packed_member into
> >> warn_for_address_or_pointer_of_packed_member.
> >
> >
> >> c.i:4:33: warning: converting a packed ‘struct C *’ pointer increases the
> >> alignment of ‘long int *’ pointer from 1 to 8 [-Waddress-of-packed-member]
> >
> >
> > I think this would read better as
> >
> > c.i:4:33: warning: converting a packed ‘struct C *’ pointer (alignment 1) to
> > ‘long int *’ (alignment 8) may result in an unaligned pointer value
> > [-Waddress-of-packed-member]
>
> Fixed.
>
> >> +  while (TREE_CODE (base) == ARRAY_REF)
> >> +   base = TREE_OPERAND (base, 0);
> >> +  if (TREE_CODE (base) != COMPONENT_REF)
> >> +   return NULL_TREE;
> >
> >
> > Are you deliberately not handling the other handled_component_p cases? If
> > so, there should be a comment.
>
> I changed it to
>
>  while (handled_component_p (base))
> {
>   enum tree_code code = TREE_CODE (base);
>   if (code == COMPONENT_REF)
> break;
>   switch (code)
> {
> case ARRAY_REF:
>   base = TREE_OPERAND (base, 0);
>   break;
> default:
>   /* FIXME: Can it ever happen?  */
>   gcc_unreachable ();
>   break;
> }
> }
>
> Is there a testcase to trigger this ICE? I couldn't find one.
>
> >> +  /* Check alignment of the object.  */
> >> +  if (TREE_CODE (object) == COMPONENT_REF)
> >> +{
> >> +  field = TREE_OPERAND (object, 1);
> >> +  if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
> >> +   {
> >> + type_align = TYPE_ALIGN (type);
> >> + context = DECL_CONTEXT (field);
> >> + record_align = TYPE_ALIGN (context);
> >> + if ((record_align % type_align) != 0)
> >> +   return context;
> >> +   }
> >> +}
> >
> >
> > Why doesn't this recurse?  What if you have a packed field three
> > COMPONENT_REFs down?
>
> My patch works on
> [hjl@gnu-cfl-1 pr51628-4]$ cat x.i
> struct A { int i; } __attribute__ ((packed));
> struct B { struct A a; };
> struct C { struct B b; };
>
> extern struct C *p;
>
> int* g8 (void) { return &p->b.a.i; }
> [hjl@gnu-cfl-1 pr51628-4]$ make x.s
> /export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc
> -B/export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/ -O2
> -S x.i
> x.i: In function ‘g8’:
> x.i:7:25: warning: taking address of packed member of ‘struct A’ may
> result in an unaligned pointer value [-Waddress-of-packed-member]
> 7 | int* g8 (void) { return &p->b.a.i; }
>   | ^
> [hjl@gnu-cfl-1 pr51628-4]$
>
> If it isn't what you had in mind, can you give me a testcase?
>
> >> +  if (TREE_CODE (rhs) == COND_EXPR)
> >> +{
> >> +  /* Check the THEN path first.  */
> >> +  tree op1 = TREE_OPERAND (rhs, 1);
> >> +  context = check_address_of_packed_member (type, op1);
> >> +  if (context)
> >> +   rhs = op1;
> >> +  else
> >> +   {
> >> + /* Check the ELSE path.  */
> >> + rhs = TREE_OPERAND (rhs, 2);
> >> + context = check_address_of_packed_member (type, rhs);
> >> +   }
> >> +}
> >
> >
> > Likewise, what if you have more levels of COND_EXPR?  Or COMPOUND_EXPR
> > within COND_EXPR?
>
> Fixed, now I got
>
> [hjl@gnu-cfl-1 pr51628-5]$ cat z.i
> struct A {
>   int i;
> } __attribute__ ((packed));
>
> int*
> foo3 (struct A *p1, int *q1, int *q2, struct A *p2)
> {
>   return (q1
>   ? &p1->i
>   : (q2 ? &p2->i : q2));
> }
> [hjl@gnu-cfl-1 pr51628-5]$ make z.s
> /export/build/gnu/tools-build/gcc-debug/build-x86_64-linux/gcc/xgcc
> 

Re: [patch, libgfortran] PR78351 comma not terminating READ of formatted input field

2018-11-04 Thread Jerry DeLisle

On 11/4/18 1:51 AM, Bernhard Reutner-Fischer wrote:

On Sat, 3 Nov 2018 15:33:07 -0700
Jerry DeLisle  wrote:


diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
index 31198a3cc39..0d26101cef0 100644
--- a/libgfortran/io/transfer.c
+++ b/libgfortran/io/transfer.c



@@ -260,22 +250,80 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length)
sseek (dtp->u.p.current_unit->s, -1, SEEK_CUR);
  }
  
-  lorig = *length;

-  if (is_char4_unit(dtp))
+  /* To support legacy code we have to scan the input string one byte
+ at a time because we don't no where an early comma may be and the


As Andreas said s/no/know/


+ requested length could go passed the end of a comma shortened


s/passed/past/


+  /* Get the first chracter of the string to establish the base


s/chracter/character/


+  /* Now we scan the rest and exit deal with an end-of-file


s/ exit// ?





Fixed all my typos, thanks.

Jerry


RE: [patch][x86_64]: AMD znver2 enablement

2018-11-04 Thread Kumar, Venkataramanan
Hi Uros and Honza,

I have committed the znver2 patch.
Ref:https://gcc.gnu.org/viewcvs/gcc?limit_changes=0&view=revision&revision=265775

Thank you.

regards,
Venkat.

> -Original Message-
> From: gcc-patches-ow...@gcc.gnu.org 
> On Behalf Of Kumar, Venkataramanan
> Sent: Sunday, November 4, 2018 12:21 AM
> To: Uros Bizjak 
> Cc: gcc-patches@gcc.gnu.org; Jan Hubicka 
> Subject: RE: [patch][x86_64]: AMD znver2 enablement
> 
> Hi Uros,
> 
> > -Original Message-
> > From: Uros Bizjak 
> > Sent: Friday, November 2, 2018 9:06 PM
> > To: Kumar, Venkataramanan 
> > Cc: gcc-patches@gcc.gnu.org; Jan Hubicka 
> > Subject: Re: [patch][x86_64]: AMD znver2 enablement
> >
> > On Wed, Oct 31, 2018 at 6:25 AM Kumar, Venkataramanan
> >  wrote:
> > >
> > > Hi Maintainers,
> > >
> > > PFA, the patch that enables support for the next generation AMD  Zen
> > > CPU
> > via -march=znver2.
> > > As of now,  znver2 is using the same costs and scheduler
> > > descriptions
> > written for znver1.
> > >
> > > We will update scheduler descriptions and costing for znver2 later
> > > as we
> > get more information.
> > >
> > > Ok for trunk?
> > >
> > > Regards,
> > > Venkat.
> > >
> > > ChangeLog gcc:
> > > * common/config/i386/i386-common.c (processor_alias_table):
> > > Add
> > znver2 entry.
> > >   * config.gcc (i[34567]86-*-linux* | ...): Add znver2.
> > >   (case ${target}): Add znver2.
> > >   * config/i386/driver-i386.c: (host_detect_local_cpu): Let
> > >   -march=native recognize znver2 processors.
> > >   * config/i386/i386-c.c (ix86_target_macros_internal): Add 
> > > znver2.
> > >   * config/i386/i386.c (m_znver2): New definition.
> > >   (m_ZNVER): New definition.
> > >   (m_AMD_MULTIPLE): Includes m_znver2.
> > >   (processor_cost_table): Add znver2 entry.
> > >   (processor_target_table): Add znver2 entry.
> > >   (get_builtin_code_for_version): Set priority for
> > >  PROCESSOR_ZNVER2.
> > > (processor_model): Add M_AMDFAM17H_ZNVER2.
> > > (arch_names_table): Ditto.
> > > (ix86_reassociation_width): Include znver2.
> > > * config/i386/i386.h (TARGET_znver2): New definition.
> > >   (struct ix86_size_cost): Add TARGET_ZNVER2.
> > >   (enum processor_type): Add PROCESSOR_ZNVER2.
> > >   * config/i386/i386.md (define_attr "cpu"): Add znver2.
> > > * config/i386/x86-tune-costs.h: (processor_costs) Add znver2 
> > > costs.
> > > * config/i386/x86-tune-sched.c: (ix86_issue_rate): Add znver2.
> > > (ix86_adjust_cost): Add znver2.
> > >   * config/i386/x86-tune.def:  Replace m_ZNVER1 by m_ZNVER
> > >   * gcc/doc/extend.texi: Add details about znver2.
> > >   * gcc/doc/invoke.texi: Add details about znver2.
> > >
> > > ChangeLog libgcc
> > >  * config/i386/cpuinfo.c: (get_amd_cpu): Add znver2.
> > >  (processor_subtypes): Ditto.
> >
> >
> > diff --git a/libgcc/config/i386/cpuinfo.h
> > b/libgcc/config/i386/cpuinfo.h index 0aa887b..86cb4ea 100644
> > --- a/libgcc/config/i386/cpuinfo.h
> > +++ b/libgcc/config/i386/cpuinfo.h
> > @@ -67,6 +67,7 @@ enum processor_subtypes
> >AMDFAM15H_BDVER3,
> >AMDFAM15H_BDVER4,
> >AMDFAM17H_ZNVER1,
> > +  AMDFAM17H_ZNVER2,
> >INTEL_COREI7_IVYBRIDGE,
> >INTEL_COREI7_HASWELL,
> >INTEL_COREI7_BROADWELL,
> >
> > As the comment above these enums says:
> >
> > /* Any new types or subtypes have to be inserted at the end. */
> >
> > So, please add new entry at the end of enum processor_types.
> >
> > diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index
> > 963c7fc..bbe3bb3 100644
> > --- a/gcc/config/i386/i386.c
> > +++ b/gcc/config/i386/i386.c
> > @@ -32269,6 +32276,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
> >  M_AMDFAM15H_BDVER3,
> >  M_AMDFAM15H_BDVER4,
> >  M_AMDFAM17H_ZNVER1,
> > +M_AMDFAM17H_ZNVER2,
> >  M_INTEL_COREI7_IVYBRIDGE,
> >  M_INTEL_COREI7_HASWELL,
> >  M_INTEL_COREI7_BROADWELL,
> >
> > The above also have to be in sync with enum processor_subtypes.
> >
> > Otherwise LGTM.
> >
> > Uros.
> 
> I have updated the patch as per your review comments.  Thank you,  I will
> commit the attached patch.
> 
> Regards,
> Venkat.
> 
> ChangeLog:
> * common/config/i386/i386-common.c (processor_alias_table): Add
> znver2 entry.
>   * config.gcc (i[34567]86-*-linux* | ...): Add znver2.
>   (case ${target}): Add znver2.
>   * config/i386/driver-i386.c: (host_detect_local_cpu): Let
>   -march=native recognize znver2 processors.
>   * config/i386/i386-c.c (ix86_target_macros_internal): Add znver2.
>   * config/i386/i386.c (m_znver2): New definition.
>   (m_ZNVER): New definition.
>   (m_AMD_MULTIPLE): Includes m_znver2.
>   (processor_cost_table): Add znver2 entry.
>   (processor_target_table): Add 

Re: [patch, libgfortran] PR78351 comma not terminating READ of formatted input field

2018-11-04 Thread Bernhard Reutner-Fischer
On Sat, 3 Nov 2018 15:33:07 -0700
Jerry DeLisle  wrote:

> diff --git a/libgfortran/io/transfer.c b/libgfortran/io/transfer.c
> index 31198a3cc39..0d26101cef0 100644
> --- a/libgfortran/io/transfer.c
> +++ b/libgfortran/io/transfer.c

> @@ -260,22 +250,80 @@ read_sf_internal (st_parameter_dt *dtp, size_t *length)
>sseek (dtp->u.p.current_unit->s, -1, SEEK_CUR);
>  }
>  
> -  lorig = *length;
> -  if (is_char4_unit(dtp))
> +  /* To support legacy code we have to scan the input string one byte
> + at a time because we don't no where an early comma may be and the

As Andreas said s/no/know/

> + requested length could go passed the end of a comma shortened

s/passed/past/

> +  /* Get the first chracter of the string to establish the base

s/chracter/character/

> +  /* Now we scan the rest and exit deal with an end-of-file

s/ exit// ?




Re: [PATCH v3 3/3] or1k: gcc: initial support for openrisc

2018-11-04 Thread Stafford Horne
On Mon, Oct 29, 2018 at 02:28:11PM +, Szabolcs Nagy wrote:
> On 27/10/18 05:37, Stafford Horne wrote:
> > +++ b/gcc/config/or1k/linux.h
> > @@ -0,0 +1,44 @@
> > +/* Linux Definitions for OpenRISC.
> > +   Copyright (C) 2018 Free Software Foundation, Inc.
> > +   Contributed by Stafford Horne.
> > +
> > +   This file is part of GCC.
> > +
> > +   GCC is free software; you can redistribute it and/or modify it
> > +   under the terms of the GNU General Public License as published
> > +   by the Free Software Foundation; either version 3, or (at your
> > +   option) any later version.
> > +
> > +   GCC is distributed in the hope that it will be useful, but WITHOUT
> > +   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
> > +   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
> > +   License for more details.
> > +
> > +   You should have received a copy of the GNU General Public License
> > +   along with GCC; see the file COPYING3.  If not see
> > +   <http://www.gnu.org/licenses/>.  */
> > +
> > +#ifndef GCC_OR1K_LINUX_H
> > +#define GCC_OR1K_LINUX_H
> > +
> > +/* elfos.h should have already been included.  Now just override
> > +   any conflicting definitions and add any extras.  */
> > +
> > +#define TARGET_OS_CPP_BUILTINS() \
> > +  GNU_USER_TARGET_OS_CPP_BUILTINS ()
> > +
> > +#define GLIBC_DYNAMIC_LINKER "/lib/ld-linux-or1k.so.1"
> > +
> > +#undef MUSL_DYNAMIC_LINKER
> > +#define MUSL_DYNAMIC_LINKER  "/lib/ld-musl-or1k.so.1"
> > +
> > +#undef LINK_SPEC
> > +#define LINK_SPEC "%{h*}   \
> > +   %{static:-Bstatic}  \
> > +   %{shared:-shared}   \
> > +   %{symbolic:-Bsymbolic}  \
> > +   %{!static:  \
> > + %{rdynamic:-export-dynamic}   \
> > + %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
> > +
> > +#endif /* GCC_OR1K_LINUX_H */
> 
> note that because of the -static-pie mess each
> target needs a more complicated LINK_SPEC now.

Hello,

Does something like this look better?

--- a/gcc/config/or1k/linux.h
+++ b/gcc/config/or1k/linux.h
@@ -37,8 +37,9 @@
%{static:-Bstatic}  \
%{shared:-shared}   \
%{symbolic:-Bsymbolic}  \
-   %{!static:  \
+   %{!static:%{!static-pie:\
  %{rdynamic:-export-dynamic}   \
- %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}"
+ %{!shared:-dynamic-linker " GNU_USER_DYNAMIC_LINKER "}}} \
+   %{static-pie:-Bstatic -pie --no-dynamic-linker -z text}"
 
 #endif /* GCC_OR1K_LINUX_H */

I have tested this out with or1k-linux-musl, but I get some LD complaints i.e.

.../or1k-linux-musl/bin/ld: .../or1k-linux-musl/lib/libc.a(exit.o): non-pic 
relocation against symbol __fini_array_end
.../or1k-linux-musl/bin/ld: .../or1k-linux-musl/lib/libc.a(exit.o): non-pic 
relocation against symbol __fini_array_start

Those are some warnings we recently added to LD, perhaps I need to rebuild the
libc.a with PIE as well.  I will try it out, but if anyone has some suggestions
that would be helpful.

> i think there could be a generic LINK_SPEC in
> config/linux.h or config/gnu-user.h that works
> for simple targets (the start file spec is
> already there) so this complex logic is not
> repeated everywhere.
> 
> or even do the -no-dynamic-linker logic in
> LINK_PIE_SPEC in gcc.c for all targets, not
> just linux, so backends don't need to do
> anything to get static-pie to work.

I see, yeah, it seems this could be made generic.  I would defer myself working
on making this generic until after or1k port is in.  Sorry, I don't have much
time to make sure it doesn't break everything/anything.  Also, to tell you the
truth I haven't heard of anyone ever running OpenRISC with pie, it has always
been something on my todo list though.  I have had some inquiries into helping
bootstrap some linux nommu machines.

-Stafford


Re: [patch, libgfortran] PR78351 comma not terminating READ of formatted input field

2018-11-04 Thread Andreas Schwab
On Nov 03 2018, Jerry DeLisle  wrote:

> +  /* To support legacy code we have to scan the input string one byte
> + at a time because we don't no where an early comma may be and the

s/no/know/

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 7578 EB47 D4E5 4D69 2510  2552 DF73 E780 A9DA AEC1
"And now for something completely different."


[PATCH 2/3] Add a pass to automatically add ptwrite instrumentation

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

Add a new pass to automatically instrument changes to variables
with the new PTWRITE instruction on x86. PTWRITE writes a 4 or 8 byte
field into a Processor Trace log, which allows low overhead
logging of information.

This allows reconstructing values later, which can be useful for
debugging or other analysis of the program behavior. With the compiler
support this can be done without having to manually add instrumentation
to the code.

Using dwarf information this can be later mapped back to the variables.

There are new options to enable instrumentation for different types,
and also a new attribute to control analysis fine grained per
function or variable level. The attributes can be set on both
the variable and the type level, and also on structure fields.
This allows to enable tracing only for specific code in large
programs.

The pass is generic, but only the x86 backend enables the necessary
hooks. When the backend enables the necessary hooks (with -mptwrite)
there is an additional pass that looks through the code for
attribute vartrace enabled functions or variables.

The -fvartrace-locals options is experimental: it works, but it
generates redundant ptwrites because the pass doesn't use
the SSA information to minimize instrumentation. This could be optimized
later.

Currently the code can be tested with SDE, or on a Intel
Gemini Lake system with a new enough Linux kernel (v4.10+)
that supports PTWRITE for PT. Linux perf can be used to
record the values

perf record -e intel_pt/ptw=1,branch=0/ program
perf script --itrace=crw -F +synth ...

I have an experimental version of perf that can also use
dwarf information to symbolize many[1] values back to their variable
names. So far it is not in standard perf, but available at

https://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc.git/log/?h=perf/var-resolve-4

It is currently not able to decode all variable locations to names,
but a large subset.

Longer term hopefully gdb will support this information too.

The CPU can potentially generate very high data bandwidths when
code doing a lot of computation is heavily instrumented.
This can cause some data loss in both the CPU and also in perf
logging the data when the disk cannot keep up.

Running some larger workloads most workloads do not cause
CPU level overflows, but I've seen it with -fvartrace
with crafty, and with more workloads with -fvartrace-locals.

Recommendation is to not fully instrument programs,
but only areas of interest either at the file level or using
the attributes.

The other thing is that perf and the disk often cannot keep up
with the data bandwidth for longer computations. In this case
it's possible to use perf snapshot mode (add --snapshot
to the command line above). The data will be only logged to
a memory ring buffer then, and only dump the buffers on events
of interest by sending SIGUSR2 to the perf binary.

In the future this will be hopefully better supported with
core files and gdb.

Passes bootstrap and test suite on x86_64-linux, also
bootstrapped and tested gcc itself with full -fvartrace
and -fvartrace-locals instrumentation.

gcc/:

2018-11-03  Andi Kleen  

* Makefile.in: Add tree-vartrace.o.
* common.opt: Add -fvartrace, -fvartrace-returns,
-fvartrace-args, -fvartrace-reads, -fvartrace-writes,
-fvartrace-locals
* config/i386/i386.c (ix86_vartrace_func): Add.
(TARGET_VARTRACE_FUNC): Add.
* doc/extend.texi: Document vartrace/no_vartrace
attributes.
* doc/invoke.texi: Document -fvartrace, -fvartrace-returns,
-fvartrace-args, -fvartrace-reads, -fvartrace-writes,
-fvartrace-locals
* doc/tm.texi (TARGET_VARTRACE_FUNC): Add.
* passes.def: Add vartrace pass.
* target.def (vartrace_func): Add.
* tree-pass.h (make_pass_vartrace): Add.
* tree-vartrace.c: New file to implement vartrace pass.

gcc/c-family/:

2018-11-03  Andi Kleen  

* c-attribs.c (handle_vartrace_attribute): New function.

config/:

2018-11-03  Andi Kleen  

* bootstrap-vartrace.mk: New.
* bootstrap-vartrace-locals.mk: New.
---
 config/bootstrap-vartrace-locals.mk |   3 +
 config/bootstrap-vartrace.mk|   3 +
 gcc/Makefile.in |   1 +
 gcc/c-family/c-attribs.c|  23 ++
 gcc/common.opt  |  24 ++
 gcc/config/i386/i386.c  |  16 +
 gcc/doc/extend.texi |  13 +
 gcc/doc/invoke.texi |  29 ++
 gcc/doc/tm.texi |   4 +
 gcc/doc/tm.texi.in  |   2 +
 gcc/passes.def  |   1 +
 gcc/target.def  |   7 +
 gcc/tree-pass.h |   1 +
 gcc/tree-vartrace.c | 463 
 14 files changed, 590 insertions(+)
 create mode 100644 config/bootstrap-vartrace-locals.mk
 create mode 100644 config/bootstrap-vartrace.mk
 create 

[PATCH 1/3] Add PTWRITE builtins for x86

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

Add builtins/intrinsics for PTWRITE. PTWRITE is a new instruction on Intel 
Gemini Lake/
Goldmont Plus that allows to write values into the Processor Trace log. This 
allows
very light weight instrumentation of programs.

The intrinsics are compatible to icc. Automatically enabled for Goldmont Plus.

gcc/:

2018-11-03  Andi Kleen  

* common/config/i386/i386-common.c (OPTION_MASK_ISA_PTWRITE_SET): New.
(OPTION_MASK_ISA_PTWRITE_UNSET): New.
(ix86_handle_option): Handle OPT_mptwrite.
* config/i386/cpuid.h (bit_PTWRITE): Add.
* config/i386/driver-i386.c (host_detect_local_cpu): Detect ptwrite.
* config/i386/i386-builtin.def (BDESC): Add ptwrite32/64.
* config/i386/i386-c.c (ix86_target_macros_internal): Define 
__PTWRITE__.
* config/i386/i386.c (ix86_target_string): Handle ptwrite.
(ix86_option_override_internal): Handle PTA_PTWRITE.
(ix86_valid_target_attribute_inner_p): Define ptwrite.
(def_builtin2): Force UINT64 to be 64bit only.
* config/i386/i386.h (TARGET_PTWRITE): Add.
(TARGET_PTWRITE_P): Add.
(PTA_PTWRITE): Add.
* config/i386/i386.md: Define ptwrite.
* config/i386/i386.opt: Add -mptwrite.
* config/i386/immintrin.h (_ptwrite64): Add.
(_ptwrite32): Add
* doc/extend.texi: Document __builtin_ia32_ptwrite*.
* doc/invoke.texi: Document -mptwrite.

gcc/testsuite/ChangeLog:

2018-11-03  Andi Kleen  

* gcc.target/i386/ptwrite1.c: New test.
* gcc.target/i386/ptwrite2.c: New test.
---
 gcc/common/config/i386/i386-common.c | 15 
 gcc/config/i386/cpuid.h  |  4 
 gcc/config/i386/driver-i386.c| 12 ++
 gcc/config/i386/i386-builtin.def |  4 
 gcc/config/i386/i386-c.c |  2 ++
 gcc/config/i386/i386.c   |  9 ++-
 gcc/config/i386/i386.h   |  5 +++-
 gcc/config/i386/i386.md  | 10 
 gcc/config/i386/i386.opt |  4 
 gcc/config/i386/immintrin.h  | 26 
 gcc/doc/extend.texi  |  9 +++
 gcc/doc/invoke.texi  |  7 --
 gcc/testsuite/gcc.target/i386/ptwrite1.c | 30 
 gcc/testsuite/gcc.target/i386/ptwrite2.c | 14 +++
 14 files changed, 147 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/ptwrite2.c

diff --git a/gcc/common/config/i386/i386-common.c 
b/gcc/common/config/i386/i386-common.c
index f12806ef3a9..f740995c1e4 100644
--- a/gcc/common/config/i386/i386-common.c
+++ b/gcc/common/config/i386/i386-common.c
@@ -140,6 +140,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_SET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_SET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_SET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_SET \
   (OPTION_MASK_ISA_F16C | OPTION_MASK_ISA_AVX_SET)
 #define OPTION_MASK_ISA_MWAITX_SET OPTION_MASK_ISA_MWAITX
@@ -267,6 +268,7 @@ along with GCC; see the file COPYING3.  If not see
 
 #define OPTION_MASK_ISA_FSGSBASE_UNSET OPTION_MASK_ISA_FSGSBASE
 #define OPTION_MASK_ISA_RDRND_UNSET OPTION_MASK_ISA_RDRND
+#define OPTION_MASK_ISA_PTWRITE_UNSET OPTION_MASK_ISA_PTWRITE
 #define OPTION_MASK_ISA_F16C_UNSET OPTION_MASK_ISA_F16C
 
 #define OPTION_MASK_ISA_GENERAL_REGS_ONLY_UNSET \
@@ -1125,6 +1127,19 @@ ix86_handle_option (struct gcc_options *opts,
}
   return true;
 
+case OPT_mptwrite:
+  if (value)
+   {
+ opts->x_ix86_isa_flags2 |= OPTION_MASK_ISA_PTWRITE_SET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_SET;
+   }
+  else
+   {
+ opts->x_ix86_isa_flags2 &= ~OPTION_MASK_ISA_PTWRITE_UNSET;
+ opts->x_ix86_isa_flags2_explicit |= OPTION_MASK_ISA_PTWRITE_UNSET;
+   }
+  return true;
+
 case OPT_mf16c:
   if (value)
{
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 7e9e2d153dc..2e6d4a55602 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -126,6 +126,10 @@
 #define bit_XSAVEC (1 << 1)
 #define bit_XSAVES (1 << 3)
 
+/* PT sub leaf (%eax == 14, %ecx == 0) */
+/* %ebx */
+#define bit_PTWRITE (1 << 4)
+
 /* Signatures for different CPU implementations as returned in uses
of cpuid with level 0.  */
 #define signature_AMD_ebx  0x68747541
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 8c830bde1dd..423b1c3827f 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -427,6 +427,8 @@ const char *host_detect_local_cpu (int argc, const char **argv)
   unsigned int has_waitpkg = 0;
   unsigned int has_cldemote = 0;
 
+  unsigned int has_ptwrite = 0;
+
   bool arch;
 
   unsigned int 

[PATCH 3/3] Add tests for the vartrace pass

2018-11-04 Thread Andi Kleen
From: Andi Kleen 

gcc/testsuite/:

2018-11-03  Andi Kleen  

* g++.dg/vartrace-3.C: New test.
* g++.dg/vartrace-ret.C: New test.
* g++.dg/vartrace-ret2.C: New test.
* gcc.target/i386/vartrace-1.c: New test.
* gcc.target/i386/vartrace-10.c: New test.
* gcc.target/i386/vartrace-11.c: New test.
* gcc.target/i386/vartrace-12.c: New test.
* gcc.target/i386/vartrace-13.c: New test.
* gcc.target/i386/vartrace-14.c: New test.
* gcc.target/i386/vartrace-15.c: New test.
* gcc.target/i386/vartrace-16.c: New test.
* gcc.target/i386/vartrace-17.c: New test.
* gcc.target/i386/vartrace-2.c: New test.
* gcc.target/i386/vartrace-3.c: New test.
* gcc.target/i386/vartrace-4.c: New test.
* gcc.target/i386/vartrace-5.c: New test.
* gcc.target/i386/vartrace-6.c: New test.
* gcc.target/i386/vartrace-7.c: New test.
* gcc.target/i386/vartrace-8.c: New test.
* gcc.target/i386/vartrace-9.c: New test.
---
 gcc/testsuite/g++.dg/vartrace-3.C   | 14 +++
 gcc/testsuite/g++.dg/vartrace-ret.C | 17 +
 gcc/testsuite/g++.dg/vartrace-ret2.C| 24 
 gcc/testsuite/gcc.target/i386/vartrace-1.c  | 41 +
 gcc/testsuite/gcc.target/i386/vartrace-10.c | 13 +++
 gcc/testsuite/gcc.target/i386/vartrace-11.c | 16 
 gcc/testsuite/gcc.target/i386/vartrace-12.c | 16 
 gcc/testsuite/gcc.target/i386/vartrace-13.c | 18 +
 gcc/testsuite/gcc.target/i386/vartrace-14.c | 17 +
 gcc/testsuite/gcc.target/i386/vartrace-15.c | 12 ++
 gcc/testsuite/gcc.target/i386/vartrace-16.c | 12 ++
 gcc/testsuite/gcc.target/i386/vartrace-17.c | 23 
 gcc/testsuite/gcc.target/i386/vartrace-2.c  |  9 +
 gcc/testsuite/gcc.target/i386/vartrace-3.c  |  9 +
 gcc/testsuite/gcc.target/i386/vartrace-4.c  | 13 +++
 gcc/testsuite/gcc.target/i386/vartrace-5.c  | 11 ++
 gcc/testsuite/gcc.target/i386/vartrace-6.c  | 13 +++
 gcc/testsuite/gcc.target/i386/vartrace-7.c  | 11 ++
 gcc/testsuite/gcc.target/i386/vartrace-8.c  | 11 ++
 gcc/testsuite/gcc.target/i386/vartrace-9.c  | 10 +
 20 files changed, 310 insertions(+)
 create mode 100644 gcc/testsuite/g++.dg/vartrace-3.C
 create mode 100644 gcc/testsuite/g++.dg/vartrace-ret.C
 create mode 100644 gcc/testsuite/g++.dg/vartrace-ret2.C
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-1.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-10.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-11.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-12.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-13.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-14.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-15.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-16.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-17.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-2.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-3.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-4.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-5.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-6.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-7.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-8.c
 create mode 100644 gcc/testsuite/gcc.target/i386/vartrace-9.c

diff --git a/gcc/testsuite/g++.dg/vartrace-3.C b/gcc/testsuite/g++.dg/vartrace-3.C
new file mode 100644
index 00000000000..13f71cca6d8
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-3.C
@@ -0,0 +1,14 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace-args " } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+int a;
+int b(int c) 
+{
+  if (a)
+c += 1;
+  else
+c += b(a);
+  b(c);
+  return 0;
+}
diff --git a/gcc/testsuite/g++.dg/vartrace-ret.C b/gcc/testsuite/g++.dg/vartrace-ret.C
new file mode 100644
index 00000000000..2a8a6753bd3
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-ret.C
@@ -0,0 +1,17 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace-returns " } */
+/* { dg-final { scan-assembler-not "ptwrite" } } */
+
+class foo { 
+public:
+short a;
+short b;
+};
+
+foo f1()
+{
+foo x = { 1, 2 };
+return x;
+}
+
+
diff --git a/gcc/testsuite/g++.dg/vartrace-ret2.C b/gcc/testsuite/g++.dg/vartrace-ret2.C
new file mode 100644
index 00000000000..56842d75fb6
--- /dev/null
+++ b/gcc/testsuite/g++.dg/vartrace-ret2.C
@@ -0,0 +1,24 @@
+/* { dg-do compile { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-O2 -mptwrite -fvartrace " } */
+/* { dg-final { scan-assembler "ptwrite" } } */
+
+typedef int a;
+enum b
+{ };
+struct ac
+{
+  a operator () (a, a, a, a, a, a);
+};
+struct c
+{
+  ac ag;
+} extern ai[];
+a d;
+void
+l (a e)
+{
+  b f;
+  a g, h, i, j, k;
+  e = d;
+