date:20150726

[C++ Patch, preapproved] Prefer DECL_SOURCE_LOCATION to "+D" and "+#D" (1/n)

2015-07-26 Thread Paolo Carlini


Hi,

a first rather straightforward set of changes. Tested x86_64-linux.

Thanks,
Paolo.

/
2015-07-26  Paolo Carlini  

* decl.c (poplevel): Use DECL_SOURCE_LOCATION and "%qD"
in warning_at instead of "%q+D" in warning.
(warn_extern_redeclared_static): Likewise for inform.
(check_redeclaration_no_default_args): Likewise for permerror.
(duplicate_decls): Likewise.
(check_previous_goto_1): Likewise for inform.
(check_goto, start_decl, check_for_uninitialized_const_var,
start_preparsed_function, finish_function): Likewise.
* decl2.c (build_anon_union_vars, c_parse_final_cleanups): Likewise.
* init.c (sort_mem_initializers): Likewise.
* typeck.c (convert_for_initialization): Likewise for inform.
(maybe_warn_about_returning_address_of_local): Likewise.
* typeck2.c (abstract_virtuals_error_sfinae): Likewise for inform.
(cxx_incomplete_type_diagnostic): Likewise for emit_diagnostic.
Index: decl.c
===
--- decl.c  (revision 226209)
+++ decl.c  (working copy)
@@ -640,14 +640,16 @@ poplevel (int keep, int reverse, int functionbody)
 TYPE_ATTRIBUTES (TREE_TYPE (decl)
  {
if (! TREE_USED (decl))
- warning (OPT_Wunused_variable, "unused variable %q+D", decl);
+ warning_at (DECL_SOURCE_LOCATION (decl),
+ OPT_Wunused_variable, "unused variable %qD", decl);
else if (DECL_CONTEXT (decl) == current_function_decl
 // For -Wunused-but-set-variable leave references alone.
 && TREE_CODE (TREE_TYPE (decl)) != REFERENCE_TYPE
 && errorcount == unused_but_set_errorcount)
  {
-   warning (OPT_Wunused_but_set_variable,
-"variable %q+D set but not used", decl);
+   warning_at (DECL_SOURCE_LOCATION (decl),
+   OPT_Wunused_but_set_variable,
+   "variable %qD set but not used", decl);
unused_but_set_errorcount = errorcount;
  }
  }
@@ -1157,7 +1159,8 @@ warn_extern_redeclared_static (tree newdecl, tree
 
   if (permerror (DECL_SOURCE_LOCATION (newdecl),
 "%qD was declared % and later %", newdecl))
-inform (input_location, "previous declaration of %q+D", olddecl);
+inform (DECL_SOURCE_LOCATION (olddecl),
+   "previous declaration of %qD", olddecl);
 }
 
 /* NEW_DECL is a redeclaration of OLD_DECL; both are functions or
@@ -1254,8 +1257,8 @@ check_redeclaration_no_default_args (tree decl)
t && t != void_list_node; t = TREE_CHAIN (t))
 if (TREE_PURPOSE (t))
   {
-   permerror (input_location,
-  "redeclaration of %q+#D may not have default "
+   permerror (DECL_SOURCE_LOCATION (decl),
+  "redeclaration of %q#D may not have default "
   "arguments", decl);
return;
   }
@@ -1328,8 +1331,9 @@ duplicate_decls (tree newdecl, tree olddecl, bool
   && DECL_UNINLINABLE (olddecl)
   && lookup_attribute ("noinline", DECL_ATTRIBUTES (olddecl)))
{
- if (warning (OPT_Wattributes, "function %q+D redeclared as inline",
-  newdecl))
+ if (warning_at (DECL_SOURCE_LOCATION (newdecl),
+ OPT_Wattributes, "function %qD redeclared as inline",
+ newdecl))
inform (DECL_SOURCE_LOCATION (olddecl),
"previous declaration of %qD with attribute noinline",
olddecl);
@@ -1338,8 +1342,9 @@ duplicate_decls (tree newdecl, tree olddecl, bool
   && DECL_UNINLINABLE (newdecl)
   && lookup_attribute ("noinline", DECL_ATTRIBUTES (newdecl)))
{
- if (warning (OPT_Wattributes, "function %q+D redeclared with "
-  "attribute noinline", newdecl))
+ if (warning_at (DECL_SOURCE_LOCATION (newdecl),
+ OPT_Wattributes, "function %qD redeclared with "
+ "attribute noinline", newdecl))
inform (DECL_SOURCE_LOCATION (olddecl),
"previous declaration of %qD was inline",
olddecl);
@@ -1436,8 +1441,9 @@ duplicate_decls (tree newdecl, tree olddecl, bool
  /* A near match; override the builtin.  */
 
  if (TREE_PUBLIC (newdecl))
-   warning (0, "new declaration %q+#D ambiguates built-in "
-"declaration %q#D", newdecl, olddecl);
+   warning_at (DECL_SOURCE_LOCATION (newdecl), 0,
+   "new declaration %q#D ambiguates built-in "
+   "declaration %q#D", newdecl, olddecl);
  else
warning (OPT_W

[PATCH, alpha]: Use SUBREG_P predicate

2015-07-26 Thread Uros Bizjak

2015-07-26  Uros Bizjak  

* config/alpha/alpha.c: Use SUBREG_P predicate.
* config/alpha/predicates.md: Ditto.

Bootstrapped and regression tested on alpha-linux-gnu.

Committed to mainline SVN.

Uros.
Index: config/alpha/alpha.c
===
--- config/alpha/alpha.c(revision 226216)
+++ config/alpha/alpha.c(working copy)
@@ -700,7 +700,7 @@ resolve_reload_operand (rtx op)
   if (reload_in_progress)
 {
   rtx tmp = op;
-  if (GET_CODE (tmp) == SUBREG)
+  if (SUBREG_P (tmp))
tmp = SUBREG_REG (tmp);
   if (REG_P (tmp)
  && REGNO (tmp) >= FIRST_PSEUDO_REGISTER)
@@ -891,7 +891,7 @@ alpha_legitimate_address_p (machine_mode mode, rtx
 x = XEXP (x, 0);
 
   /* Discard non-paradoxical subregs.  */
-  if (GET_CODE (x) == SUBREG
+  if (SUBREG_P (x)
   && (GET_MODE_SIZE (GET_MODE (x))
  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)
 x = SUBREG_REG (x);
@@ -919,7 +919,7 @@ alpha_legitimate_address_p (machine_mode mode, rtx
   x = XEXP (x, 0);
 
   /* Discard non-paradoxical subregs.  */
-  if (GET_CODE (x) == SUBREG
+  if (SUBREG_P (x)
   && (GET_MODE_SIZE (GET_MODE (x))
  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)
x = SUBREG_REG (x);
@@ -953,7 +953,7 @@ alpha_legitimate_address_p (machine_mode mode, rtx
  x = XEXP (x, 0);
 
  /* Discard non-paradoxical subregs.  */
- if (GET_CODE (x) == SUBREG
+ if (SUBREG_P (x)
  && (GET_MODE_SIZE (GET_MODE (x))
  < GET_MODE_SIZE (GET_MODE (SUBREG_REG (x)
x = SUBREG_REG (x);
Index: config/alpha/predicates.md
===
--- config/alpha/predicates.md  (revision 226216)
+++ config/alpha/predicates.md  (working copy)
@@ -134,7 +134,7 @@
 (define_predicate "hard_fp_register_operand"
   (match_operand 0 "register_operand")
 {
-  if (GET_CODE (op) == SUBREG)
+  if (SUBREG_P (op))
 op = SUBREG_REG (op);
   return REGNO_REG_CLASS (REGNO (op)) == FLOAT_REGS;
 })
@@ -143,7 +143,7 @@
 (define_predicate "hard_int_register_operand"
   (match_operand 0 "register_operand")
 {
-  if (GET_CODE (op) == SUBREG)
+  if (SUBREG_P (op))
 op = SUBREG_REG (op);
   return REGNO_REG_CLASS (REGNO (op)) == GENERAL_REGS;
 })
@@ -506,7 +506,7 @@
 (define_special_predicate "any_memory_operand"
   (match_code "mem,reg,subreg")
 {
-  if (GET_CODE (op) == SUBREG)
+  if (SUBREG_P (op))
 op = SUBREG_REG (op);
 
   if (MEM_P (op))
@@ -537,7 +537,7 @@
 (define_predicate "reg_not_elim_operand"
   (match_operand 0 "register_operand")
 {
-  if (GET_CODE (op) == SUBREG)
+  if (SUBREG_P (op))
 op = SUBREG_REG (op);
   return op != frame_pointer_rtx && op != arg_pointer_rtx;
 })

[PATCH, testsuite, alpha]: Use unsigned immediates to avoid shift-overflow warning

2015-07-26 Thread Uros Bizjak

2015-07-26  Uros Bizjak  

* gcc.target/alpha/pr66140.c (lpfc_bg_setup_bpl): Use unsigned
immediates to avoid shift-overflow warnings.

Tested on alpha-linux-gnu and committed to mainline SVN.

Uros.
Index: ChangeLog
===
--- ChangeLog   (revision 226232)
+++ ChangeLog   (working copy)
@@ -1,3 +1,8 @@
+2015-07-26  Uros Bizjak  
+
+   * gcc.target/alpha/pr66140.c (lpfc_bg_setup_bpl): Use unsigned
+   immediates to avoid shift-overflow warnings.
+
 2015-07-25  Patrick Palka  
 
PR c++/66857
Index: gcc.target/alpha/pr66140.c
===
--- gcc.target/alpha/pr66140.c  (revision 226216)
+++ gcc.target/alpha/pr66140.c  (working copy)
@@ -34,7 +34,7 @@ static inline void lpfc_bg_setup_bpl(struct lpfc_h
void *sgde;
int i;
 
-   *pde5 = (((0x85 & 0x00ff) << 24) | (*pde5 & ~(0x00ff << 24)));
+   *pde5 = (((0x85 & 0x00ffu) << 24) | (*pde5 & ~(0x00ffu << 24)));
for (i = 0, sgde = scsi_sglist(sc); i < 2; i++, sgde = sg_next(sgde))
;
 }

[libstdc++/67015, patch] Fix regex POSIX bracket parsing

2015-07-26 Thread Tim Shen

Kinda important, since "[a-z0-9-]" may be a common case.

Bootstrapped and tested.

Guess it can also be backported to 5, or even 4.9?

Thanks!


-- 
Regards,
Tim Shen

Re: [libstdc++/67015, patch] Fix regex POSIX bracket parsing

2015-07-26 Thread Tim Shen

On Sun, Jul 26, 2015 at 5:19 AM, Tim Shen  wrote:
> Kinda important, since "[a-z0-9-]" may be a common case.
>
> Bootstrapped and tested.

Actual patch...

-- 
Regards,
Tim Shen
commit e0e6c2e3b722e1453d29ad3a56d0de80046453b0
Author: Tim Shen 
Date:   Sun Jul 26 04:37:45 2015 -0700

PR libstdc++/67015

* include/bits/regex_compiler.h (_Compiler<>::_M_expression_term,
_BracketMatcher<>::_M_add_collating_element): Change signature
to make checking the end of bracket expression easier.
* include/bits/regex_compiler.tcc (_Compiler<>::_M_expression_term):
Treat '-' as a valid literal if it's at the end of bracket expression.
* testsuite/28_regex/algorithms/regex_match/cstring_bracket_01.cc:
New testcases.

diff --git a/libstdc++-v3/include/bits/regex_compiler.h 
b/libstdc++-v3/include/bits/regex_compiler.h
index 4472116..6d9799e 100644
--- a/libstdc++-v3/include/bits/regex_compiler.h
+++ b/libstdc++-v3/include/bits/regex_compiler.h
@@ -116,8 +116,10 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
void
_M_insert_bracket_matcher(bool __neg);
 
+  // Returns true if successfully matched one term and should continue.
+  // Returns false if the compiler should move on.
   template
-   void
+   bool
_M_expression_term(pair& __last_char,
   _BracketMatcher<_TraitsT, __icase, __collate>&
   __matcher);
@@ -389,7 +391,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #endif
   }
 
-  void
+  _StringT
   _M_add_collating_element(const _StringT& __s)
   {
auto __st = _M_traits.lookup_collatename(__s.data(),
@@ -400,6 +402,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 #ifdef _GLIBCXX_DEBUG
_M_is_ready = false;
 #endif
+   return __st;
   }
 
   void
diff --git a/libstdc++-v3/include/bits/regex_compiler.tcc 
b/libstdc++-v3/include/bits/regex_compiler.tcc
index 33d7118..f48e3c1 100644
--- a/libstdc++-v3/include/bits/regex_compiler.tcc
+++ b/libstdc++-v3/include/bits/regex_compiler.tcc
@@ -424,8 +424,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
__last_char.first = true;
__last_char.second = _M_value[0];
  }
-  while (!_M_match_token(_ScannerT::_S_token_bracket_end))
-   _M_expression_term(__last_char, __matcher);
+  while (_M_expression_term(__last_char, __matcher));
   __matcher._M_ready();
   _M_stack.push(_StateSeqT(
  *_M_nfa,
@@ -434,21 +433,31 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
 
   template
   template
-void
+bool
 _Compiler<_TraitsT>::
 _M_expression_term(pair& __last_char,
   _BracketMatcher<_TraitsT, __icase, __collate>& __matcher)
 {
+  if (_M_match_token(_ScannerT::_S_token_bracket_end))
+   return false;
+
   if (_M_match_token(_ScannerT::_S_token_collsymbol))
-   __matcher._M_add_collating_element(_M_value);
+   {
+ auto __symbol = __matcher._M_add_collating_element(_M_value);
+ if (__symbol.size() == 1)
+   {
+ __last_char.first = true;
+ __last_char.second = __symbol[0];
+   }
+   }
   else if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
__matcher._M_add_equivalence_class(_M_value);
   else if (_M_match_token(_ScannerT::_S_token_char_class_name))
__matcher._M_add_character_class(_M_value, false);
-  // POSIX doesn't permit '-' as a start-range char (say [a-z--0]),
-  // except when the '-' is the first character in the bracket expression
-  // ([--0]). ECMAScript treats all '-' after a range as a normal 
character.
-  // Also see above, where _M_expression_term gets called.
+  // POSIX doesn't allow '-' as a start-range char (say [a-z--0]),
+  // except when the '-' is the first or last character in the bracket
+  // expression ([--0]). ECMAScript treats all '-' after a range as a
+  // normal character. Also see above, where _M_expression_term gets 
called.
   //
   // As a result, POSIX rejects [-], but ECMAScript doesn't.
   // Boost (1.57.0) always uses POSIX style even in its ECMAScript syntax.
@@ -459,10 +468,14 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
{
  if (!__last_char.first)
{
+ __matcher._M_add_char(_M_value[0]);
  if (_M_value[0] == '-'
  && !(_M_flags & regex_constants::ECMAScript))
-   __throw_regex_error(regex_constants::error_range);
- __matcher._M_add_char(_M_value[0]);
+   {
+ if (_M_match_token(_ScannerT::_S_token_bracket_end))
+   return false;
+ __throw_regex_error(regex_constants::error_range);
+   }
  __last_char.first = true;
  __last_char.second = _M_value[0];
}
@@ -496,6 +509,8 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION

OMP. More constification

2015-07-26 Thread Nathan Sidwell


Jakub, Ilya,
I found some more missing consts.  The size, kind, var and function arrays 
emitted by omp-low are read only, but are not so marked.  This patch


a) adds const qualifier and marks them read only.  They now end up in .rodata 
and .data.ro.rel appropriately.


b) adds const qualifier to libgomp's routines that use the data.

The host-side version of the offloaded function takes a 'void *' argument, which 
should really be 'void  *const *', but that change rapidly went to change a lot 
of places.  Even just changing it to 'const void *'.  So I punted on that bit and 
simply added a 'void *' cast when calling it.


I've not checked the intelmic library, and suspect that will  need some consts 
adding -- is that something you can do Ilya?


nathan
2015-07-26  Nathan Sidwell  

	libgomp/
	* libgomp.h (struct acc_dispatch_t): Constify exec_func's
	object descriptor arguments.
	(gomp_acc_insert_pointer): Constify object descriptor arguments.
	(gomp_map_vars): Likewise.
	* target.c (get_kind, gomp_map_vars, gomp_update, GOMP_target,
	GOMP_target_data, GOMP_target_update): Likewise.
	* oacc-parallel.c (find_pset, GOACC_parallel, GOACC_data_start,
	GOACC_enter_exit_data, GOACC_update): Likewise.
	* libgomp_g.h (GOMP_target, GOMP_target_data, GOMP_target_update,
	GOACC_data_start, GOACC_enter_exit_data, GOACC_update): Likewise.
	* plugin/plugin-nvptx.c (nvptx_exec): Likewise.
	* plugin/plugin-host.c (GOMP_OFFLOAD_openacc_parallel): Likewise.
	* oacc-mem.c (gomp_acc_insert_pointer): Likewise.

	gcc/
	* omp-low.c (lower_omp_target): Make size and kind arrays read
	only.
	(omp_finish_file): Make func and var arrays read only.

Index: libgomp/libgomp.h
===
--- libgomp/libgomp.h	(revision 226231)
+++ libgomp/libgomp.h	(working copy)
@@ -692,8 +692,9 @@ typedef struct acc_dispatch_t
   struct target_mem_desc *data_environ;
 
   /* Execute.  */
-  void (*exec_func) (void (*) (void *), size_t, void **, void **, size_t *,
-		 unsigned short *, int, int, int, int, void *);
+  void (*exec_func) (void (*) (void *), size_t, void *const *, void *const *,
+		 const size_t *, const unsigned short *,
+		 int, int, int, int, void *);
 
   /* Async cleanup callback registration.  */
   void (*register_async_cleanup_func) (void *);
@@ -771,12 +772,14 @@ struct gomp_device_descr
   acc_dispatch_t openacc;
 };
 
-extern void gomp_acc_insert_pointer (size_t, void **, size_t *, void *);
+extern void gomp_acc_insert_pointer (size_t, void *const *,
+ const size_t *, const void *);
 extern void gomp_acc_remove_pointer (void *, bool, int, int);
 
 extern struct target_mem_desc *gomp_map_vars (struct gomp_device_descr *,
-	  size_t, void **, void **,
-	  size_t *, void *, bool, bool);
+	  size_t, void *const *, void **,
+	  const size_t *, const void *,
+	  bool, bool);
 extern void gomp_copy_from_async (struct target_mem_desc *);
 extern void gomp_unmap_vars (struct target_mem_desc *, bool);
 extern void gomp_init_device (struct gomp_device_descr *);
Index: libgomp/target.c
===
--- libgomp/target.c	(revision 226231)
+++ libgomp/target.c	(working copy)
@@ -157,10 +157,10 @@ gomp_map_vars_existing (struct gomp_devi
 }
 
 static int
-get_kind (bool is_openacc, void *kinds, int idx)
+get_kind (bool is_openacc, const void *kinds, int idx)
 {
-  return is_openacc ? ((unsigned short *) kinds)[idx]
-		: ((unsigned char *) kinds)[idx];
+  return is_openacc ? ((const unsigned short *) kinds)[idx]
+		: ((const unsigned char *) kinds)[idx];
 }
 
 static void
@@ -219,7 +219,8 @@ gomp_map_pointer (struct target_mem_desc
 
 attribute_hidden struct target_mem_desc *
 gomp_map_vars (struct gomp_device_descr *devicep, size_t mapnum,
-	   void **hostaddrs, void **devaddrs, size_t *sizes, void *kinds,
+	   void *const *hostaddrs, void **devaddrs,
+	   const size_t *sizes, const void *kinds,
 	   bool is_openacc, bool is_target)
 {
   size_t i, tgt_align, tgt_size, not_found_cnt = 0;
@@ -574,8 +575,9 @@ gomp_unmap_vars (struct target_mem_desc
 }
 
 static void
-gomp_update (struct gomp_device_descr *devicep, size_t mapnum, void **hostaddrs,
-	 size_t *sizes, void *kinds, bool is_openacc)
+gomp_update (struct gomp_device_descr *devicep, size_t mapnum,
+	 void *const *hostaddrs,
+	 const size_t *sizes, const void *kinds, bool is_openacc)
 {
   size_t i;
   struct splay_tree_key_s cur_node;
@@ -927,8 +929,8 @@ gomp_fini_device (struct gomp_device_des
 
 void
 GOMP_target (int device, void (*fn) (void *), const void *unused,
-	 size_t mapnum, void **hostaddrs, size_t *sizes,
-	 unsigned char *kinds)
+	 size_t mapnum, void *const *hostaddrs, const size_t *sizes,
+	 const unsigned char *kinds)
 {
   struct gomp_device_descr *devicep = resolve_device (device);
 
@@ -944,7 +946,7 @@ GOMP_target (int device, void (*fn) (voi

Re: [PATCH] Allow non-overflow ops in vect_is_simple_reduction_1

2015-07-26 Thread Tom de Vries


On 24/07/15 16:39, Tom de Vries wrote:

Hi,

this patch allows parallelization and vectorization of reduction
operators that are guaranteed to not overflow (such as min and max
operators), independent of the overflow behaviour of the type.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom


Handle non-overflow reductions in graphite

2015-07-21  Tom de Vries  

	* graphite-sese-to-poly.c (is_reduction_operation_p): Allow operations
	that do not overflow.
---
 gcc/graphite-sese-to-poly.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index c583f16..531c848 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2614,8 +2614,19 @@ is_reduction_operation_p (gimple stmt)
   if (FLOAT_TYPE_P (type))
 return flag_associative_math;
 
-  return (INTEGRAL_TYPE_P (type)
-	  && TYPE_OVERFLOW_WRAPS (type));
+  if (ANY_INTEGRAL_TYPE_P (type))
+{
+  if (INTEGRAL_TYPE_P (type)
+	  && TYPE_OVERFLOW_WRAPS (type))
+	return true;
+
+  if (no_overflow_tree_code (code, type))
+	return true;
+
+  return false;
+}
+
+  return false;
 }
 
 /* Returns true when PHI contains an argument ARG.  */
-- 
1.9.1

Re: [PATCH] Allow non-overflow ops in vect_is_simple_reduction_1

2015-07-26 Thread Tom de Vries


On 26/07/15 18:49, Tom de Vries wrote:

On 24/07/15 16:39, Tom de Vries wrote:

Hi,

this patch allows parallelization and vectorization of reduction
operators that are guaranteed to not overflow (such as min and max
operators), independent of the overflow behaviour of the type.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom


[ Slip-of-the-keyboard ]

This is the graphite version of this patch.

Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom

Handle non-overflow reductions in graphite

2015-07-21  Tom de Vries  

	* graphite-sese-to-poly.c (is_reduction_operation_p): Allow operations
	that do not overflow.
---
 gcc/graphite-sese-to-poly.c | 15 +--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index c583f16..531c848 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2614,8 +2614,19 @@ is_reduction_operation_p (gimple stmt)
   if (FLOAT_TYPE_P (type))
 return flag_associative_math;
 
-  return (INTEGRAL_TYPE_P (type)
-	  && TYPE_OVERFLOW_WRAPS (type));
+  if (ANY_INTEGRAL_TYPE_P (type))
+{
+  if (INTEGRAL_TYPE_P (type)
+	  && TYPE_OVERFLOW_WRAPS (type))
+	return true;
+
+  if (no_overflow_tree_code (code, type))
+	return true;
+
+  return false;
+}
+
+  return false;
 }
 
 /* Returns true when PHI contains an argument ARG.  */
-- 
1.9.1

[PATCH] Enable fixed-point reductions in graphite

2015-07-26 Thread Tom de Vries


Hi,

In patch "Don't allow unsafe reductions in graphite" ( submitted 
https://gcc.gnu.org/ml/gcc-patches/2015-07/msg01861.html , committed 
r226193) we've disabled fixed-point reductions.


In this patch we enable it again, for the case that it's safe, in other 
words, for the case that the fixed point overflow wraps.


The patch uses FIXED_POINT_TYPE_OVERFLOW_WRAPS_P, introduced in proposed 
patch https://gcc.gnu.org/ml/gcc-patches/2015-07/msg02049.html .


Bootstrapped and reg-tested on x86_64.

OK for trunk?

Thanks,
- Tom
Enable fixed-point reductions in graphite

2015-07-24  Tom de Vries  

	* graphite-sese-to-poly.c (is_reduction_operation_p): Allow wrapping
	fixed-point operations.
---
 gcc/graphite-sese-to-poly.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 531c848..aa823f8 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -2626,6 +2626,9 @@ is_reduction_operation_p (gimple stmt)
   return false;
 }
 
+  if (FIXED_POINT_TYPE_P (type))
+return FIXED_POINT_TYPE_OVERFLOW_WRAPS_P (type);
+
   return false;
 }
 
-- 
1.9.1

[gomp4] fixup openacc default handling

2015-07-26 Thread Nathan Sidwell

I've committed this update to my earlier breakout of default handling.  After 
complaining about something because of 'none', we should fall through to the 
default handling, to prevent ICEing later (on a patch series I'm working on).  This 
matches the OMP default handling.  Also tweaked the setting of GOVD_ flags 
slightly, to make the firstprivate handling I'm working on less invasive.


nathan
2015-07-26  Nathan Sidwell  

	* gimplify.c (oacc_default_clause): Fallthrough to unspecified
	handling.  Propagate mapping from outer scope.

Index: gcc/gimplify.c
===
--- gcc/gimplify.c	(revision 226226)
+++ gcc/gimplify.c	(working copy)
@@ -5930,7 +5930,7 @@ oacc_default_clause (struct gimplify_omp
 	   DECL_NAME (lang_hooks.decls.omp_report_decl (decl)), rkind);
 	error_at (ctx->location, "enclosing OpenACC %s construct", rkind);
   }
-break;
+  /* FALLTHRU.  */
 
 case OMP_CLAUSE_DEFAULT_UNSPECIFIED:
   {
@@ -5944,33 +5944,39 @@ oacc_default_clause (struct gimplify_omp
 		  continue;
 		if (!(octx->region_type & (ORT_TARGET_DATA | ORT_TARGET)))
 		  break;
-		if (splay_tree_lookup (octx->variables, (splay_tree_key) decl))
+	  splay_tree_node n2
+		= splay_tree_lookup (octx->variables, (splay_tree_key) decl);
+	  if (n2)
+		{
+		  flags |= n2->value & GOVD_MAP;
 		  goto found_outer;
+		}
 	  }
 	  }
 
-	{
-	  tree type = TREE_TYPE (decl);
-	  /*  Should this  be REFERENCE_TYPE_P? */
-	  if (POINTER_TYPE_P (type))
-	type = TREE_TYPE (type);
+	if (is_global_var (decl) && device_resident_p (decl))
+	  flags |= GOVD_MAP_TO_ONLY | GOVD_MAP;
+	/* Scalars under kernels are default 'copy'.  */
+	else if (ctx->acc_region_kind == ARK_KERNELS)
+	  flags |= GOVD_FORCE_MAP | GOVD_MAP;
+	else if (ctx->acc_region_kind == ARK_PARALLEL)
+	  {
+	tree type = TREE_TYPE (decl);
+
+	/*  Should this  be REFERENCE_TYPE_P? */
+	if (POINTER_TYPE_P (type))
+	  type = TREE_TYPE (type);
 	
-	  /* For OpenACC regions, array and aggregate variables
-	 default to present_or_copy, while scalar variables
-	 by default are firstprivate (gang-local) in parallel.  */
-	  if (!AGGREGATE_TYPE_P (type))
-	{
-	  if (is_global_var (decl) && device_resident_p (decl))
-		flags |= GOVD_MAP_TO_ONLY;
-	  else if (ctx->acc_region_kind == ARK_PARALLEL)
-		flags |= (GOVD_GANGLOCAL | GOVD_MAP_TO_ONLY);
-	  /* Scalars under kernels are default 'copy'.  */
-	  else if (ctx->acc_region_kind == ARK_KERNELS)
-		flags |= GOVD_FORCE_MAP;
-	  else
-		gcc_unreachable ();
-	}
+	if (AGGREGATE_TYPE_P (type))
+	  /* Aggregates default to 'copy'.  This should really
+		 include GOVD_FORCE_MAP.  */
+	  flags |= GOVD_MAP;
+	else
+	  /* Scalars default tp 'firstprivate'.  */
+	  flags |= GOVD_GANGLOCAL | GOVD_MAP_TO_ONLY | GOVD_MAP;
 	  }
+	else
+	  gcc_unreachable ();
   found_outer:;
   }
   break;
@@ -6020,7 +6026,8 @@ omp_notice_variable (struct gimplify_omp
 
 	  if (is_oacc)
 	flags = oacc_default_clause (ctx, decl, in_code, flags);
-	  flags |= GOVD_MAP;
+	  else
+	flags |= GOVD_MAP;
 
 	  if (!lang_hooks.types.omp_mappable_type (TREE_TYPE (decl), is_oacc))
 	{

Re: [PATCH] Fix PR c++/18969 (invalid return statement diagnosed too late)

2015-07-26 Thread Patrick Palka

Committed with this additional change to fix a latent testcase bug:

diff --git a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
index 6a747ff..28b029f 100644
--- a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
+++ b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
@@ -5,7 +5,7 @@ enum bar {};
 void operator+ (int, int);// { dg-error "" } .*
 void operator+ (bar&, int);

-template  void operator+ (int b, T& t) { return b; }
+template  void operator+ (int b, T& t) { return; }
 void operator+ (int, bar&);

 template  class foo

[PATCH] Use single shared memory block pool for all pool allocators

2015-07-26 Thread Mikhail Maltsev

Hi, all!
Recently I did some profiling of GCC to find hotspots and areas of possible
performance improvement among them. glibc malloc(3) is one of (perhaps known)
hotspots. It seemed rather strange to me that pool allocators call malloc(3) and
free(3) rather often, and spend considerable time in malloc/free. It turned out
that pool allocators are often used in a suboptimal way: current interface
allows users to choose the number of objects per allocated memory block. In some
cases this block is chosen to be rather small (e.g. ~150 bytes), and this causes
more calls of malloc/free which are relatively expensive. By simply increasing
the block size I could get some measurable performance improvement. There is
another issue: while some pools exist for the whole compilation, the other ones
are created and destroyed many times (perhaps once per compiled function), and
this also adds some overhead. I propose the following optimization which helps
to reduce it: use one common block size in all pools and create another pool,
which will hold free memory blocks. Other pools use it for allocations so that a
page freed by one pool can be immediately used by another pool (without calling
free/malloc).

To measure the effect on performance, I used C and C++ code from SPEC CPU2006
benchmark. All source files were preprocessed and put on ramdrive (tmpfs). The
compilers (cc1 and cc1plus executables) were also copied to ramdrive. Each
benchmark was then compiled in a single-threaded build (to make the results more
precise) with fixed CPU affinity mask. Compilers were built with
--enable-checking=release. The patch passed bootstrap and regtest on 
x86_64-linux.

Performance data.
Compile times at -O3, the columns are: pristine version (in seconds), patched
version (in seconds), difference.
> 400.perlbench26.86  26.68 (  -0.67%) user  0.56   0.52 (  -7.14%) 
> sys 27.45  27.23 (  -0.80%) real
> 401.bzip2 2.53   2.51 (  -0.79%) user  0.07   0.07 (  +0.00%) 
> sys  2.61   2.59 (  -0.77%) real
> 403.gcc  73.59  73.12 (  -0.64%) user  1.59   1.53 (  -3.77%) 
> sys 75.27  74.74 (  -0.70%) real
> 429.mcf0.40.4 (  +0.00%) user  0.03   0.02 ( -33.33%) 
> sys  0.44   0.44 (  +0.00%) real
> 433.milc  3.22   3.22 (  +0.00%) user  0.220.2 (  -9.09%) 
> sys  3.48   3.46 (  -0.57%) real
> 444.namd  7.54   7.47 (  -0.93%) user   0.10.1 (  +0.00%) 
> sys  7.66   7.59 (  -0.91%) real
> 445.gobmk20.24  20.03 (  -1.04%) user  0.52   0.51 (  -1.92%) 
> sys  20.8  20.59 (  -1.01%) real
> 450.soplex   19.08  18.94 (  -0.73%) user  0.87   0.91 (  +4.60%) 
> sys 19.99  19.89 (  -0.50%) real
> 453.povray   42.27  42.17 (  -0.24%) user  2.71   2.65 (  -2.21%) 
> sys 45.04  44.88 (  -0.36%) real
> 456.hmmer 7.27   7.22 (  -0.69%) user  0.31   0.31 (  +0.00%) 
> sys  7.61   7.56 (  -0.66%) real
> 458.sjeng 3.22   3.17 (  -1.55%) user  0.09   0.12 ( +33.33%) 
> sys  3.323.3 (  -0.60%) real
> 462.libquantum0.86   0.85 (  -1.16%) user  0.05   0.05 (  +0.00%) 
> sys  0.92   0.92 (  +0.00%) real
> 464.h264ref  27.62  27.38 (  -0.87%) user  0.63   0.61 (  -3.17%) 
> sys 28.28  28.02 (  -0.92%) real
> 470.lbm   0.27   0.27 (  +0.00%) user  0.01   0.02 (+100.00%) 
> sys  0.29   0.29 (  +0.00%) real
> 471.omnetpp  28.29  28.16 (  -0.46%) user   1.5   1.53 (  +2.00%) 
> sys 29.84  29.74 (  -0.34%) real
> 473.astar 1.14   1.15 (  +0.88%) user  0.05   0.04 ( -20.00%) 
> sys  1.211.2 (  -0.83%) real
> 482.sphinx3   4.65   4.63 (  -0.43%) user   0.2   0.19 (  -5.00%) 
> sys  4.88   4.84 (  -0.82%) real
> 483.xalancbmk284.5 283.51 (  -0.35%) user 20.29  20.74 (  +2.22%) 
> sys305.19 304.65 (  -0.18%) real

The results for -O1 are similar (and xalancbmk shows ~0.5% reduction of
wallclock time). As you can see, there is some measurable improvement (increased
times are seen only on short tests - having less than 2 seconds compile time -
and I suppose the difference is close to measurement error).

The patch causes slightly increased memory consumption. It is typically within 2
MB, but reaches 5 MB for omnetpp - 157 vs 152 MB. (FWIW, it should be O(1), if
we don't have memory leaks).

The size of common memory block was chosen to be 64 kB. This value seems to be
close to optimal, at least on x86_64-linux (I tried various values, though not
with complete set of benchmarks). It also seems to be enough for all objects we
currently allocate from pools (they have constant sizes, in some cases
target-dependent, but still much smaller than the 64 kB limit).

OK for trunk?

Possible further improvements (RFC):
1. Find out, if it is possible to use the same technique for obstacks (and share
memory between pools and obstacks). It seems possi

Re: [PATCH] Fix PR c++/18969 (invalid return statement diagnosed too late)

2015-07-26 Thread Patrick Palka

On Sun, Jul 26, 2015 at 1:09 PM, Patrick Palka  wrote:
> Committed with this additional change to fix a latent testcase bug:
>
> diff --git a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
> b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
> index 6a747ff..28b029f 100644
> --- a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
> +++ b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
> @@ -5,7 +5,7 @@ enum bar {};
>  void operator+ (int, int);// { dg-error "" } .*
>  void operator+ (bar&, int);
>
> -template  void operator+ (int b, T& t) { return b; }
> +template  void operator+ (int b, T& t) { return; }
>  void operator+ (int, bar&);
>
>  template  class foo

Hmm, on second thought, I don't think this fix is right.  It may be
the case that the 'return b;' was there to make instantiation of that
template a compile-time error. By changing it to 'return;'
instantiation is allowed.  Is this property important here?  Should I
preserve the original property (that instantiation is a compile-time
error) by instead doing the following?

Adjust g++.old-deja/g++.jason/overload.C

gcc/testsuite/ChangeLog:

* g++.old-deja/g++.jason/overload.C: Adjust to preserve original
property that instantiation is a compile-time error.

diff --git a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
index 28b029f..5d27713 100644
--- a/gcc/testsuite/g++.old-deja/g++.jason/overload.C
+++ b/gcc/testsuite/g++.old-deja/g++.jason/overload.C
@@ -5,7 +5,7 @@ enum bar {};
 void operator+ (int, int);// { dg-error "" } .*
 void operator+ (bar&, int);

-template  void operator+ (int b, T& t) { return; }
+template  void operator+ (int b, T& t) { (void) T::bogus; }
 void operator+ (int, bar&);

 template  class foo

Re: [PATCH] Use single shared memory block pool for all pool allocators

2015-07-26 Thread Andi Kleen

Mikhail Maltsev  writes:

> Hi, all!
> Recently I did some profiling of GCC to find hotspots and areas of possible
> performance improvement among them. glibc malloc(3) is one of (perhaps
> known)

I've been compiling gcc with tcmalloc to do a similar speedup. It would be
interesting to compare that to your patch.

Another useful optimization is to adjust the allocation size to be >=
2MB. Then modern Linux kernels often can give you a large page,
which cuts down TLB overhead. I did similar changes some time
ago for the garbage collector.

BTW I saw big differences in larger LTO builds.

-Andi

-- 
a...@linux.intel.com -- Speaking for myself only

Re: [PATCH] Use single shared memory block pool for all pool allocators

2015-07-26 Thread pinskia





> On Jul 26, 2015, at 11:50 AM, Andi Kleen  wrote:
> 
> Mikhail Maltsev  writes:
> 
>> Hi, all!
>> Recently I did some profiling of GCC to find hotspots and areas of possible
>> performance improvement among them. glibc malloc(3) is one of (perhaps
>> known)
> 
> I've been compiling gcc with tcmalloc to do a similar speedup. It would be
> interesting to compare that to your patch.
> 
> Another useful optimization is to adjust the allocation size to be >=
> 2MB. Then modern Linux kernels often can give you a large page,
> which cuts down TLB overhead. I did similar changes some time
> ago for the garbage collector.

Unless you are running with 64k pages which I do all the time on my armv8 
system.  

Thanks,
Andrew


> 
> BTW I saw big differences in larger LTO builds.
> 
> -Andi
> 
> -- 
> a...@linux.intel.com -- Speaking for myself only

Re: [PATCH 1/2] Allow REG_EQUAL for ZERO_EXTRACT

2015-07-26 Thread Andreas Schwab

Kugan  writes:

>   * cse.c (cse_insn): Fix missing check for STRICT_LOW_PART and minor
>   clean up.

This breaks 

gcc.target/m68k/tls-ie-xgot.c scan-assembler jsr __m68k_read_tp
gcc.target/m68k/tls-ie.c scan-assembler jsr __m68k_read_tp
gcc.target/m68k/tls-le-xtls.c scan-assembler jsr __m68k_read_tp
gcc.target/m68k/tls-le.c scan-assembler jsr __m68k_read_tp

Andreas.

-- 
Andreas Schwab, sch...@linux-m68k.org
GPG Key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

[V850] Hookize LIBCALL_VALUE

2015-07-26 Thread Anatoliy Sokolov


Hi.

This patch removes the obsolete LIBCALL_VALUE macro from the V850 back end in
GCC and introduces the equivalent TARGET_LIBCALL_VALUE target hook.

Regression tested on v850-unknown-eabi.

OK for trunk?

2015-07-26  Anatoly Sokolov  

* config/v850/v850.h (LIBCALL_VALUE): Remove macros.
* config/v850/v850.md (RV_REGNUM): New constants.
* config/v850/v850.c (v850_libcall_value): New functions.
(v850_function_value_regno_p, v850_function_value): Use RV_REGNUM.
(TARGET_LIBCALL_VALUE): Define.

Index: gcc/config/v850/v850.md
===
--- gcc/config/v850/v850.md (revision 226237)
+++ gcc/config/v850/v850.md (working copy)
@@ -39,6 +39,7 @@
   [(ZERO_REGNUM0)  ; constant zero
(SP_REGNUM  3)  ; Stack Pointer
(GP_REGNUM  4)  ; GP Pointer
+   (RV_REGNUM  10) ; Return value register
(EP_REGNUM  30) ; EP pointer
(LP_REGNUM  31) ; Return address register
(CC_REGNUM  32) ; Condition code pseudo register
Index: gcc/config/v850/v850.c
===
--- gcc/config/v850/v850.c  (revision 226237)
+++ gcc/config/v850/v850.c  (working copy)
@@ -2979,7 +2979,7 @@
 static bool
 v850_function_value_regno_p (const unsigned int regno)
 {
-  return (regno == 10);
+  return (regno == RV_REGNUM);
 }

 /* Worker function for TARGET_RETURN_IN_MEMORY.  */
@@ -3002,9 +3002,18 @@
 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
 bool outgoing ATTRIBUTE_UNUSED)
 {
-  return gen_rtx_REG (TYPE_MODE (valtype), 10);
+  return gen_rtx_REG (TYPE_MODE (valtype), RV_REGNUM);
 }

+/* Implement TARGET_LIBCALL_VALUE.  */
+
+static rtx
+v850_libcall_value (machine_mode mode,
+   const_rtx func ATTRIBUTE_UNUSED)
+{
+  return gen_rtx_REG (mode, RV_REGNUM);
+}
+
 
 /* Worker function for TARGET_CAN_ELIMINATE.  */

@@ -3304,6 +3313,8 @@
 #define TARGET_FUNCTION_VALUE_REGNO_P v850_function_value_regno_p
 #undef  TARGET_FUNCTION_VALUE
 #define TARGET_FUNCTION_VALUE v850_function_value
+#undef  TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE v850_libcall_value

 #undef  TARGET_PROMOTE_PROTOTYPES
 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
Index: gcc/config/v850/v850.h
===
--- gcc/config/v850/v850.h  (revision 226237)
+++ gcc/config/v850/v850.h  (working copy)
@@ -549,12 +549,6 @@

 #define FUNCTION_ARG_REGNO_P(N) (N >= 6 && N <= 9)

-/* Define how to find the value returned by a library function
-   assuming the value has mode MODE.  */
-
-#define LIBCALL_VALUE(MODE) \
-  gen_rtx_REG (MODE, 10)
-
 #define DEFAULT_PCC_STRUCT_RETURN 0

 /* EXIT_IGNORE_STACK should be nonzero if, when returning from a function,

[MOXIE] Hookize PRINT_OPERAND and PRINT_OPERAND_ADDRESS

2015-07-26 Thread Anatoliy Sokolov


Hi.

This patch removes the obsolete PRINT_OPERAND and PRINT_OPERAND_ADDRESS macros
from the MOXIE back end in GCC and introduces the equivalent
TARGET_PRINT_OPERAND and TARGET_PRINT_OPERAND_ADDRESS target hooks.

Regression tested on moxie-unknown-elf.

OK for trunk?

Anatoly.

2015-07-26  Anatoly Sokolov  

* config/moxie/moxie.h (PRINT_OPERAND,
  PRINT_OPERAND_ADDRESS): Remove macros.
* config/moxie/moxie-protos.h (moxie_print_operand,
  moxie_print_operand_address): Remove declaration.
* config/moxie/moxie.c (TARGET_PRINT_OPERAND,
  TARGET_PRINT_OPERAND_ADDRESS): Define.
  (moxie_print_operand, moxie_print_operand_address): Make static.

Index: gcc/config/moxie/moxie.c
===
--- gcc/config/moxie/moxie.c(revision 226237)
+++ gcc/config/moxie/moxie.c(working copy)
@@ -128,7 +128,7 @@

 /* The PRINT_OPERAND_ADDRESS worker.  */

-void
+static void
 moxie_print_operand_address (FILE *file, rtx x)
 {
   switch (GET_CODE (x))
@@ -175,7 +175,7 @@

 /* The PRINT_OPERAND worker.  */

-void
+static void
 moxie_print_operand (FILE *file, rtx x, int code)
 {
   rtx operand = x;
@@ -679,6 +679,11 @@
 #undef TARGET_OPTION_OVERRIDE
 #define TARGET_OPTION_OVERRIDE moxie_option_override

+#undef  TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND moxie_print_operand
+#undef  TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS moxie_print_operand_address
+
 struct gcc_target targetm = TARGET_INITIALIZER;

 #include "gt-moxie.h"
Index: gcc/config/moxie/moxie.h
===
--- gcc/config/moxie/moxie.h(revision 226237)
+++ gcc/config/moxie/moxie.h(working copy)
@@ -212,12 +212,6 @@
 #define ASM_OUTPUT_ALIGN(STREAM,POWER) \
fprintf (STREAM, "\t.p2align\t%d\n", POWER);

-/* A C compound statement to output to stdio stream STREAM the
-   assembler syntax for an instruction operand X.  */
-#define PRINT_OPERAND(STREAM, X, CODE) moxie_print_operand (STREAM, X, CODE)
-
-#define PRINT_OPERAND_ADDRESS(STREAM ,X) moxie_print_operand_address (STREAM, 
X)
-
 /* Output and Generation of Labels */

 #define GLOBAL_ASM_OP "\t.global\t"
Index: gcc/config/moxie/moxie-protos.h
===
--- gcc/config/moxie/moxie-protos.h (revision 226237)
+++ gcc/config/moxie/moxie-protos.h (working copy)
@@ -20,6 +20,4 @@
 extern void  moxie_expand_prologue (void);
 extern void  moxie_expand_epilogue (void);
 extern int   moxie_initial_elimination_offset (int, int);
-extern void  moxie_print_operand (FILE *, rtx, int);
-extern void  moxie_print_operand_address (FILE *, rtx);
 extern bool  moxie_offset_address_p (rtx);

[PATCH] Use lowpart_subreg instead of simplify_gen_subreg

2015-07-26 Thread Anatoliy Sokolov


Hello.

  This patch replaces calls of the form simplify_gen_subreg (omode, x, imode,
subreg_lowpart_offset (omode, imode)) with lowpart_subreg (omode, x, imode)
and moves the lowpart_subreg function from loop-iv.c to simplify-rtx.c.

Bootstrapped and reg-tested on x86_64-unknown-linux-gnu.

OK for trunk?

Anatoliy.

2015-07-26  Anatoly Sokolov  

* rtl.h (lowpart_subreg): Move in file.
* loop-iv.c (lowpart_subreg): Move to...
* simplify-rtx.c (lowpart_subreg): ...here.
  (simplify_binary_operation_1): Use lowpart_subreg instead of
  simplify_gen_subreg.
* expr.c (expand_expr_real_2): Ditto.
* emit-rtl.c (gen_lowpart_common): Ditto.
* combine.c (gen_lowpart_for_combine): Ditto.
* cfgexpand.c (convert_debug_memory_address, expand_debug_expr,
  expand_debug_source_expr): Ditto.

Index: gcc/cfgexpand.c
===
--- gcc/cfgexpand.c (revision 225722)
+++ gcc/cfgexpand.c (working copy)
@@ -3632,9 +3632,7 @@
 return x;

   if (GET_MODE_PRECISION (mode) < GET_MODE_PRECISION (xmode))
-x = simplify_gen_subreg (mode, x, xmode,
-subreg_lowpart_offset
-(mode, xmode));
+x = lowpart_subreg (mode, x, xmode);
   else if (POINTERS_EXTEND_UNSIGNED > 0)
 x = gen_rtx_ZERO_EXTEND (mode, x);
   else if (!POINTERS_EXTEND_UNSIGNED)
@@ -3850,9 +3848,7 @@
  if (SCALAR_INT_MODE_P (opmode)
  && (GET_MODE_PRECISION (opmode)
  < GET_MODE_PRECISION (inner_mode)))
-   op1 = simplify_gen_subreg (opmode, op1, inner_mode,
-  subreg_lowpart_offset (opmode,
- inner_mode));
+   op1 = lowpart_subreg (opmode, op1, inner_mode);
}
  break;
default:
@@ -4011,9 +4007,7 @@
  }
else if (CONSTANT_P (op0)
 || GET_MODE_PRECISION (mode) <= GET_MODE_PRECISION 
(inner_mode))
- op0 = simplify_gen_subreg (mode, op0, inner_mode,
-subreg_lowpart_offset (mode,
-   inner_mode));
+ op0 = lowpart_subreg (mode, op0, inner_mode);
else if (UNARY_CLASS_P (exp)
 ? TYPE_UNSIGNED (TREE_TYPE (TREE_OPERAND (exp, 0)))
 : unsignedp)
@@ -4141,9 +4135,7 @@
  offmode = TYPE_MODE (TREE_TYPE (offset));

if (addrmode != offmode)
- op1 = simplify_gen_subreg (addrmode, op1, offmode,
-subreg_lowpart_offset (addrmode,
-   offmode));
+ op1 = lowpart_subreg (addrmode, op1, offmode);

/* Don't use offset_address here, we don't need a
   recognizable address, and we don't want to generate
@@ -4868,8 +4860,7 @@
 }
   else if (CONSTANT_P (op0)
   || GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (inner_mode))
-op0 = simplify_gen_subreg (mode, op0, inner_mode,
-  subreg_lowpart_offset (mode, inner_mode));
+op0 = lowpart_subreg (mode, op0, inner_mode);
   else if (TYPE_UNSIGNED (TREE_TYPE (exp)))
 op0 = simplify_gen_unary (ZERO_EXTEND, mode, op0, inner_mode);
   else
Index: gcc/combine.c
===
--- gcc/combine.c   (revision 225722)
+++ gcc/combine.c   (working copy)
@@ -11194,10 +11194,8 @@
  include an explicit SUBREG or we may simplify it further in combine.  */
   else
 {
-  int offset = 0;
   rtx res;

-  offset = subreg_lowpart_offset (omode, imode);
   if (imode == VOIDmode)
{
  imode = int_mode_for_mode (omode);
@@ -11205,7 +11203,7 @@
  if (x == NULL)
goto fail;
}
-  res = simplify_gen_subreg (omode, x, imode, offset);
+  res = lowpart_subreg (omode, x, imode);
   if (res)
return res;
 }
Index: gcc/emit-rtl.c
===
--- gcc/emit-rtl.c  (revision 225722)
+++ gcc/emit-rtl.c  (working copy)
@@ -1376,7 +1376,6 @@
 {
   int msize = GET_MODE_SIZE (mode);
   int xsize;
-  int offset = 0;
   machine_mode innermode;

   /* Unfortunately, this routine doesn't take a parameter for the mode of X,
@@ -1404,8 +1403,6 @@
   if (SCALAR_FLOAT_MODE_P (mode) && msize > xsize)
 return 0;

-  offset = subreg_lowpart_offset (mode, innermode);
-
   if ((GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
   && (GET_MODE_CLASS (mode) == MODE_INT
  || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT))
@@ -1428,7 +1425,7 @@
   else if (GET_CODE (x) == SUBREG || REG_P (x)
   || GET_CODE (x) == CONCAT || GET_CODE (x) == CONST_VECTOR
   || CONST_DOUBLE_AS_FLO

Re: [RFC, PR66873] Use graphite for parloops

2015-07-26 Thread Tom de Vries


On 16/07/15 12:28, Richard Biener wrote:

On Thu, Jul 16, 2015 at 12:23 PM, Richard Biener
 wrote:

On Thu, Jul 16, 2015 at 12:19 PM, Thomas Schwinge
 wrote:

Hi Tom!

On Thu, 16 Jul 2015 10:46:00 +0200, Richard Biener  
wrote:

On Wed, Jul 15, 2015 at 10:26 PM, Tom de Vries  wrote:

I tried to parallelize this fortran test-case (based on autopar/outer-1.c),
[...]



So I wondered, why not always use the graphite dependency analysis in
parloops. (Of course you could use -floop-parallelize-all, but that also
changes the heuristic). So I wrote a patch for parloops to use graphite
dependency analysis by default (so without -floop-parallelize-all), but
while testing found out that all the reduction test-cases started failing
because the modifications graphite makes to the code messes up the parloops
reduction analysis.

Then I came up with this patch, which:
- first runs a parloops pass, restricted to reduction loops only,
- then runs graphite dependency analysis
- followed by a normal parloops pass run.

This way, we get to both:
- compile the reduction testcases as before, and
- profit from the better graphite dependency analysis otherwise.



graphite dependence analysis is too slow to be enabled unconditionally.
(read: hours in some simple cases - see bugzilla)


Haha, "cool"!  ;-)

Maybe it is still reasonable to use graphite to analyze the code inside
OpenACC kernels regions -- maybe such code can reasonably be expected to
not have the properties that make its analysis lengthy?  So, Tom, could
you please identify and check such PRs, to get an understanding of what
these properties are?


Like the one in PR62113 or 53852 or 59121.


Btw, it would be nice to handle this case (or at least figure out why we can't)
in GCC's dependence analysis.



I wrote an equivalent test-case in C:
...
$ cat src/gcc/testsuite/gcc.dg/autopar/outer-7.c
/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=2 
-fdump-tree-parloops-details -fdump-tree-optimized" } */


void abort (void);

#define N 500

int
main (void)
{
  int i, j;
  int x[N][N];
  int *y = &x[0][0];

  for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
  /* y[i * N + j] == x[i][j].  */
  y[i * N + j] = i + j + 3;

  for (i = 0; i < N; i++)
for (j = 0; j < N; j++)
  if (x[i][j] != i + j + 3)
abort ();

  return 0;
}

/* Check that outer loop is parallelized.  */
/* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 
"parloops" } } */

/* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
...

With -fno-tree-loop-ivcanon to keep original iteration order we get:
...
#(Data Ref:
#  bb: 4
#  stmt: *_15 = _17;
#  ref: *_15;
#  base_object: MEM[(int *)&x];
#  Access function 0: {{0B, +, 2000}_1, +, 4}_4
#)
#(Data Ref:
#  bb: 4
#  stmt: *_15 = _17;
#  ref: *_15;
#  base_object: MEM[(int *)&x];
#  Access function 0: {{0B, +, 2000}_1, +, 4}_4
#)
  access_fn_A: {{0B, +, 2000}_1, +, 4}_4
  access_fn_B: {{0B, +, 2000}_1, +, 4}_4

 (subscript
  iterations_that_access_an_element_twice_in_A: [0]
  last_conflict: scev_not_known
  iterations_that_access_an_element_twice_in_B: [0]
  last_conflict: scev_not_known
  (Subscript distance: 0 ))
  inner loop index: 0
  loop nest: (1 4 )
  distance_vector:   0   0
  distance_vector:   1 -500
  direction_vector: ==
  direction_vector: +-
)
  FAILED: data dependencies exist across iterations
...

If we replace the y[i * N + j] with x[i][j] we get instead:
...
#(Data Ref:
#  bb: 4
#  stmt: x[i_7][j_8] = _12;
#  ref: x[i_7][j_8];
#  base_object: x;
#  Access function 0: {0, +, 1}_4
#  Access function 1: {0, +, 1}_1
#)
#(Data Ref:
#  bb: 4
#  stmt: x[i_7][j_8] = _12;
#  ref: x[i_7][j_8];
#  base_object: x;
#  Access function 0: {0, +, 1}_4
#  Access function 1: {0, +, 1}_1
#)
  access_fn_A: {0, +, 1}_4
  access_fn_B: {0, +, 1}_4

 (subscript
  iterations_that_access_an_element_twice_in_A: [0]
  last_conflict: scev_not_known
  iterations_that_access_an_element_twice_in_B: [0]
  last_conflict: scev_not_known
  (Subscript distance: 0 ))
  access_fn_A: {0, +, 1}_1
  access_fn_B: {0, +, 1}_1

 (subscript
  iterations_that_access_an_element_twice_in_A: [0]
  last_conflict: scev_not_known
  iterations_that_access_an_element_twice_in_B: [0]
  last_conflict: scev_not_known
  (Subscript distance: 0 ))
  inner loop index: 0
  loop nest: (1 4 )
  distance_vector:   0   0
  direction_vector: ==
)
  SUCCESS: may be parallelized
parallelizing outer loop 8
...

Thanks,
- Tom

Re: [wwwdocs] Mention -Wshift-overflow

2015-07-26 Thread Gerald Pfeifer

Hi Marek,

On Tue, 21 Jul 2015, Marek Polacek wrote:
> +A new command-line option -Wshift-overflow has been
> + added for the C and C++ compilers, which warns about left shift
> + overflows.  -Wshift-overflow=2 also warns about
> + left-shifting 1 into the sign bit.

from what I can tell, this is enabled by default for modern 
dialects of C and C++ (so many people will see new warnings 
when upgrading)

Shouldn't this be mentioned here?

Gerald

Re: [ARM] Optimize compare against smin/umin

2015-07-26 Thread Michael Collison


Here is an updated patch that addresses the issues you mentioned:

2015-07-24  Michael Collison  

  * gcc/config/arm/arm.md (*arm_smin_cmp): New pattern.
  (*arm_umin_cmp): Likewise.
  * gcc.target/arm/mincmp.c: Test min compare idiom.

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 0be70a8..361c292 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3455,6 +3455,44 @@
(set_attr "type" "multiple,multiple")]
 )

+;; t = (s/u)min (x, y)
+;; cc = cmp (t, z)
+;; is the same as
+;; cmp x, z
+;; cmpge(u) y, z
+
+(define_insn_and_split "*arm_smin_cmp"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC
+ (smin:SI (match_operand:SI 0 "s_register_operand" "r")
+  (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+  "TARGET_32BIT"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC (match_dup 0) (match_dup 2)))
+   (cond_exec (ge:CC (reg:CC CC_REGNUM) (const_int 0))
+  (set (reg:CC CC_REGNUM)
+   (compare:CC (match_dup 1) (match_dup 2]
+)
+
+(define_insn_and_split "*arm_umin_cmp"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC
+ (umin:SI (match_operand:SI 0 "s_register_operand" "r")
+  (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+  "TARGET_32BIT"
+  "#"
+  "&& reload_completed"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC (match_dup 0) (match_dup 2)))
+   (cond_exec (geu:CC (reg:CC CC_REGNUM) (const_int 0))
+  (set (reg:CC CC_REGNUM)
+   (compare:CC (match_dup 1) (match_dup 2]
+)
+
 (define_expand "umaxsi3"
   [(parallel [
 (set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/testsuite/gcc.target/arm/mincmp.c 
b/gcc/testsuite/gcc.target/arm/mincmp.c

new file mode 100644
index 000..2a55c6d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/mincmp.c
@@ -0,0 +1,18 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-require-effective-target arm32 } */
+
+#define min(x, y) ((x) <= (y)) ? (x) : (y)
+
+unsigned int foo (unsigned int i, unsigned int x ,unsigned int y)
+{
+  return i < (min (x, y));
+}
+
+int bar (int i, int x, int y)
+{
+  return i < (min (x, y));
+}
+
+/* { dg-final { scan-assembler "cmpcs" } } */
+/* { dg-final { scan-assembler "cmpge" } } */
--
1.9.1
On 07/13/2015 04:27 AM, Ramana Radhakrishnan wrote:

On Thu, Jun 25, 2015 at 6:08 PM, Michael Collison
 wrote:

This patch is designed to optimize constructs such as:

#define min(x, y) ((x) <= (y)) ? (x) : (y)

unsigned int foo (unsigned int i, unsigned int x ,unsigned int y)
{
   return  i < (min (x, y));
}

int  bar (int  i,int  x,int  y)
{
   return  i < (min (x, y));
}

Patch was tested on arm-linux-gnueabi, arm-linux-gnueabihf,
armeb-linux-gnueabihf. Okay for trunk?

Sorry about the slow review and I wanted someone else to look at it
given I had a hand in writing this patch up.

Please add a testcase.



2015-06-24  Michael Collison  

Please fix the Changelog formatting here.


 * gcc/config/arm/arm.md (*arm_smin_cmp): New pattern.
 (*arm_umin_cmp): Likewise.

diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 1ac8af0..994c95f 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -3455,6 +3455,28 @@
 (set_attr "type" "multiple,multiple")]
  )

+;; t = (s/u)min (x, y)
+;; cc = cmp (t, z)
+;; is the same as
+;; cmp x, z
+;; cmpge(u) y, z
+
+(define_insn_and_split "*arm_smin_cmp"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC
+ (smin:SI (match_operand:SI 0 "s_register_operand" "r")
+  (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+  "TARGET_32BIT"
+  "#"
+  ""
+  [(set (reg:CC CC_REGNUM)
+(compare:CC (match_dup 0) (match_dup 2)))
+   (cond_exec (ge:CC (reg:CC CC_REGNUM) (const_int 0))
+  (set (reg:CC CC_REGNUM)
+   (compare:CC (match_dup 1) (match_dup 2]
+)


IIUC it's not entirely safe to have cond_execs in the instruction
stream prior to reload - I think the consensus was that spilling and
filling with cond-exec style instructions could end up with
non-cond-exec style spills thus destroying registers in the non
cond-exec cases. So, let's just add a reload_completed to be safe here.

See https://patches.linaro.org/6469/ for more on this topic.


+
  (define_expand "umaxsi3"
[(parallel [
  (set (match_operand:SI 0 "s_register_operand" "")
@@ -3521,6 +3543,22 @@
 (set_attr "type" "store1")]
  )

+(define_insn_and_split "*arm_umin_cmp"
+  [(set (reg:CC CC_REGNUM)
+(compare:CC
+ (umin:SI (match_operand:SI 0 "s_register_operand" "r")
+  (match_operand:SI 1 "s_register_operand" "r"))
+ (match_operand:SI 2 "s_register_operand" "r")))]
+  "TARGET_32BIT"
+  "#"
+  ""
+  [(set (reg:CC CC_REGNUM)
+(compare:CC (match_dup 0) (match_dup 2)))
+   (cond_exec (geu:CC (reg:CC CC_REGNUM) (const_int 0))
+  (set (

Re: [PATCH 1/2] Allow REG_EQUAL for ZERO_EXTRACT

2015-07-26 Thread Kugan



On 27/07/15 05:38, Andreas Schwab wrote:
> Kugan  writes:
> 
>>  * cse.c (cse_insn): Fix missing check for STRICT_LOW_PART and minor
>>  clean up.
> 
> This breaks 
> 
> gcc.target/m68k/tls-ie-xgot.c scan-assembler jsr __m68k_read_tp
> gcc.target/m68k/tls-ie.c scan-assembler jsr __m68k_read_tp
> gcc.target/m68k/tls-le-xtls.c scan-assembler jsr __m68k_read_tp
> gcc.target/m68k/tls-le.c scan-assembler jsr __m68k_read_tp

I am looking into it now.

Thanks,
Kugan

Re: [PR66726] Factor conversion out of COND_EXPR

2015-07-26 Thread Kugan



On 24/07/15 05:05, Jeff Law wrote:
> On 07/15/2015 11:52 PM, Kugan wrote:
>>

 diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
 index 932c83a..3058eb5 100644
 --- a/gcc/tree-ssa-reassoc.c
 +++ b/gcc/tree-ssa-reassoc.c
>>>
return false;
  bb = gimple_bb (stmt);
  if (!single_succ_p (bb))
 @@ -2729,9 +2743,8 @@ final_range_test_p (gimple stmt)

  lhs = gimple_assign_lhs (stmt);
  rhs = gimple_assign_rhs1 (stmt);
 -  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
 -  || TREE_CODE (rhs) != SSA_NAME
 -  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
 +  if (TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE
 +  && TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE)
return false;
>>> So you're ensuring that one of the two is a boolean...  Note that
>>> previously we ensured that the rhs was a boolean and the lhs was an
>>> integral type (which I believe is true for booleans).
>>>
>>> Thus if we had
>>> bool x;
>>> int y;
>>>
>>> x = (bool) y;
>>>
>>> The old code would have rejected that case.  But I think it gets through
>>> now, right?
>>>
>>> I think once that issue is addressed, this will be good for the trunk.
>>>
>>
>> Thanks for the review. How about:
>>
>> -  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>> -  || TREE_CODE (rhs) != SSA_NAME
>> -  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
>> +  if (gimple_assign_cast_p (stmt)
>> +  && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
>> +  || TREE_CODE (rhs) != SSA_NAME
>> +  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
> But then I think you need to verify that for the  _234 = a_2(D) == 2;
> case that type of the RHS is a boolean.
> 
> ie, each case has requirements for the types.  I don't think they can be
> reasonably unified.  So something like this:
> 
> if (gimple_assign_cast_p (stmt)
> && ! (correct types for cast)
>return false;
> 
> if (!gimple_assign_cast_p (stmt)
> && ! (correct types for tcc_comparison case))
>   return false;
> 
> 
> This works because we've already verified that it's either a type
> conversion or a comparison on the RHS.
>
I thought that when !gimple_assign_cast_p (stmt), RHS will always be
boolean. I have now added this check in the attached patch.

I also noticed that in maybe_optimize_range_tests, GIMPLE_COND can
have non compatible types when new_op is updated
(boolean types coming from tcc_compare results) and hence need to be
converted. Changed that as well.

Bootstrapped and regression tested on x86-64-none-linux-gnu with no new
regressions. Is this OK for trunk?

Thanks,
Kugan

diff --git a/gcc/tree-ssa-reassoc.c b/gcc/tree-ssa-reassoc.c
index efb813c..cc215b6 100644
--- a/gcc/tree-ssa-reassoc.c
+++ b/gcc/tree-ssa-reassoc.c
@@ -2707,18 +2707,32 @@ optimize_range_tests (enum tree_code opcode,
# _345 = PHI <_123(N), 1(...), 1(...)>
where _234 has bool type, _123 has single use and
bb N has a single successor M.  This is commonly used in
-   the last block of a range test.  */
+   the last block of a range test.
+
+   Also Return true if STMT is tcc_compare like:
+   :
+   ...
+   _234 = a_2(D) == 2;
 
+   :
+   # _345 = PHI <_234(N), 1(...), 1(...)>
+   _346 = (int) _345;
+   where _234 has booltype, single use and
+   bb N has a single successor M.  This is commonly used in
+   the last block of a range test.  */
 static bool
 final_range_test_p (gimple stmt)
 {
-  basic_block bb, rhs_bb;
+  basic_block bb, rhs_bb, lhs_bb;
   edge e;
   tree lhs, rhs;
   use_operand_p use_p;
   gimple use_stmt;
 
-  if (!gimple_assign_cast_p (stmt))
+  if (!gimple_assign_cast_p (stmt)
+  && (!is_gimple_assign (stmt)
+ || (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
+ != tcc_comparison)))
 return false;
   bb = gimple_bb (stmt);
   if (!single_succ_p (bb))
@@ -2729,11 +2743,16 @@ final_range_test_p (gimple stmt)
 
   lhs = gimple_assign_lhs (stmt);
   rhs = gimple_assign_rhs1 (stmt);
-  if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
-  || TREE_CODE (rhs) != SSA_NAME
-  || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE)
+  if (gimple_assign_cast_p (stmt)
+  && (!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
+ || TREE_CODE (rhs) != SSA_NAME
+ || TREE_CODE (TREE_TYPE (rhs)) != BOOLEAN_TYPE))
 return false;
 
+  if (!gimple_assign_cast_p (stmt)
+  && (TREE_CODE (TREE_TYPE (lhs)) != BOOLEAN_TYPE))
+  return false;
+
   /* Test whether lhs is consumed only by a PHI in the only successor bb.  */
   if (!single_imm_use (lhs, &use_p, &use_stmt))
 return false;
@@ -2743,10 +2762,20 @@ final_range_test_p (gimple stmt)
 return false;
 
   /* And that the rhs is defined in the same loop.  */
-  rhs_bb = gimple_bb (SSA_NAME_DEF_STMT (rhs));
-  if (rhs_bb == NULL
-  || !flow_bb_inside_loop_p (loop_containing_stmt (stmt), rhs_bb))
-return false;
+  if (gimple_assign_cast_p (stmt))
+{
+  if (TREE_CODE (rhs) != SSA_NAME
+ || !(rhs_bb =

[PATCH 3/9] target.h: change to use targetm.pointer_size instead of POINTER_SIZE

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* target.h (pointer_size_units): Call targetm.pointer_size ().
---
 gcc/target.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/gcc/target.h b/gcc/target.h
index 6715b07..435bc7e 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -224,7 +224,7 @@ pack_cumulative_args (CUMULATIVE_ARGS *arg)
 inline unsigned int
 pointer_size_units ()
 {
-  return (POINTER_SIZE + BITS_PER_UNIT - 1) /  BITS_PER_UNIT;
+  return (targetm.pointer_size () + BITS_PER_UNIT - 1) /  BITS_PER_UNIT;
 }
 
 #endif /* GCC_TARGET_H */
-- 
2.4.0

[PATCH 1/9] remove POINTER_SIZE_UNITS macro

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/lto/ChangeLog:

2015-07-26  Trevor Saunders  

* lto-object.c (lto_obj_begin_section): Call pointer_size_units ().

gcc/c-family/ChangeLog:

2015-07-26  Trevor Saunders  

* c-cppbuiltin.c (cpp_atomic_builtins): Call pointer_size_units ().

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* defaults.h (POINTER_SIZE_UNITS): Remove.
* dwarf2asm.c (size_of_encoded_value): Adjust.
(dw2_output_indirect_constant_1): Likewise.
* stor-layout.c (layout_type): Likewise.
* target.h (pointer_size_units): New function.
* varasm.c (assemble_addr_to_section): Adjust.
(default_assemble_integer): Likewise.
(dump_tm_clone_pairs): Likewise.
---
 gcc/c-family/c-cppbuiltin.c | 2 +-
 gcc/defaults.h  | 3 ---
 gcc/dwarf2asm.c | 4 ++--
 gcc/lto/lto-object.c| 3 ++-
 gcc/stor-layout.c   | 2 +-
 gcc/target.h| 8 
 gcc/varasm.c| 8 
 7 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/gcc/c-family/c-cppbuiltin.c b/gcc/c-family/c-cppbuiltin.c
index 1beb2db..73ec8eb 100644
--- a/gcc/c-family/c-cppbuiltin.c
+++ b/gcc/c-family/c-cppbuiltin.c
@@ -674,7 +674,7 @@ cpp_atomic_builtins (cpp_reader *pfile)
 
   /* ptr_type_node can't be used here since ptr_mode is only set when
  toplev calls backend_init which is not done with -E  or pch.  */
-  psize = POINTER_SIZE_UNITS;
+  psize = pointer_size_units ();
   if (psize >= SWAP_LIMIT)
 psize = 0;
   builtin_define_with_int_value ("__GCC_ATOMIC_POINTER_LOCK_FREE", 
diff --git a/gcc/defaults.h b/gcc/defaults.h
index 9d38ba1..1dd965b 100644
--- a/gcc/defaults.h
+++ b/gcc/defaults.h
@@ -771,9 +771,6 @@ see the files COPYING3 and COPYING.RUNTIME respectively.  
If not, see
 #ifndef POINTER_SIZE
 #define POINTER_SIZE BITS_PER_WORD
 #endif
-#ifndef POINTER_SIZE_UNITS
-#define POINTER_SIZE_UNITS ((POINTER_SIZE + BITS_PER_UNIT - 1) / BITS_PER_UNIT)
-#endif
 
 
 #ifndef PIC_OFFSET_TABLE_REGNUM
diff --git a/gcc/dwarf2asm.c b/gcc/dwarf2asm.c
index 9f3c4b1..b63f82e 100644
--- a/gcc/dwarf2asm.c
+++ b/gcc/dwarf2asm.c
@@ -385,7 +385,7 @@ size_of_encoded_value (int encoding)
   switch (encoding & 0x07)
 {
 case DW_EH_PE_absptr:
-  return POINTER_SIZE_UNITS;
+  return pointer_size_units ();
 case DW_EH_PE_udata2:
   return 2;
 case DW_EH_PE_udata4:
@@ -902,7 +902,7 @@ dw2_output_indirect_constant_1 (const char *sym, tree id)
 
   sym_ref = gen_rtx_SYMBOL_REF (Pmode, sym);
   assemble_variable (decl, 1, 1, 1);
-  assemble_integer (sym_ref, POINTER_SIZE_UNITS, POINTER_SIZE, 1);
+  assemble_integer (sym_ref, pointer_size_units (), POINTER_SIZE, 1);
 
   return 0;
 }
diff --git a/gcc/lto/lto-object.c b/gcc/lto/lto-object.c
index 087c6b1..198a585 100644
--- a/gcc/lto/lto-object.c
+++ b/gcc/lto/lto-object.c
@@ -37,6 +37,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "cgraph.h"
 #include "lto-section-names.h"
 #include "simple-object.h"
+#include "target.h"
 
 /* An LTO file wrapped around an simple_object.  */
 
@@ -340,7 +341,7 @@ lto_obj_begin_section (const char *name)
  && lo->sobj_w != NULL
  && lo->section == NULL);
 
-  align = ceil_log2 (POINTER_SIZE_UNITS);
+  align = ceil_log2 (pointer_size_units ());
   lo->section = simple_object_write_create_section (lo->sobj_w, name, align,
&errmsg, &err);
   if (lo->section == NULL)
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index 0d4f4a4..160ffe2 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -2229,7 +2229,7 @@ layout_type (tree type)
 
 case OFFSET_TYPE:
   TYPE_SIZE (type) = bitsize_int (POINTER_SIZE);
-  TYPE_SIZE_UNIT (type) = size_int (POINTER_SIZE_UNITS);
+  TYPE_SIZE_UNIT (type) = size_int (pointer_size_units ());
   /* A pointer might be MODE_PARTIAL_INT, but ptrdiff_t must be
 integral, which may be an __intN.  */
   SET_TYPE_MODE (type, mode_for_size (POINTER_SIZE, MODE_INT, 0));
diff --git a/gcc/target.h b/gcc/target.h
index a79f424..6715b07 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -219,4 +219,12 @@ pack_cumulative_args (CUMULATIVE_ARGS *arg)
 }
 #endif /* GCC_TM_H */
 
+/* Return the width of a pointer in units.  */
+
+inline unsigned int
+pointer_size_units ()
+{
+  return (POINTER_SIZE + BITS_PER_UNIT - 1) /  BITS_PER_UNIT;
+}
+
 #endif /* GCC_TARGET_H */
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 6a4ba0b..8cb2ec9 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -1544,7 +1544,7 @@ assemble_addr_to_section (rtx symbol, section *sec)
 {
   switch_to_section (sec);
   assemble_align (POINTER_SIZE);
-  assemble_integer (symbol, POINTER_SIZE_UNITS, POINTER_SIZE, 1);
+  assemble_integer (symbol, pointer_size_units (), POINTER_SIZE, 1);
 }
 
 /* Return the numbered .ctors.N (if CONSTRUCTOR_P) or .dtors.N (if
@@ -2732,7 +2732,7 @@ default_assemble_integer (rtx x ATTRIBUTE_U

[PATCH 5/9] ubsan.c: switch from POINTER_SIZE to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* ubsan.c (ubsan_encode_value): Call targetm.pointer_size ().
---
 gcc/ubsan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/ubsan.c b/gcc/ubsan.c
index d75c4ee..55d9440 100644
--- a/gcc/ubsan.c
+++ b/gcc/ubsan.c
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3.  If not see
 #include "tree-object-size.h"
 #include "tree-eh.h"
 #include "tree-cfg.h"
+#include "target.h"
 
 /* Map from a tree to a VAR_DECL tree.  */
 
@@ -139,7 +140,7 @@ ubsan_encode_value (tree t, bool in_expand_p)
 {
   tree type = TREE_TYPE (t);
   const unsigned int bitsize = GET_MODE_BITSIZE (TYPE_MODE (type));
-  if (bitsize <= POINTER_SIZE)
+  if (bitsize <= targetm.pointer_size ())
 switch (TREE_CODE (type))
   {
   case BOOLEAN_TYPE:
-- 
2.4.0

[PATCH 2/9] add pointer_size target hook

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* doc/tm.texi: Regenerate.
* doc/tm.texi.in: Adjust.
* target.def (pointer_size): New hook.
* targhooks.c (default_pointer_size): New function.
* targhooks.h (default_pointer_size): New function.
---
 gcc/doc/tm.texi| 7 +++
 gcc/doc/tm.texi.in | 2 ++
 gcc/target.def | 8 
 gcc/targhooks.c| 8 
 gcc/targhooks.h| 1 +
 5 files changed, 26 insertions(+)

diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index f95646c..34cc8f6 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -907,6 +907,13 @@ you must define @code{POINTERS_EXTEND_UNSIGNED}.  If you 
do not specify
 a value the default is @code{BITS_PER_WORD}.
 @end defmac
 
+@deftypefn {Target Hook} {unsigned int} TARGET_POINTER_SIZE ()
+Width of a pointer, in bits.  You must specify a value no wider than the
+ width of @code{Pmode}.  If it is not equal to the width of @code{Pmode},
+ you must define @code{POINTERS_EXTEND_UNSIGNED}.  If you do not specify
+ a value the default is @code{BITS_PER_WORD}.
+@end deftypefn
+
 @defmac POINTERS_EXTEND_UNSIGNED
 A C expression that determines how pointers should be extended from
 @code{ptr_mode} to either @code{Pmode} or @code{word_mode}.  It is
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 2383fb9..ca08f11 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -881,6 +881,8 @@ you must define @code{POINTERS_EXTEND_UNSIGNED}.  If you do 
not specify
 a value the default is @code{BITS_PER_WORD}.
 @end defmac
 
+@hook TARGET_POINTER_SIZE
+
 @defmac POINTERS_EXTEND_UNSIGNED
 A C expression that determines how pointers should be extended from
 @code{ptr_mode} to either @code{Pmode} or @code{word_mode}.  It is
diff --git a/gcc/target.def b/gcc/target.def
index 4edc209..2e247e0 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -5716,6 +5716,14 @@ DEFHOOK
  void, (tree *hold, tree *clear, tree *update),
  default_atomic_assign_expand_fenv)
 
+DEFHOOK
+(pointer_size,
+"Width of a pointer, in bits.  You must specify a value no wider than the\n\
+ width of @code{Pmode}.  If it is not equal to the width of @code{Pmode},\n\
+ you must define @code{POINTERS_EXTEND_UNSIGNED}.  If you do not specify\n\
+ a value the default is @code{BITS_PER_WORD}.",
+unsigned int, (), default_pointer_size)
+
 /* Leave the boolean fields at the end.  */
 
 /* True if we can create zeroed data by switching to a BSS section
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 3eca47e..19272c4 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1926,4 +1926,12 @@ can_use_doloop_if_innermost (const widest_int &, const 
widest_int &,
   return loop_depth == 1;
 }
 
+/* Default implementation of TARGET_POINTER_SIZE.  */
+
+unsigned int
+default_pointer_size ()
+{
+  return POINTER_SIZE;
+}
+
 #include "gt-targhooks.h"
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 5ae991d..6782d37 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -240,4 +240,5 @@ extern void default_setup_incoming_vararg_bounds 
(cumulative_args_t ca ATTRIBUTE
  tree type ATTRIBUTE_UNUSED,
  int *pretend_arg_size 
ATTRIBUTE_UNUSED,
  int second_time 
ATTRIBUTE_UNUSED);
+extern unsigned int default_pointer_size ();
 #endif /* GCC_TARGHOOKS_H */
-- 
2.4.0

[PATCH 0/9] start converting POINTER_SIZE to a hook

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

Hi,

$subject.

The patches were individually bootstrapped and regtested on x86_64-linux-gnu,
and run through config-list.mk together with further patches that remove uses
of the macro.  Ok?

Trev

Trevor Saunders (9):
  remove POINTER_SIZE_UNITS macro
  add pointer_size target hook
  target.h: change to use targetm.pointer_size instead of POINTER_SIZE
  varasm.c: switch from POINTER_SIZE to targetm.pointer_size ()
  ubsan.c: switch from POINTER_SIZE to targetm.pointer_size ()
  tree-chkp.c: switch to targetm.pointer_size ()
  stor-layout.c: switch to targetm.pointer_size ()
  tree.c: switch to targetm.pointer_size ()
  emit-rtl.c: switch to targetm.pointer_size ()

 gcc/c-family/c-cppbuiltin.c |  2 +-
 gcc/defaults.h  |  3 ---
 gcc/doc/tm.texi |  7 +++
 gcc/doc/tm.texi.in  |  2 ++
 gcc/dwarf2asm.c |  4 ++--
 gcc/emit-rtl.c  |  5 +++--
 gcc/lto/lto-object.c|  3 ++-
 gcc/stor-layout.c   |  9 +
 gcc/target.def  |  8 
 gcc/target.h|  8 
 gcc/targhooks.c |  8 
 gcc/targhooks.h |  1 +
 gcc/tree-chkp.c | 14 --
 gcc/tree.c  |  3 ++-
 gcc/ubsan.c |  3 ++-
 gcc/varasm.c| 12 ++--
 16 files changed, 65 insertions(+), 27 deletions(-)

-- 
2.4.0

[PATCH 9/9] emit-rtl.c: switch to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* emit-rtl.c (init_derived_machine_modes): Call
targetm.pointer_size ().
---
 gcc/emit-rtl.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index ed2b30b..9da93d1 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -76,7 +76,7 @@ struct target_rtl *this_target_rtl = &default_target_rtl;
 machine_mode byte_mode;/* Mode whose width is BITS_PER_UNIT.  */
 machine_mode word_mode;/* Mode whose width is BITS_PER_WORD.  */
 machine_mode double_mode;  /* Mode whose width is DOUBLE_TYPE_SIZE.  */
-machine_mode ptr_mode; /* Mode whose width is POINTER_SIZE.  */
+machine_mode ptr_mode; /* Mode whose width is targetm.pointer_size ().  */
 
 /* Datastructures maintained for currently processed function in RTL form.  */
 
@@ -5864,7 +5864,8 @@ init_derived_machine_modes (void)
word_mode = mode;
 }
 
-  ptr_mode = mode_for_size (POINTER_SIZE, GET_MODE_CLASS (Pmode), 0);
+  ptr_mode = mode_for_size (targetm.pointer_size (), GET_MODE_CLASS (Pmode),
+   0);
 }
 
 /* Create some permanent unique rtl objects shared between all functions.  */
-- 
2.4.0

[PATCH 8/9] tree.c: switch to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* tree.c (build_common_tree_nodes): Call targetm.pointer_size ().
---
 gcc/tree.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/gcc/tree.c b/gcc/tree.c
index 94263af..02cbda8 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -9986,7 +9986,8 @@ build_common_tree_nodes (bool signed_char, bool 
short_double)
 = build_pointer_type (build_type_variant (void_type_node, 1, 0));
   fileptr_type_node = ptr_type_node;
 
-  pointer_sized_int_node = build_nonstandard_integer_type (POINTER_SIZE, 1);
+  pointer_sized_int_node
+= build_nonstandard_integer_type (targetm.pointer_size (), 1);
 
   float_type_node = make_node (REAL_TYPE);
   TYPE_PRECISION (float_type_node) = FLOAT_TYPE_SIZE;
-- 
2.4.0

[PATCH 4/9] varasm.c: switch from POINTER_SIZE to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* varasm.c (assemble_addr_to_section): Call targetm.pointer_size ().
(dump_tm_clone_pairs): Likewise.
---
 gcc/varasm.c | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/gcc/varasm.c b/gcc/varasm.c
index 8cb2ec9..238ae39 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -1543,8 +1543,8 @@ void
 assemble_addr_to_section (rtx symbol, section *sec)
 {
   switch_to_section (sec);
-  assemble_align (POINTER_SIZE);
-  assemble_integer (symbol, pointer_size_units (), POINTER_SIZE, 1);
+  assemble_align (targetm.pointer_size ());
+  assemble_integer (symbol, pointer_size_units (), targetm.pointer_size (), 1);
 }
 
 /* Return the numbered .ctors.N (if CONSTRUCTOR_P) or .dtors.N (if
@@ -5870,14 +5870,14 @@ dump_tm_clone_pairs (vec tm_alias_pairs)
   if (!switched)
{
  switch_to_section (targetm.asm_out.tm_clone_table_section ());
- assemble_align (POINTER_SIZE);
+ assemble_align (targetm.pointer_size ());
  switched = true;
}
 
   assemble_integer (XEXP (DECL_RTL (src), 0),
-   pointer_size_units (), POINTER_SIZE, 1);
+   pointer_size_units (), targetm.pointer_size (), 1);
   assemble_integer (XEXP (DECL_RTL (dst), 0),
-   pointer_size_units (), POINTER_SIZE, 1);
+   pointer_size_units (), targetm.pointer_size (), 1);
 }
 }
 
-- 
2.4.0

[PATCH 7/9] stor-layout.c: switch to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* stor-layout.c (layout_type): Call targetm.pointer_size ().
---
 gcc/stor-layout.c | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index 160ffe2..6043398 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -2228,12 +2228,13 @@ layout_type (tree type)
   break;
 
 case OFFSET_TYPE:
-  TYPE_SIZE (type) = bitsize_int (POINTER_SIZE);
+  TYPE_SIZE (type) = bitsize_int (targetm.pointer_size ());
   TYPE_SIZE_UNIT (type) = size_int (pointer_size_units ());
   /* A pointer might be MODE_PARTIAL_INT, but ptrdiff_t must be
 integral, which may be an __intN.  */
-  SET_TYPE_MODE (type, mode_for_size (POINTER_SIZE, MODE_INT, 0));
-  TYPE_PRECISION (type) = POINTER_SIZE;
+  SET_TYPE_MODE (type, mode_for_size (targetm.pointer_size (), MODE_INT,
+ 0));
+  TYPE_PRECISION (type) = targetm.pointer_size ();
   break;
 
 case FUNCTION_TYPE:
-- 
2.4.0

[PATCH 6/9] tree-chkp.c: switch to targetm.pointer_size ()

2015-07-26 Thread tbsaunde+gcc

From: Trevor Saunders 

gcc/ChangeLog:

2015-07-26  Trevor Saunders  

* tree-chkp.c (chkp_build_array_ref): Call targetm.pointer_size ().
(chkp_find_bounds_for_elem): Likewise.
(chkp_find_bound_slots_1): Likewise.
(chkp_add_bounds_to_call_stmt): Likewise.
(chkp_instrument_function): Likewise.
---
 gcc/tree-chkp.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/gcc/tree-chkp.c b/gcc/tree-chkp.c
index 8c1b48c..456e79b 100644
--- a/gcc/tree-chkp.c
+++ b/gcc/tree-chkp.c
@@ -1583,7 +1583,7 @@ chkp_build_array_ref (tree arr, tree etype, tree esize,
 
ALL_BOUNDS[i] is filled with elem bounds if there
is a field in TYPE which has pointer type and offset
-   equal to i * POINTER_SIZE in bits.  */
+   equal to i * targetm.pointer_size () in bits.  */
 static void
 chkp_find_bounds_for_elem (tree elem, tree *all_bounds,
   HOST_WIDE_INT offs,
@@ -1593,7 +1593,7 @@ chkp_find_bounds_for_elem (tree elem, tree *all_bounds,
 
   if (BOUNDED_TYPE_P (type))
 {
-  if (!all_bounds[offs / POINTER_SIZE])
+  if (!all_bounds[offs / targetm.pointer_size ()])
{
  tree temp = make_temp_ssa_name (type, NULL, "");
  gimple assign = gimple_build_assign (temp, elem);
@@ -1602,7 +1602,8 @@ chkp_find_bounds_for_elem (tree elem, tree *all_bounds,
  gsi_insert_before (iter, assign, GSI_SAME_STMT);
  gsi = gsi_for_stmt (assign);
 
- all_bounds[offs / POINTER_SIZE] = chkp_find_bounds (temp, &gsi);
+ all_bounds[offs / targetm.pointer_size ()]
+   = chkp_find_bounds (temp, &gsi);
}
 }
   else if (RECORD_OR_UNION_TYPE_P (type))
@@ -1659,7 +1660,7 @@ chkp_find_bound_slots_1 (const_tree type, bitmap 
have_bound,
 HOST_WIDE_INT offs)
 {
   if (BOUNDED_TYPE_P (type))
-bitmap_set_bit (have_bound, offs / POINTER_SIZE);
+bitmap_set_bit (have_bound, offs / targetm.pointer_size ());
   else if (RECORD_OR_UNION_TYPE_P (type))
 {
   tree field;
@@ -1906,7 +1907,7 @@ chkp_add_bounds_to_call_stmt (gimple_stmt_iterator *gsi)
   else if (chkp_type_has_pointer (type))
{
  HOST_WIDE_INT max_bounds
-   = TREE_INT_CST_LOW (TYPE_SIZE (type)) / POINTER_SIZE;
+   = TREE_INT_CST_LOW (TYPE_SIZE (type)) / targetm.pointer_size ();
  tree *all_bounds = (tree *)xmalloc (sizeof (tree) * max_bounds);
  HOST_WIDE_INT bnd_no;
 
@@ -4267,7 +4268,8 @@ chkp_instrument_function (void)
  EXECUTE_IF_SET_IN_BITMAP (slots, 0, bnd_no, bi)
{
  tree bounds = chkp_get_next_bounds_parm (arg);
- HOST_WIDE_INT offs = bnd_no * POINTER_SIZE / BITS_PER_UNIT;
+ HOST_WIDE_INT offs
+   = bnd_no * targetm.pointer_size () / BITS_PER_UNIT;
  tree addr = chkp_build_addr_expr (orig_arg);
  tree ptr = build2 (MEM_REF, ptr_type_node, addr,
 build_int_cst (ptr_type_node, offs));
-- 
2.4.0

[PATCH] Optimize certain end of loop conditions into min/max operation

2015-07-26 Thread Michael Collison

This patch is designed to optimize end-of-loop conditions of the form
 i < x && i < y into i < min (x, y). Loop conditions involving '>' are
handled similarly using max (x, y).

As an example:

#define N 1024

int  a[N], b[N], c[N];

void add (unsigned int m, unsigned int n)
{
  unsigned int i, bound = (m < n) ? m : n;
  for  (i = 0; i < m && i < n; ++i)
a[i] = b[i] + c[i];
}


Performed bootstrap and make check on: x86_64_unknown-linux-gnu, 
arm-linux-gnueabihf, and aarch64-linux-gnu.

Okay for trunk?

2015-07-24  Michael Collison  
Andrew Pinski 

* match.pd ((x < y) && (x < z) -> x < min (y,z),
(x > y) and (x > z) -> x > max (y,z))

diff --git a/gcc/match.pd b/gcc/match.pd
index 5e8fd32..8691710 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1793,3 +1793,17 @@ along with GCC; see the file COPYING3.  If not see
 (convert (bit_and (op (convert:utype @0) (convert:utype @1))
   (convert:utype @4)))

+
+/* Transform (@0 < @1 and @0 < @2) to use min */
+(for op (lt le)
+(simplify
+(bit_and:c (op @0 @1) (op @0 @2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+(op @0 (min @1 @2)
+
+/* Transform (@0 > @1 and @0 > @2) to use max */
+(for op (gt ge)
+(simplify
+(bit_and:c (op @0 @1) (op @0 @2))
+(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
+(op @0 (max @1 @2)
--

--
Michael Collison
Linaro Toolchain Working Group
michael.colli...@linaro.org

Re: [RFC, PR66873] Use graphite for parloops

2015-07-26 Thread Sebastian Pop

On Sun, Jul 26, 2015 at 4:21 PM, Tom de Vries  wrote:
> I wrote an equivalent test-case in C:
> ...
> $ cat src/gcc/testsuite/gcc.dg/autopar/outer-7.c
> /* { dg-do compile } */
> /* { dg-options "-O2 -ftree-parallelize-loops=2 -fdump-tree-parloops-details
> -fdump-tree-optimized" } */
>
> void abort (void);
>
> #define N 500
>
> int
> main (void)
> {
>   int i, j;
>   int x[N][N];
>   int *y = &x[0][0];
>
>   for (i = 0; i < N; i++)
> for (j = 0; j < N; j++)
>   /* y[i * N + j] == x[i][j].  */
>   y[i * N + j] = i + j + 3;
>
>   for (i = 0; i < N; i++)
> for (j = 0; j < N; j++)
>   if (x[i][j] != i + j + 3)
> abort ();
>
>   return 0;
> }
>
> /* Check that outer loop is parallelized.  */
> /* { dg-final { scan-tree-dump-times "parallelizing outer loop" 1 "parloops"
> } } */
> /* { dg-final { scan-tree-dump-times "loopfn" 4 "optimized" } } */
> ...
>
> With -fno-tree-loop-ivcanon to keep original iteration order we get:
> ...
> #(Data Ref:
> #  bb: 4
> #  stmt: *_15 = _17;
> #  ref: *_15;
> #  base_object: MEM[(int *)&x];
> #  Access function 0: {{0B, +, 2000}_1, +, 4}_4
> #)
> #(Data Ref:
> #  bb: 4
> #  stmt: *_15 = _17;
> #  ref: *_15;
> #  base_object: MEM[(int *)&x];
> #  Access function 0: {{0B, +, 2000}_1, +, 4}_4
> #)
>   access_fn_A: {{0B, +, 2000}_1, +, 4}_4
>   access_fn_B: {{0B, +, 2000}_1, +, 4}_4
>
>  (subscript
>   iterations_that_access_an_element_twice_in_A: [0]
>   last_conflict: scev_not_known
>   iterations_that_access_an_element_twice_in_B: [0]
>   last_conflict: scev_not_known
>   (Subscript distance: 0 ))
>   inner loop index: 0
>   loop nest: (1 4 )
>   distance_vector:   0   0
>   distance_vector:   1 -500
>   direction_vector: ==
>   direction_vector: +-
> )
>   FAILED: data dependencies exist across iterations
> ...
>
> If we replace the y[i * N + j] with x[i][j] we get instead:
> ...
> #(Data Ref:
> #  bb: 4
> #  stmt: x[i_7][j_8] = _12;
> #  ref: x[i_7][j_8];
> #  base_object: x;
> #  Access function 0: {0, +, 1}_4
> #  Access function 1: {0, +, 1}_1
> #)
> #(Data Ref:
> #  bb: 4
> #  stmt: x[i_7][j_8] = _12;
> #  ref: x[i_7][j_8];
> #  base_object: x;
> #  Access function 0: {0, +, 1}_4
> #  Access function 1: {0, +, 1}_1
> #)
>   access_fn_A: {0, +, 1}_4
>   access_fn_B: {0, +, 1}_4
>
>  (subscript
>   iterations_that_access_an_element_twice_in_A: [0]
>   last_conflict: scev_not_known
>   iterations_that_access_an_element_twice_in_B: [0]
>   last_conflict: scev_not_known
>   (Subscript distance: 0 ))
>   access_fn_A: {0, +, 1}_1
>   access_fn_B: {0, +, 1}_1
>
>  (subscript
>   iterations_that_access_an_element_twice_in_A: [0]
>   last_conflict: scev_not_known
>   iterations_that_access_an_element_twice_in_B: [0]
>   last_conflict: scev_not_known
>   (Subscript distance: 0 ))
>   inner loop index: 0
>   loop nest: (1 4 )
>   distance_vector:   0   0
>   direction_vector: ==
> )
>   SUCCESS: may be parallelized
> parallelizing outer loop 8
> ...

It looks like a delinearization pass could help reconstruct a
two-dimensional array reference and make the Banerjee dependence test
succeed.
Note that Graphite works in this case only because the loop bounds are
statically defined: N is 500.  If N were instead passed in as a
function parameter, Graphite would also fail to compute the
dependence, as it cannot represent "i * N", so we would also need the
delinearization pass for Graphite.
Here is a bug that I recently opened for that:
https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66981

Sebastian

Re: [PATCH] Optimize certain end of loop conditions into min/max operation

2015-07-26 Thread Bin.Cheng

On Mon, Jul 27, 2015 at 11:41 AM, Michael Collison
 wrote:
> This patch is designed to optimize end of loop conditions involving of the
> form
>  i < x && i < y into i < min (x, y). Loop condition involving '>' are
> handled similarly using max(x,y).
> As an example:
>
> #define N 1024
>
> int  a[N], b[N], c[N];
>
> void add (unsigned int m, unsigned int n)
> {
>   unsigned int i, bound = (m < n) ? m : n;
>   for  (i = 0; i < m && i < n; ++i)
> a[i] = b[i] + c[i];
> }
>
>
> Performed bootstrap and make check on: x86_64_unknown-linux-gnu,
> arm-linux-gnueabihf, and aarch64-linux-gnu.
> Okay for trunk?
>
> 2015-07-24  Michael Collison  
> Andrew Pinski 
>
> * match.pd ((x < y) && (x < z) -> x < min (y,z),
> (x > y) and (x > z) -> x > max (y,z))
>
> diff --git a/gcc/match.pd b/gcc/match.pd
> index 5e8fd32..8691710 100644
> --- a/gcc/match.pd
> +++ b/gcc/match.pd
> @@ -1793,3 +1793,17 @@ along with GCC; see the file COPYING3.  If not see
>  (convert (bit_and (op (convert:utype @0) (convert:utype @1))
>(convert:utype @4)))
>
> +
> +/* Transform (@0 < @1 and @0 < @2) to use min */
> +(for op (lt le)
> +(simplify
> +(bit_and:c (op @0 @1) (op @0 @2))
> +(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
> +(op @0 (min @1 @2)
> +
> +/* Transform (@0 > @1 and @0 > @2) to use max */
> +(for op (gt ge)
> +(simplify
> +(bit_and:c (op @0 @1) (op @0 @2))
> +(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
> +(op @0 (max @1 @2)

Could you please give a test case for it?  Also, IIUC, this is not only
a simplification but also loop-invariant hoisting, so how does it check
invariance?

Thanks,
bin
> --
>
> --
> Michael Collison
> Linaro Toolchain Working Group
> michael.colli...@linaro.org
>

Re: [PATCH] Optimize certain end of loop conditions into min/max operation

2015-07-26 Thread Bin.Cheng

On Mon, Jul 27, 2015 at 12:23 PM, Bin.Cheng  wrote:
> On Mon, Jul 27, 2015 at 11:41 AM, Michael Collison
>  wrote:
>> This patch is designed to optimize end of loop conditions involving of the
>> form
>>  i < x && i < y into i < min (x, y). Loop condition involving '>' are
>> handled similarly using max(x,y).
>> As an example:
>>
>> #define N 1024
>>
>> int  a[N], b[N], c[N];
>>
>> void add (unsigned int m, unsigned int n)
>> {
>>   unsigned int i, bound = (m < n) ? m : n;
>>   for  (i = 0; i < m && i < n; ++i)
>> a[i] = b[i] + c[i];
>> }
>>
>>
>> Performed bootstrap and make check on: x86_64_unknown-linux-gnu,
>> arm-linux-gnueabihf, and aarch64-linux-gnu.
>> Okay for trunk?
>>
>> 2015-07-24  Michael Collison  
>> Andrew Pinski 
>>
>> * match.pd ((x < y) && (x < z) -> x < min (y,z),
>> (x > y) and (x > z) -> x > max (y,z))
>>
>> diff --git a/gcc/match.pd b/gcc/match.pd
>> index 5e8fd32..8691710 100644
>> --- a/gcc/match.pd
>> +++ b/gcc/match.pd
>> @@ -1793,3 +1793,17 @@ along with GCC; see the file COPYING3.  If not see
>>  (convert (bit_and (op (convert:utype @0) (convert:utype @1))
>>(convert:utype @4)))
>>
>> +
>> +/* Transform (@0 < @1 and @0 < @2) to use min */
>> +(for op (lt le)
>> +(simplify
>> +(bit_and:c (op @0 @1) (op @0 @2))
>> +(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
>> +(op @0 (min @1 @2)
>> +
>> +/* Transform (@0 > @1 and @0 > @2) to use max */
>> +(for op (gt ge)
>> +(simplify
>> +(bit_and:c (op @0 @1) (op @0 @2))
>> +(if (INTEGRAL_TYPE_P (TREE_TYPE (@0)))
>> +(op @0 (max @1 @2)
>
> Could you please give a test case for it?  Also IIUC, this is not only
> simplification, but also loop invariant hoist, so how does it check
> invariantness?

Sorry, I now realize this patch only does the simplification and then lets
the lim pass decide whether it can be moved.  That way there is no
invariance problem; please ignore my previous message.

Thanks,
bin

38 matches

Mail list logo