Hi,

This patch replaces the direct use of MOVE_RATIO in
tree-inline.c:estimate_move_cost with a new target hook,
TARGET_ESTIMATE_BLOCK_COPY_NINSNS.  This hook should return an estimate
of the number of instructions which will be emitted to copy a block of
memory.

tree-inline.c uses this estimate in its inlining heuristics to cost the
move of an object.  The existing implementation is lacking, and will
likely underestimate the size of most copies.

An initial iteration of this patch migrated tree-inline.c to use
move_by_pieces_profitable_p and move_by_pieces_ninsns, but this
proved painful for performance on ARM.

This version of the patch instead puts the control in the hands of the
backend, and keeps the existing logic as the default implementation of
the hook.

Bootstrapped on x86_64, ARM, AArch64.

Ok?

Thanks,
James

---
2014-09-25  James Greenhalgh  <james.greenha...@arm.com>

        * target.def (estimate_block_copy_ninsns): New.
        * targhooks.h (default_estimate_block_copy_ninsns): New.
        * targhooks.c (default_estimate_block_copy_ninsns): New.
        * tree-inline.c (estimate_move_cost): Use new target hook.
        * doc/tm.texi.in (TARGET_ESTIMATE_BLOCK_COPY_NINSNS): New.
        * doc/tm.texi: Regenerate.
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 162aa30..f59641a 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -6147,6 +6147,19 @@ in code size, for example where the number of insns emitted to perform a
 move would be greater than that of a library call.
 @end deftypefn
 
+@deftypefn {Target Hook} {unsigned int} TARGET_ESTIMATE_BLOCK_COPY_NINSNS (HOST_WIDE_INT @var{size}, bool @var{speed_p})
+This target hook should return an estimate of the number of
+instructions which will be emitted when copying an object with a size
+in units @var{size}.
+
+The parameter @var{speed_p} is true if the code is currently being
+optimized for speed rather than size.
+
+Where the block copy would be implemented using a library call, the
+estimate should be for the number of instructions required to set up
+and perform that call.
+@end deftypefn
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 1894745..d2a4386 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -4604,6 +4604,8 @@ that hook in preference to this macro, which is deprecated.
 
 @hook TARGET_MOVE_BY_PIECES_PROFITABLE_P
 
+@hook TARGET_ESTIMATE_BLOCK_COPY_NINSNS
+
 @defmac MOVE_MAX_PIECES
 A C expression used by @code{move_by_pieces} to determine the largest unit
 a load or store used to copy memory is.  Defaults to @code{MOVE_MAX}.
diff --git a/gcc/target.def b/gcc/target.def
index 0fd6235..10f3b2e 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -3079,6 +3079,21 @@ move would be greater than that of a library call.",
  bool, (unsigned int size, unsigned int alignment, bool speed_p),
  default_move_by_pieces_profitable_p)
 
+DEFHOOK
+(estimate_block_copy_ninsns,
+ "This target hook should return an estimate of the number of\n\
+instructions which will be emitted when copying an object with a size\n\
+in units @var{size}.\n\
+\n\
+The parameter @var{speed_p} is true if the code is currently being\n\
+optimized for speed rather than size.\n\
+\n\
+Where the block copy would be implemented using a library call, the\n\
+estimate should be for the number of instructions required to set up\n\
+and perform that call.",
+ unsigned int, (HOST_WIDE_INT size, bool speed_p),
+ default_estimate_block_copy_ninsns)
+
 /* True for MODE if the target expects that registers in this mode will
    be allocated to registers in a small register class.  The compiler is
    allowed to use registers explicitly used in the rtl as spill registers
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index ffe7080..eb0a4cd 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1437,6 +1437,16 @@ default_move_by_pieces_profitable_p (unsigned int size ATTRIBUTE_UNUSED,
 #endif
 }
 
+unsigned int
+default_estimate_block_copy_ninsns (HOST_WIDE_INT size, bool speed_p)
+{
+  if (size < 0 || size > MOVE_MAX_PIECES * get_move_ratio (speed_p))
+    /* Cost of a memcpy call, 3 arguments and the call.  */
+    return 4;
+  else
+    return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
+}
+
 bool
 default_profile_before_prologue (void)
 {
diff --git a/gcc/targhooks.h b/gcc/targhooks.h
index 93f21f8..f76ad31 100644
--- a/gcc/targhooks.h
+++ b/gcc/targhooks.h
@@ -183,6 +183,7 @@ extern int default_register_move_cost (enum machine_mode, reg_class_t,
 
 extern bool default_move_by_pieces_profitable_p (unsigned int,
 						 unsigned int, bool);
+extern unsigned int default_estimate_block_copy_ninsns (HOST_WIDE_INT, bool);
 
 extern bool default_profile_before_prologue (void);
 extern reg_class_t default_preferred_reload_class (rtx, reg_class_t);
diff --git a/gcc/tree-inline.c b/gcc/tree-inline.c
index ad474a5..e5f8653 100644
--- a/gcc/tree-inline.c
+++ b/gcc/tree-inline.c
@@ -3617,7 +3617,7 @@ tree_inlinable_function_p (tree fn)
    cost based on whether optimizing for size or speed according to SPEED_P.  */
 
 int
-estimate_move_cost (tree type, bool ARG_UNUSED (speed_p))
+estimate_move_cost (tree type, bool speed_p)
 {
   HOST_WIDE_INT size;
 
@@ -3635,11 +3635,7 @@ estimate_move_cost (tree type, bool ARG_UNUSED (speed_p))
 
   size = int_size_in_bytes (type);
 
-  if (size < 0 || size > MOVE_MAX_PIECES * MOVE_RATIO (speed_p))
-    /* Cost of a memcpy call, 3 arguments and the call.  */
-    return 4;
-  else
-    return ((size + MOVE_MAX_PIECES - 1) / MOVE_MAX_PIECES);
+  return targetm.estimate_block_copy_ninsns (size, speed_p);
 }
 
 /* Returns cost of operation CODE, according to WEIGHTS  */

Reply via email to