While investigating a size increase of LRA-generated code on PPC64 (> 0.2% on SPEC2006), I found that register usage leveling can hurt the cross-jumping optimization on PPC, in addition to hurting the if-conversion optimization on targets with conditional execution.

So I decided to introduce a machine target hook that switches the optimization on (I believe introducing an option would be too much). The following patch implements it. Currently only x86/x86-64 use register usage leveling.

The patch was successfully bootstrapped on x86/x86-64 (it actually does not change generated code on x86/x86-64).

  Committed as rev. 199459.

2013-05-30  Vladimir Makarov  <vmaka...@redhat.com>

        * target.def (register_usage_leveling_p): New hook.
        * targhooks.c (default_register_usage_leveling_p): New.
        * targhooks.h (default_register_usage_leveling_p): New prototype.
        * lra-assigns.c (register_usage_leveling_p): Use the hook.
        * doc/tm.texi.in (TARGET_REGISTER_USAGE_LEVELING_P): New hook.
        * doc/tm.texi: Update.
        * config/i386/i386.c (TARGET_REGISTER_USAGE_LEVELING_P): Define.

Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c	(revision 199453)
+++ config/i386/i386.c	(working copy)
@@ -42812,6 +42812,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_
 #undef TARGET_REGISTER_PRIORITY
 #define TARGET_REGISTER_PRIORITY ix86_register_priority
 
+#undef TARGET_REGISTER_USAGE_LEVELING_P
+#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
+
 #undef TARGET_LEGITIMATE_CONSTANT_P
 #define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
 
Index: doc/tm.texi
===================================================================
--- doc/tm.texi	(revision 199453)
+++ doc/tm.texi	(working copy)
@@ -2898,6 +2898,10 @@ A target hook which returns true if we u
 A target hook which returns the register priority number to which the  register @var{hard_regno} belongs to.  The bigger the number, the  more preferable the hard register usage (when all other conditions are  the same).  This hook can be used to prefer some hard register over  others in LRA.  For example, some x86-64 register usage needs  additional prefix which makes instructions longer.  The hook can  return lower priority number for such registers make them less favorable  and as result making the generated code smaller.    The default version of this target hook returns always zero.
 @end deftypefn
 
+@deftypefn {Target Hook} bool TARGET_REGISTER_USAGE_LEVELING_P (void)
+A target hook which returns true if we need register usage leveling.  That means if a few hard registers are equally good for the  assignment, we choose the least used hard register.  The register  usage leveling may be profitable for some targets.  Don't use the  usage leveling for targets with conditional execution or targets  with big register files as it hurts if-conversion and cross-jumping  optimizations.    The default version of this target hook returns always false.
+@end deftypefn
+
 @deftypefn {Target Hook} bool TARGET_DIFFERENT_ADDR_DISPLACEMENT_P (void)
 A target hook which returns true if an address with the same structure  can have different maximal legitimate displacement.  For example, the  displacement can depend on memory mode or on operand combinations in  the insn.    The default version of this target hook returns always false.
 @end deftypefn
Index: doc/tm.texi.in
===================================================================
--- doc/tm.texi.in	(revision 199453)
+++ doc/tm.texi.in	(working copy)
@@ -2870,6 +2870,8 @@ as below:
 
 @hook TARGET_REGISTER_PRIORITY
 
+@hook TARGET_REGISTER_USAGE_LEVELING_P
+
 @hook TARGET_DIFFERENT_ADDR_DISPLACEMENT_P
 
 @hook TARGET_SPILL_CLASS
Index: lra-assigns.c
===================================================================
--- lra-assigns.c	(revision 199453)
+++ lra-assigns.c	(working copy)
@@ -603,11 +603,7 @@ find_hard_regno_for (int regno, int *cos
 	  if (best_hard_regno < 0 || hard_regno_costs[hard_regno] < best_cost
 	      || (hard_regno_costs[hard_regno] == best_cost
 		  && (priority > best_priority
-		      /* Hard register usage leveling actually results
-			 in bigger code for targets with conditional
-			 execution like ARM because it reduces chance
-			 of if-conversion after LRA.  */
-		      || (! targetm.have_conditional_execution ()
+		      || (targetm.register_usage_leveling_p ()
 			  && priority == best_priority
 			  && best_usage > lra_hard_reg_usage[hard_regno]))))
 	    {
Index: target.def
===================================================================
--- target.def	(revision 199453)
+++ target.def	(working copy)
@@ -2444,6 +2444,21 @@ DEFHOOK
  int, (int),
  default_register_priority)
 
+/* Return true if we need register usage leveling.  */
+DEFHOOK
+(register_usage_leveling_p,
+ "A target hook which returns true if we need register usage leveling.\
+  That means if a few hard registers are equally good for the\
+  assignment, we choose the least used hard register.  The register\
+  usage leveling may be profitable for some targets.  Don't use the\
+  usage leveling for targets with conditional execution or targets\
+  with big register files as it hurts if-conversion and cross-jumping\
+  optimizations.\
+  \
+  The default version of this target hook returns always false.",
+ bool, (void),
+ default_register_usage_leveling_p)
+
 /* Return true if maximal address displacement can be different.  */
 DEFHOOK
 (different_addr_displacement_p,
Index: targhooks.c
===================================================================
--- targhooks.c	(revision 199453)
+++ targhooks.c	(working copy)
@@ -859,6 +859,12 @@ default_register_priority (int hard_regn
 }
 
 extern bool
+default_register_usage_leveling_p (void)
+{
+  return false;
+}
+
+extern bool
 default_different_addr_displacement_p (void)
 {
   return false;
Index: targhooks.h
===================================================================
--- targhooks.h	(revision 199453)
+++ targhooks.h	(working copy)
@@ -135,6 +135,7 @@ extern int default_return_pops_args (tre
 extern reg_class_t default_branch_target_register_class (void);
 extern bool default_lra_p (void);
 extern int default_register_priority (int);
+extern bool default_register_usage_leveling_p (void);
 extern bool default_different_addr_displacement_p (void);
 extern reg_class_t default_secondary_reload (bool, rtx, reg_class_t,
 					     enum machine_mode,

Reply via email to