On x86_64, when the expected size of a memcpy/memset is known (e.g., with
FDO), the libcall strategy is used when the size is > 8192. This threshold
is hard-coded, which makes performance tuning difficult. This patch
adds two new parameters that make it tunable. Potential uses include
per-application tuning of the libcall strategy's minimum size based on
summary data from FDO (e.g., the instruction working-set size).

Bootstrapped and tested on x86_64/linux. OK for trunk?

thanks,

David


2013-08-02  Xinliang David Li  <davi...@google.com>

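        * params.def (PARAM_MEMCPY_LIBCALL_MIN_SIZE)
        (PARAM_MEMSET_LIBCALL_MIN_SIZE): New parameters.
        * config/i386/i386.c (i386_cost, i486_cost, pentium_cost)
        (pentiumpro_cost, geode_cost, k6_cost, athlon_cost, k8_cost)
        (pentium4_cost, nocona_cost, atom_cost, generic64_cost, core_cost)
        (generic32_cost): Remove const qualifier.
        (ix86_option_override_internal): Override default libcall size
        limit with parameters.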
Index: config/i386/i386.c
===================================================================
--- config/i386/i386.c  (revision 201458)
+++ config/i386/i386.c  (working copy)
@@ -156,7 +156,7 @@ struct processor_costs ix86_size_cost =
 };
 
 /* Processor costs (relative to an add) */
-static const
+static
 struct processor_costs i386_cost = {   /* 386 specific costs */
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -226,7 +226,7 @@ struct processor_costs i386_cost = {        /*
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs i486_cost = {   /* 486 specific costs */
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -298,7 +298,7 @@ struct processor_costs i486_cost = {        /*
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentium_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -368,7 +368,7 @@ struct processor_costs pentium_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentiumpro_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -447,7 +447,7 @@ struct processor_costs pentiumpro_cost =
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs geode_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -518,7 +518,7 @@ struct processor_costs geode_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs k6_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (2),                   /* cost of a lea instruction */
@@ -591,7 +591,7 @@ struct processor_costs k6_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs athlon_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (2),                   /* cost of a lea instruction */
@@ -664,7 +664,7 @@ struct processor_costs athlon_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs k8_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (2),                   /* cost of a lea instruction */
@@ -1265,7 +1265,7 @@ struct processor_costs btver2_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs pentium4_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (3),                   /* cost of a lea instruction */
@@ -1336,7 +1336,7 @@ struct processor_costs pentium4_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs nocona_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1),                   /* cost of a lea instruction */
@@ -1409,7 +1409,7 @@ struct processor_costs nocona_cost = {
   1,                                   /* cond_not_taken_branch_cost.  */
 };
 
-static const
+static
 struct processor_costs atom_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1) + 1,               /* cost of a lea instruction */
@@ -1556,7 +1556,7 @@ struct processor_costs slm_cost = {
 };
 
 /* Generic64 should produce code tuned for Nocona and K8.  */
-static const
+static
 struct processor_costs generic64_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   /* On all chips taken into consideration lea is 2 cycles and more.  With
@@ -1635,7 +1635,7 @@ struct processor_costs generic64_cost =
 };
 
 /* core_cost should produce code tuned for Core familly of CPUs.  */
-static const
+static
 struct processor_costs core_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   /* On all chips taken into consideration lea is 2 cycles and more.  With
@@ -1717,7 +1717,7 @@ struct processor_costs core_cost = {
 
 /* Generic32 should produce code tuned for PPro, Pentium4, Nocona,
    Athlon and K8.  */
-static const
+static
 struct processor_costs generic32_cost = {
   COSTS_N_INSNS (1),                   /* cost of an add instruction */
   COSTS_N_INSNS (1) + 1,               /* cost of a lea instruction */
@@ -4021,6 +4021,34 @@ ix86_option_override_internal (bool main
   /* Handle stack protector */
   if (!global_options_set.x_ix86_stack_protector_guard)
     ix86_stack_protector_guard = TARGET_HAS_BIONIC ? SSP_GLOBAL : SSP_TLS;
+
+  /* Now override the memcpy/memset inline strategy parameters  */
+  if (PARAM_VALUE (PARAM_MEMCPY_LIBCALL_MIN_SIZE) != -1
+      || PARAM_VALUE (PARAM_MEMSET_LIBCALL_MIN_SIZE) != -1)
+    {
+      const struct stringop_algs *algs[2];
+      int k;
+      int min_sizes[2];
+
+      algs[0] = &ix86_cost->memset[TARGET_64BIT != 0];
+      algs[1] = &ix86_cost->memcpy[TARGET_64BIT != 0];
+
+      min_sizes[0] = PARAM_VALUE (PARAM_MEMSET_LIBCALL_MIN_SIZE);
+      min_sizes[1] = PARAM_VALUE (PARAM_MEMCPY_LIBCALL_MIN_SIZE);
+
+      for (k = 0; k < 2; k++)
+        {
+          if (min_sizes[k] == -1)
+            continue;
+
+          for (i = 0; i < MAX_STRINGOP_ALGS - 1; i++)
+            {
+              if (algs[k]->size[i].max >= min_sizes[k]
+                  || algs[k]->size[i + 1].alg == libcall)
+                *const_cast<int *>(&algs[k]->size[i].max) = min_sizes[k] - 1;
+            }
+        }
+    }
 }
 
 /* Implement the TARGET_OPTION_OVERRIDE hook.  */
Index: params.def
===================================================================
--- params.def  (revision 201458)
+++ params.def  (working copy)
@@ -117,6 +117,18 @@ DEFPARAM (PARAM_COMDAT_SHARING_PROBABILI
          "Probability that COMDAT function will be shared with different 
compilation unit",
          20, 0, 0)
 
+/* Use libcall strategy when the expected size is no less than this parameter 
for memcpy.  */
+DEFPARAM (PARAM_MEMCPY_LIBCALL_MIN_SIZE,
+         "memcpy-libcall-min-size",
+         "The minimal expected size to force libcall expansion strategy for 
memcpy",
+         -1, 1, 0)
+
+/* Use libcall strategy when the expected size is no less than this parameter 
for memset.  */
+DEFPARAM (PARAM_MEMSET_LIBCALL_MIN_SIZE,
+         "memset-libcall-min-size",
+         "The minimal expected size to force libcall expansion strategy for 
memset",
+         -1, 1, 0)
+
 /* Limit on probability of entry BB.  */
 DEFPARAM (PARAM_PARTIAL_INLINING_ENTRY_PROBABILITY,
          "partial-inlining-entry-probability",

Reply via email to