Some code size and timing number (profile use compile) are collected. Summary:

Compile time for profile-use compilation increase for all cases --
this is probably not a big issue as this is for peak performance.

It is more interesting to look at the size numbers. C++ program size
actually decrease in many cases (which match the results from our
internal benchmarks). For some benchmark such as povray, it reduce
quite a bit (either due to more DFE, or better cleanups -- have not
looked at details).   For C programs, especially smaller ones, the
size increase (some significantly).

David

1. xalancbmk:

Size (total size of object files, not text size which is correlated)

Before:  17,801,488
After :   17,252,032
Change:  -3%

Time (make all with -j1)

Before: 248 seconds
After  : 286 seconds
Change: 15%

2.  povray:

Size

Before: 2,855,690
After:   2,113,754
Change: -25%

Time:

Before:  36.5s
After:    43.2 s
Change: +18%

3. eon

Size

Before:  3,988,460
After :    4,099,108
Change: +2.8%


Time

Before: 53.1s
After:    56.8s
Change: +6.9%


4. gcc


Size

Before:  14,162,908
After:    15,089,106
Change: +6.5%

Time

Before: 57.3s
After:    60.7s
Change:  +5.9%


5. Parser:

Size

Before: 1,175,509
After:   1,568,135
Change: +33.4%

Time

Before: 5.3s
After:  12.9s
Change: +143%



On Fri, May 20, 2011 at 8:53 AM, Xinliang David Li <davi...@google.com> wrote:
> On Fri, May 20, 2011 at 2:12 AM, Richard Guenther
> <richard.guent...@gmail.com> wrote:
>> On Thu, May 19, 2011 at 8:26 PM, Xinliang David Li <davi...@google.com> 
>> wrote:
>>> I have done some SPEC testing evaluating the performance impact of
>>> your patch.  They look very positive.  LIPO got helped even more than
>>> FDO (I only did SPEC2k LIPO testing).
>>
>> Did you also check impact on compile-time and code-size?
>
> Not yet for SPEC -- will pick some benchmarks and take a look.
>
> David
>
>>
>>> Thanks,
>>>
>>> David
>>>
>>> 1. SPEC06 (C/C++)  with FDO
>>>
>>>                                   before          after         Improvement
>>> ---------------------------------------------------------------------------------
>>>       400.perlbench           27.4           28.2      2.89%         <---
>>>           401.bzip2           18.1           18.2      0.28%
>>>             403.gcc           25.5           26.3      3.26%           <---
>>>             429.mcf           26.0           26.0      0.08%
>>>           445.gobmk           22.6           23.2      2.30%         <----
>>>           456.hmmer           20.1           19.8     -1.25%
>>>           458.sjeng           23.6           23.6     -0.42%
>>>      462.libquantum           57.1           56.9     -0.40%
>>>         464.h264ref           34.4           34.1     -0.70%
>>>         471.omnetpp           18.8           18.9      0.53%
>>>           473.astar           16.6           17.0      2.53%         <---
>>>       483.xalancbmk           27.4           28.5      3.79%    <---
>>>        999.specrand           94.9           98.4      3.71%      <---
>>>          450.soplex           34.5           33.8     -2.00%
>>>          447.dealII           32.0           31.9     -0.34%
>>>          453.povray           25.9           28.3      9.02%       <---
>>>         482.sphinx3           32.6           31.4     -3.50%
>>>
>>>
>>> 2. SPEC2k FDO
>>>
>>>                                        before               after
>>>  Improvement
>>> --------------------------------------------------------------------------------------------------
>>>            164.gzip                1308                1372      4.95%
>>>             175.vpr                1723                1805      4.76%
>>>             176.gcc                2407                2504      4.01%
>>>             181.mcf                1724                1748      1.38%
>>>          186.crafty                2292                2349      2.47%
>>>          197.parser                1457                1601      9.88%
>>>             252.eon                2557                2588      1.22%
>>>         253.perlbmk                2479                2574      3.83%
>>>             254.gap                1996                2013      0.84%
>>>          255.vortex                2683                2798      4.31%
>>>           256.bzip2                1833                1829     -0.26%
>>>           300.twolf                2321                2359      1.63%
>>>            188.ammp                 771                 766     -0.72%
>>>          183.equake                1071                1071      0.05%
>>>             179.art                2954                2979      0.85%
>>>
>>>
>>> 3.  SPEC2k LIPO:
>>>
>>>                                       before              after
>>>  Improvement
>>> -------------------------------------------------------------------------------------------------
>>>            164.gzip                1311                1405      7.18%
>>>             175.vpr                1732                1772      2.35%
>>>             176.gcc                2462                2559      3.96%
>>>             181.mcf                1723                1731      0.50%
>>>          186.crafty                2552                2662      4.33%
>>>          197.parser                1468                1671     13.78%
>>>             252.eon                2690                3000     11.49%
>>>         253.perlbmk                2545                2611      2.60%
>>>             254.gap                2097                2152      2.60%
>>>          255.vortex                2949                3719     26.11%
>>>           256.bzip2                1864                1935      3.78%
>>>           300.twolf                2371                2471      4.22%
>>>            188.ammp                 771                 774      0.41%
>>>          183.equake                1081                1081     -0.04%
>>>             179.art                2878                2884      0.24%
>>>
>>>
>>> 4. SPEC2k LIPO vs FDO before the change:
>>>
>>>                                       FDO(before)    LIPO(before) 
>>> Improvement
>>> -----------------------------------------------------------------------------------------------------
>>>           164.gzip                1308                1311      0.22%
>>>             175.vpr                1723                1732      0.53%
>>>             176.gcc                2407                2462      2.27%
>>>             181.mcf                1724                1723     -0.07%
>>>          186.crafty                2292                2552     11.32%
>>>          197.parser                1457                1468      0.81%
>>>             252.eon                2557                2690      5.20%
>>>         253.perlbmk                2479                2545      2.66%
>>>             254.gap                1996                2097      5.04%
>>>          255.vortex                2683                2949      9.91%
>>>           256.bzip2                1833                1864      1.67%
>>>           300.twolf                2321                2371      2.15%
>>>            188.ammp                 771                 771     -0.04%
>>>          183.equake                1071                1081      1.02%
>>>             179.art                2954                2878     -2.59%
>>>
>>>
>>>
>>> 5. SPEC2k LIPO vs FDO  after the change:
>>>                                     FDO(after)        LIPO(after) 
>>> Improvement
>>> ------------------------------------------------------------------------------------------------
>>>            164.gzip                1372                1405      2.36%
>>>             175.vpr                1805                1772     -1.79%
>>>             176.gcc                2504                2559      2.21%
>>>             181.mcf                1748                1731     -0.94%
>>>          186.crafty                2349                2662     13.35%
>>>          197.parser                1601                1671      4.38%
>>>             252.eon                2588                3000     15.88%
>>>         253.perlbmk                2574                2611      1.44%
>>>             254.gap                2013                2152      6.87%
>>>          255.vortex                2798                3719     32.88%
>>>           256.bzip2                1829                1935      5.79%
>>>           300.twolf                2359                2471      4.75%
>>>            188.ammp                 766                 774      1.10%
>>>          183.equake                1071                1081      0.92%
>>>             179.art                2979                2884     -3.18%
>>>
>>>
>>>
>>> On Wed, May 18, 2011 at 12:53 PM, Mark Heffernan <meh...@google.com> wrote:
>>>> Verified identical binaries created and submitted.
>>>>
>>>> Mark
>>>>
>>>> On Wed, May 18, 2011 at 11:37 AM, Xinliang David Li <davi...@google.com> 
>>>> wrote:
>>>>> Ok with that change to google/main with some retesting.
>>>>>
>>>>> David
>>>>>
>>>>> On Wed, May 18, 2011 at 11:34 AM, Mark Heffernan <meh...@google.com> 
>>>>> wrote:
>>>>>> On Wed, May 18, 2011 at 10:52 AM, Xinliang David Li <davi...@google.com> 
>>>>>> wrote:
>>>>>>> The new change won't help those. Your original place will be ok if you
>>>>>>> test profile_arcs and branch_probability flags.
>>>>>>
>>>>>> Ah, yes.  I see your point now. Reverted to the original change with
>>>>>> condition profile_arc_flag and flag_branch_probabilities.
>>>>>>
>>>>>> Mark
>>>>>>
>>>>>>>
>>>>>>> David
>>>>>>>
>>>>>>>
>>>>>>> On Wed, May 18, 2011 at 10:39 AM, Mark Heffernan <meh...@google.com> 
>>>>>>> wrote:
>>>>>>>> On Tue, May 17, 2011 at 11:34 PM, Xinliang David Li 
>>>>>>>> <davi...@google.com>
>>>>>>>> wrote:
>>>>>>>>>
>>>>>>>>> To make consistent inline decisions between profile-gen and
>>>>>>>>> profile-use, probably better to check these two:
>>>>>>>>>
>>>>>>>>> flag_profile_arcs and flag_branch_probabilities.  -fprofile-use
>>>>>>>>> enables profile-arcs, and value profiling is enabled only when
>>>>>>>>> edge/branch profiling is enabled (so no need to be checked).
>>>>>>>>
>>>>>>>> I changed the location where these parameters are set to someplace more
>>>>>>>> appropriate (to where the flags are set when profile gen/use is 
>>>>>>>> indicated).
>>>>>>>>  Verified identical binaries are generated.
>>>>>>>> OK as updated?
>>>>>>>>
>>>>>>>> Mark
>>>>>>>> 2011-05-18  Mark Heffernan  <meh...@google.com>
>>>>>>>> * opts.c (set_profile_parameters): New function.
>>>>>>>> Index: opts.c
>>>>>>>> ===================================================================
>>>>>>>> --- opts.c      (revision 173666)
>>>>>>>> +++ opts.c      (working copy)
>>>>>>>> @@ -1209,6 +1209,25 @@ print_specific_help (unsigned int includ
>>>>>>>>                        opts->x_help_columns, opts, lang_mask);
>>>>>>>>  }
>>>>>>>>
>>>>>>>> +
>>>>>>>> +/* Set parameters to more appropriate values when profile information
>>>>>>>> +   is available.  */
>>>>>>>> +static void
>>>>>>>> +set_profile_parameters (struct gcc_options *opts,
>>>>>>>> +                       struct gcc_options *opts_set)
>>>>>>>> +{
>>>>>>>> +  /* With accurate profile information, inlining is much more
>>>>>>>> +     selective and makes better decisions, so increase the
>>>>>>>> +     inlining function size limits.  */
>>>>>>>> +  maybe_set_param_value
>>>>>>>> +    (PARAM_MAX_INLINE_INSNS_SINGLE, 1000,
>>>>>>>> +     opts->x_param_values, opts_set->x_param_values);
>>>>>>>> +  maybe_set_param_value
>>>>>>>> +    (PARAM_MAX_INLINE_INSNS_AUTO, 1000,
>>>>>>>> +     opts->x_param_values, opts_set->x_param_values);
>>>>>>>> +}
>>>>>>>> +
>>>>>>>> +
>>>>>>>>  /* Handle target- and language-independent options.  Return zero to
>>>>>>>>     generate an "unknown option" message.  Only options that need
>>>>>>>>     extra handling need to be listed here; if you simply want
>>>>>>>> @@ -1560,6 +1579,7 @@ common_handle_option (struct gcc_options
>>>>>>>>         opts->x_flag_unswitch_loops = value;
>>>>>>>>        if (!opts_set->x_flag_gcse_after_reload)
>>>>>>>>         opts->x_flag_gcse_after_reload = value;
>>>>>>>> +      set_profile_parameters (opts, opts_set);
>>>>>>>>        break;
>>>>>>>>
>>>>>>>>      case OPT_fprofile_generate_:
>>>>>>>> @@ -1580,6 +1600,7 @@ common_handle_option (struct gcc_options
>>>>>>>>          is done.  */
>>>>>>>>        if (!opts_set->x_flag_ipa_reference && in_lto_p)
>>>>>>>>          opts->x_flag_ipa_reference = false;
>>>>>>>> +      set_profile_parameters (opts, opts_set);
>>>>>>>>        break;
>>>>>>>>
>>>>>>>>      case OPT_fshow_column:
>>>>>>>>
>>>>>>>
>>>>>>
>>>>>
>>>>
>>>
>>
>

Reply via email to