danielcdh updated this revision to Diff 112574.
danielcdh marked 2 inline comments as done.
danielcdh added a comment.

updated the patch to put it into function attribute so that it works with 
ThinLTO


https://reviews.llvm.org/D37091

Files:
  docs/ClangCommandLineReference.rst
  include/clang/Driver/Options.td
  include/clang/Frontend/CodeGenOptions.def
  lib/CodeGen/CodeGenFunction.cpp
  lib/Driver/ToolChains/Clang.cpp
  lib/Frontend/CompilerInvocation.cpp
  test/Driver/clang_f_opts.c

Index: test/Driver/clang_f_opts.c
===================================================================
--- test/Driver/clang_f_opts.c
+++ test/Driver/clang_f_opts.c
@@ -53,6 +53,9 @@
 // CHECK-REROLL-LOOPS: "-freroll-loops"
 // CHECK-NO-REROLL-LOOPS-NOT: "-freroll-loops"
 
+// RUN: %clang -### -S -faccurate-sample-profile %s 2>&1 | FileCheck -check-prefix=CHECK-ACCURATE-SAMPLE-PROFILE %s
+// CHECK-ACCURATE-SAMPLE-PROFILE: "-faccurate-sample-profile"
+
 // RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s
 // CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof"
 
Index: lib/Frontend/CompilerInvocation.cpp
===================================================================
--- lib/Frontend/CompilerInvocation.cpp
+++ lib/Frontend/CompilerInvocation.cpp
@@ -652,6 +652,8 @@
 
   Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables);
 
+  Opts.AccurateSampleProfile = Args.hasArg(OPT_faccurate_sample_profile);
+
   Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ);
   Opts.EmitSummaryIndex = false;
   if (Arg *A = Args.getLastArg(OPT_flto_EQ)) {
Index: lib/Driver/ToolChains/Clang.cpp
===================================================================
--- lib/Driver/ToolChains/Clang.cpp
+++ lib/Driver/ToolChains/Clang.cpp
@@ -2340,6 +2340,10 @@
                     true))
     CmdArgs.push_back("-fno-jump-tables");
 
+  if (Args.hasFlag(options::OPT_faccurate_sample_profile,
+                   options::OPT_fno_accurate_sample_profile, false))
+    CmdArgs.push_back("-faccurate-sample-profile");
+
   if (!Args.hasFlag(options::OPT_fpreserve_as_comments,
                     options::OPT_fno_preserve_as_comments, true))
     CmdArgs.push_back("-fno-preserve-as-comments");
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -837,6 +837,10 @@
   Fn->addFnAttr("no-jump-tables",
                 llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables));
 
+  // Add accurate-sample-profile value.
+  if (CGM.getCodeGenOpts().AccurateSampleProfile)
+    Fn->addFnAttr("accurate-sample-profile");
+
   if (getLangOpts().OpenCL) {
     // Add metadata for a kernel function.
     if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D))
Index: include/clang/Frontend/CodeGenOptions.def
===================================================================
--- include/clang/Frontend/CodeGenOptions.def
+++ include/clang/Frontend/CodeGenOptions.def
@@ -183,6 +183,7 @@
 CODEGENOPT(UnwindTables      , 1, 0) ///< Emit unwind tables.
 CODEGENOPT(VectorizeLoop     , 1, 0) ///< Run loop vectorizer.
 CODEGENOPT(VectorizeSLP      , 1, 0) ///< Run SLP vectorizer.
+CODEGENOPT(AccurateSampleProfile, 1, 0) ///< Sample profile is accurate.
 
   /// Attempt to use register sized accesses to bit-fields in structures, when
   /// possible.
Index: include/clang/Driver/Options.td
===================================================================
--- include/clang/Driver/Options.td
+++ include/clang/Driver/Options.td
@@ -587,6 +587,14 @@
 def fPIE : Flag<["-"], "fPIE">, Group<f_Group>;
 def fno_PIE : Flag<["-"], "fno-PIE">, Group<f_Group>;
 def faccess_control : Flag<["-"], "faccess-control">, Group<f_Group>;
+def faccurate_sample_profile : Flag<["-"], "faccurate-sample-profile">,
+  Group<f_Group>, Flags<[DriverOption, CC1Option]>,
+  HelpText<"If sample profile is accurate, we will mark all un-sampled "
+           "callsite as cold. Otherwise, treat callsites without profile "
+           "samples as if we have no profile">;
+def fno_accurate_sample_profile : Flag<["-"], "fno-accurate-sample-profile">,
+  Group<f_Group>, Flags<[DriverOption]>;
+
 def fallow_unsupported : Flag<["-"], "fallow-unsupported">, Group<f_Group>;
 def fapple_kext : Flag<["-"], "fapple-kext">, Group<f_Group>, Flags<[CC1Option]>,
   HelpText<"Use Apple's kernel extensions ABI">;
@@ -643,6 +651,10 @@
     Alias<fno_profile_sample_use>;
 def fauto_profile_EQ : Joined<["-"], "fauto-profile=">,
     Alias<fprofile_sample_use_EQ>;
+def fauto_profile_accurate : Flag<["-"], "fauto-profile-accurate">,
+    Group<f_Group>, Alias<faccurate_sample_profile>;
+def fno_auto_profile_accurate : Flag<["-"], "fno-auto-profile-accurate">,
+    Group<f_Group>, Alias<fno_accurate_sample_profile>;
 def fdebug_info_for_profiling : Flag<["-"], "fdebug-info-for-profiling">, Group<f_Group>,
     Flags<[CC1Option]>,
     HelpText<"Emit extra debug info to make sample profile more accurate.">;
Index: docs/ClangCommandLineReference.rst
===================================================================
--- docs/ClangCommandLineReference.rst
+++ docs/ClangCommandLineReference.rst
@@ -170,6 +170,14 @@
 
 .. option:: -exported\_symbols\_list <arg>
 
+.. option:: -faccurate-sample-profile, -fno-accurate-sample-profile
+.. program:: clang
+
+If the sample profile is accurate, callsites without profile samples are marked
+as cold. Otherwise, treat un-sampled callsites as if we have no profile. This
+option can be used to enable more aggressive size optimization based on
+profiles.
+
 .. option:: -faligned-new=<arg>
 
 .. option:: -fcuda-approx-transcendentals, -fno-cuda-approx-transcendentals
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to