danielcdh updated this revision to Diff 112574. danielcdh marked 2 inline comments as done. danielcdh added a comment.
Updated the patch to record the accurate-sample-profile setting as a function attribute so that it works with ThinLTO. https://reviews.llvm.org/D37091 Files: docs/ClangCommandLineReference.rst include/clang/Driver/Options.td include/clang/Frontend/CodeGenOptions.def lib/CodeGen/CodeGenFunction.cpp lib/Driver/ToolChains/Clang.cpp lib/Frontend/CompilerInvocation.cpp test/Driver/clang_f_opts.c
Index: test/Driver/clang_f_opts.c =================================================================== --- test/Driver/clang_f_opts.c +++ test/Driver/clang_f_opts.c @@ -53,6 +53,9 @@ // CHECK-REROLL-LOOPS: "-freroll-loops" // CHECK-NO-REROLL-LOOPS-NOT: "-freroll-loops" +// RUN: %clang -### -S -faccurate-sample-profile %s 2>&1 | FileCheck -check-prefix=CHECK-ACCURATE-SAMPLE-PROFILE %s +// CHECK-ACCURATE-SAMPLE-PROFILE: "-faccurate-sample-profile" + // RUN: %clang -### -S -fprofile-sample-use=%S/Inputs/file.prof %s 2>&1 | FileCheck -check-prefix=CHECK-SAMPLE-PROFILE %s // CHECK-SAMPLE-PROFILE: "-fprofile-sample-use={{.*}}/file.prof" Index: lib/Frontend/CompilerInvocation.cpp =================================================================== --- lib/Frontend/CompilerInvocation.cpp +++ lib/Frontend/CompilerInvocation.cpp @@ -652,6 +652,8 @@ Opts.NoUseJumpTables = Args.hasArg(OPT_fno_jump_tables); + Opts.AccurateSampleProfile = Args.hasArg(OPT_faccurate_sample_profile); + Opts.PrepareForLTO = Args.hasArg(OPT_flto, OPT_flto_EQ); Opts.EmitSummaryIndex = false; if (Arg *A = Args.getLastArg(OPT_flto_EQ)) { Index: lib/Driver/ToolChains/Clang.cpp =================================================================== --- lib/Driver/ToolChains/Clang.cpp +++ lib/Driver/ToolChains/Clang.cpp @@ -2340,6 +2340,10 @@ true)) CmdArgs.push_back("-fno-jump-tables"); + if (Args.hasFlag(options::OPT_faccurate_sample_profile, + options::OPT_fno_accurate_sample_profile, false)) + CmdArgs.push_back("-faccurate-sample-profile"); + if (!Args.hasFlag(options::OPT_fpreserve_as_comments, options::OPT_fno_preserve_as_comments, true)) CmdArgs.push_back("-fno-preserve-as-comments"); Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -837,6 +837,10 @@ Fn->addFnAttr("no-jump-tables", llvm::toStringRef(CGM.getCodeGenOpts().NoUseJumpTables)); + // Add accurate-sample-profile 
value. + if (CGM.getCodeGenOpts().AccurateSampleProfile) + Fn->addFnAttr("accurate-sample-profile"); + if (getLangOpts().OpenCL) { // Add metadata for a kernel function. if (const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(D)) Index: include/clang/Frontend/CodeGenOptions.def =================================================================== --- include/clang/Frontend/CodeGenOptions.def +++ include/clang/Frontend/CodeGenOptions.def @@ -183,6 +183,7 @@ CODEGENOPT(UnwindTables , 1, 0) ///< Emit unwind tables. CODEGENOPT(VectorizeLoop , 1, 0) ///< Run loop vectorizer. CODEGENOPT(VectorizeSLP , 1, 0) ///< Run SLP vectorizer. +CODEGENOPT(AccurateSampleProfile, 1, 0) ///< Sample profile is accurate. /// Attempt to use register sized accesses to bit-fields in structures, when /// possible. Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -587,6 +587,14 @@ def fPIE : Flag<["-"], "fPIE">, Group<f_Group>; def fno_PIE : Flag<["-"], "fno-PIE">, Group<f_Group>; def faccess_control : Flag<["-"], "faccess-control">, Group<f_Group>; +def faccurate_sample_profile : Flag<["-"], "faccurate-sample-profile">, + Group<f_Group>, Flags<[DriverOption, CC1Option]>, + HelpText<"If sample profile is accurate, we will mark all un-sampled " + "callsite as cold. 
Otherwise, treat callsites without profile " + "samples as if we have no profile">; +def fno_accurate_sample_profile : Flag<["-"], "fno-accurate-sample-profile">, + Group<f_Group>, Flags<[DriverOption]>; + def fallow_unsupported : Flag<["-"], "fallow-unsupported">, Group<f_Group>; def fapple_kext : Flag<["-"], "fapple-kext">, Group<f_Group>, Flags<[CC1Option]>, HelpText<"Use Apple's kernel extensions ABI">; @@ -643,6 +651,10 @@ Alias<fno_profile_sample_use>; def fauto_profile_EQ : Joined<["-"], "fauto-profile=">, Alias<fprofile_sample_use_EQ>; +def fauto_profile_accurate : Flag<["-"], "fauto-profile-accurate">, + Group<f_Group>, Alias<faccurate_sample_profile>; +def fno_auto_profile_accurate : Flag<["-"], "fno-auto-profile-accurate">, + Group<f_Group>, Alias<fno_accurate_sample_profile>; def fdebug_info_for_profiling : Flag<["-"], "fdebug-info-for-profiling">, Group<f_Group>, Flags<[CC1Option]>, HelpText<"Emit extra debug info to make sample profile more accurate.">; Index: docs/ClangCommandLineReference.rst =================================================================== --- docs/ClangCommandLineReference.rst +++ docs/ClangCommandLineReference.rst @@ -170,6 +170,14 @@ .. option:: -exported\_symbols\_list <arg> +.. option:: -faccurate-sample-profile, -fno-accurate-sample-profile +.. program:: clang + +If the sample profile is accurate, callsites without profile samples are marked +as cold. Otherwise, treat un-sampled callsites as if we have no profile. This +option can be used to enable more aggressive size optimization based on +profiles. + .. option:: -faligned-new=<arg> .. option:: -fcuda-approx-transcendentals, -fno-cuda-approx-transcendentals
_______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits