Amir updated this revision to Diff 538271.
Amir added a comment.

Rebased and reformatted with `black` (D150761 <https://reviews.llvm.org/D150761>)


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D143617/new/

https://reviews.llvm.org/D143617

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  clang/utils/perf-training/CMakeLists.txt
  clang/utils/perf-training/bolt.lit.cfg
  clang/utils/perf-training/bolt.lit.site.cfg.in
  clang/utils/perf-training/perf-helper.py

Index: clang/utils/perf-training/perf-helper.py
===================================================================
--- clang/utils/perf-training/perf-helper.py
+++ clang/utils/perf-training/perf-helper.py
@@ -67,6 +67,71 @@
     return 0
 
 
+def perf(args):
+    """Run `perf record` on a command: args is [--lbr] [--] cmd [cmd args...]."""
+    parser = argparse.ArgumentParser(
+        prog="perf-helper perf", description="perf wrapper for BOLT profile collection"
+    )
+    parser.add_argument(
+        "--lbr", required=False, action="store_true", help="Use perf with branch stacks"
+    )
+    parser.add_argument("cmd", nargs="*", help="")
+
+    # Use python's arg parser to handle all leading option arguments, but pass
+    # everything else through to perf
+    first_cmd = next((arg for arg in args if not arg.startswith("--")), None)
+    if first_cmd is None:
+        # Exit with a usage error instead of an uncaught StopIteration.
+        parser.error("no command to profile was given")
+    last_arg_idx = args.index(first_cmd)
+
+    opts = parser.parse_args(args[:last_arg_idx])
+    cmd = args[last_arg_idx:]
+
+    perf_args = [
+        "perf",
+        "record",
+        "--event=cycles:u",
+        "--freq=max",
+        "--output=%d.perf.data" % os.getpid(),
+    ]
+    if opts.lbr:
+        perf_args += ["--branch-filter=any,u"]
+    perf_args.extend(cmd)
+
+    start_time = time.time()
+    subprocess.check_call(perf_args)
+
+    elapsed = time.time() - start_time
+    print("... data collection took %.4fs" % elapsed)
+    return 0
+
+
+def perf2bolt(args):
+    """Run perf2bolt on every perf.data file reported for `path`; returns 0."""
+    cli = argparse.ArgumentParser(
+        prog="perf-helper perf2bolt",
+        description="perf2bolt conversion wrapper for perf.data files",
+    )
+    cli.add_argument("p2b_path", help="Path to llvm-bolt")
+    cli.add_argument("path", help="Path containing perf.data files")
+    cli.add_argument("binary", help="Input binary")
+    cli.add_argument(
+        "--nolbr", required=False, action="store_true", help="Use -nl perf2bolt mode"
+    )
+    parsed = cli.parse_args(args)
+
+    # Everything up to and including "-p" is shared by all conversions; the
+    # perf.data path and the output file are appended per input file.
+    common = [parsed.p2b_path, parsed.binary, "--aggregate-only"]
+    common.append("--profile-format=yaml")
+    common.extend(["-nl", "-p"] if parsed.nolbr else ["-p"])
+    for perf_data in findFilesWithExtension(parsed.path, "perf.data"):
+        output = perf_data + ".fdata"
+        subprocess.check_call(common + [perf_data, "-o", output])
+    return 0
+
+
 def dtrace(args):
     parser = argparse.ArgumentParser(
         prog="perf-helper dtrace",
@@ -507,6 +572,8 @@
     "cc1": cc1,
     "gen-order-file": genOrderFile,
     "merge-fdata": merge_fdata,
+    "perf": perf,
+    "perf2bolt": perf2bolt,
 }
 
 
Index: clang/utils/perf-training/bolt.lit.site.cfg.in
===================================================================
--- clang/utils/perf-training/bolt.lit.site.cfg.in
+++ clang/utils/perf-training/bolt.lit.site.cfg.in
@@ -9,6 +9,7 @@
 config.target_triple = "@LLVM_TARGET_TRIPLE@"
 config.python_exe = "@Python3_EXECUTABLE@"
 config.clang_obj_root = path(r"@CLANG_BINARY_DIR@")
+config.clang_bolt_mode = "@CLANG_BOLT@"
 
 # Let the main config do the real work.
 lit_config.load_config(config, "@CLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg")
Index: clang/utils/perf-training/bolt.lit.cfg
===================================================================
--- clang/utils/perf-training/bolt.lit.cfg
+++ clang/utils/perf-training/bolt.lit.cfg
@@ -6,15 +6,40 @@
 import os
 import subprocess
 
-config.clang = os.path.realpath(lit.util.which('clang-bolt.inst', config.clang_tools_dir)).replace('\\', '/')
+# "instrument" runs the BOLT-instrumented clang directly; every other mode
+# (perf or LBR) runs the regular clang under the perf-helper.py perf wrapper.
+bolt_mode = config.clang_bolt_mode.lower()
+if bolt_mode == "instrument":
+    clang_binary = "clang-bolt.inst"
+    perf_wrapper = ""
+else:
+    clang_binary = "clang"
+    lbr_flag = " --lbr" if bolt_mode == "lbr" else ""
+    # The trailing "--" ends the wrapper's own options; the clang command follows.
+    perf_wrapper = "%s %s/perf-helper.py perf%s -- " % (
+        config.python_exe,
+        config.perf_helper_dir,
+        lbr_flag,
+    )
 
-config.name = 'Clang Perf Training'
-config.suffixes = ['.c', '.cc', '.cpp', '.m', '.mm', '.cu', '.ll', '.cl', '.s', '.S', '.modulemap', '.test']
+clang_path = lit.util.which(clang_binary, config.clang_tools_dir)
+config.clang = os.path.realpath(clang_path).replace("\\", "/")
+
+config.name = "Clang Perf Training"
+config.suffixes = ".c .cc .cpp .m .mm .cu .ll .cl .s .S .modulemap .test".split()
 
 use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL")
 config.test_format = lit.formats.ShTest(use_lit_shell == "0")
-config.substitutions.append( ('%clang_cpp_skip_driver', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_cpp', ' %s --driver-mode=g++ ' % (config.clang)))
-config.substitutions.append( ('%clang_skip_driver', ' %s ' % (config.clang)))
-config.substitutions.append( ('%clang', ' %s ' % (config.clang) ) )
-config.substitutions.append( ('%test_root', config.test_exec_root ) )
+# Every %clang* substitution expands to "<perf wrapper> <clang>" padded with
+# spaces; the C++ variants additionally select the g++ driver mode.
+clang_cmd = " %s %s " % (perf_wrapper, config.clang)
+clang_cpp_cmd = clang_cmd + "--driver-mode=g++ "
+config.substitutions.extend(
+    [
+        ("%clang_cpp_skip_driver", clang_cpp_cmd),
+        ("%clang_cpp", clang_cpp_cmd),
+        ("%clang_skip_driver", clang_cmd),
+        ("%clang", clang_cmd),
+        ("%test_root", config.test_exec_root),
+    ]
+)
Index: clang/utils/perf-training/CMakeLists.txt
===================================================================
--- clang/utils/perf-training/CMakeLists.txt
+++ clang/utils/perf-training/CMakeLists.txt
@@ -62,7 +62,7 @@
     DEPENDS generate-dtrace-logs)
 endif()
 
-if(CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if(CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   configure_lit_site_cfg(
     ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in
     ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg
@@ -71,16 +71,44 @@
   add_lit_testsuite(generate-bolt-fdata "Generating BOLT profile for Clang"
     ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/
     EXCLUDE_FROM_CHECK_ALL
-    DEPENDS clang-instrumented clear-bolt-fdata
+    DEPENDS clang-bolt-training-deps clear-bolt-fdata clear-perf-data
     )
 
   add_custom_target(clear-bolt-fdata
     COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} fdata
     COMMENT "Clearing old BOLT fdata")
 
+  # generate-bolt-fdata depends on this target, so it runs before profiling.
+  add_custom_target(clear-perf-data
+    COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py clean ${CMAKE_CURRENT_BINARY_DIR} perf.data
+    COMMENT "Clearing old perf data"
+    VERBATIM)
+
+  string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+  # Perf mode selects the no-LBR conversion: --nolbr makes perf-helper.py
+  # perf2bolt pass -nl to the converter.
+  if (uppercase_CLANG_BOLT STREQUAL "PERF")
+    set(BOLT_NO_LBR "--nolbr")
+  endif()
+
+  # merge-fdata-deps stands for whichever step produces the fdata files in the
+  # selected mode, so clang-bolt-profile needs only this one dependency.
+  add_custom_target(merge-fdata-deps)
+  if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+    add_dependencies(merge-fdata-deps generate-bolt-fdata)
+  else()
+    # Convert perf profiles into fdata
+    add_custom_target(convert-perf-fdata
+      COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py perf2bolt $<TARGET_FILE:llvm-bolt> ${CMAKE_CURRENT_BINARY_DIR} $<TARGET_FILE:clang> ${BOLT_NO_LBR}
+      COMMENT "Converting perf files to BOLT fdata"
+      DEPENDS llvm-bolt generate-bolt-fdata
+      VERBATIM)
+    add_dependencies(merge-fdata-deps convert-perf-fdata)
+  endif()
+
   # Merge profiles into one using merge-fdata
   add_custom_target(clang-bolt-profile
     COMMAND "${Python3_EXECUTABLE}" ${CMAKE_CURRENT_SOURCE_DIR}/perf-helper.py merge-fdata $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata ${CMAKE_CURRENT_BINARY_DIR}
     COMMENT "Merging BOLT fdata"
-    DEPENDS merge-fdata generate-bolt-fdata)
+    DEPENDS merge-fdata merge-fdata-deps)
 endif()
Index: clang/cmake/caches/BOLT.cmake
===================================================================
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,5 +1,5 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
+set(CLANG_BOLT "INSTRUMENT" CACHE STRING "")
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -849,23 +849,47 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+# BOLT is opt-in: with a non-false default every build would take the BOLT
+# branch below even when no profiling mode was requested.
+set(CLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to Clang. \
+  May be specified as Instrument or Perf or LBR to use a particular profiling \
+  mechanism.")
+string(TOUPPER "${CLANG_BOLT}" uppercase_CLANG_BOLT)
+
+# Reject unknown modes: they would otherwise fall into the perf/LBR branch
+# below without the no-LBR flag.
+if (CLANG_BOLT AND NOT uppercase_CLANG_BOLT MATCHES "^(INSTRUMENT|PERF|LBR)$")
+  message(FATAL_ERROR
+    "Unknown CLANG_BOLT value '${CLANG_BOLT}': expected Instrument, Perf or LBR")
+endif()
+
+if (CLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
   set(BOLT_FDATA ${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/prof.fdata)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-    DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-    DEPENDS clang llvm-bolt
-    COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-      -instrument --instrumentation-file-append-pid
-      --instrumentation-file=${BOLT_FDATA}
-    COMMENT "Instrumenting clang binary with BOLT"
-    VERBATIM
-  )
+  # Pass extra flag in no-LBR mode
+  if (uppercase_CLANG_BOLT STREQUAL "PERF")
+    set(BOLT_NO_LBR "-nl")
+  endif()
+
+  if (uppercase_CLANG_BOLT STREQUAL "INSTRUMENT")
+    # Instrument clang with BOLT
+    add_custom_target(clang-instrumented
+      DEPENDS ${CLANG_INSTRUMENTED}
+    )
+    add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+      DEPENDS clang llvm-bolt
+      COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+        -instrument --instrumentation-file-append-pid
+        --instrumentation-file=${BOLT_FDATA}
+      COMMENT "Instrumenting clang binary with BOLT"
+      VERBATIM
+    )
+    add_custom_target(clang-bolt-training-deps DEPENDS clang-instrumented)
+  else() # perf or LBR
+    add_custom_target(clang-bolt-training-deps DEPENDS clang)
+  endif()
 
   # Optimize original (pre-bolt) Clang using the collected profile
   set(CLANG_OPTIMIZED ${CMAKE_CURRENT_BINARY_DIR}/clang.bolt)
@@ -879,6 +903,7 @@
       -data ${BOLT_FDATA}
       -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
       -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+      ${BOLT_NO_LBR}
     COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} $<TARGET_FILE:clang>
     COMMENT "Optimizing Clang with BOLT"
     VERBATIM
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to