Amir updated this revision to Diff 481138.
Amir added a comment.
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Documentation


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D139496/new/

https://reviews.llvm.org/D139496

Files:
  clang/CMakeLists.txt
  clang/cmake/caches/BOLT.cmake
  llvm/docs/AdvancedBuilds.rst

Index: llvm/docs/AdvancedBuilds.rst
===================================================================
--- llvm/docs/AdvancedBuilds.rst
+++ llvm/docs/AdvancedBuilds.rst
@@ -241,6 +241,56 @@
 
   $ ninja stage2-clang-bolt
 
+BOLT profile
+------------
+BOLT uses a profile collected either by Linux ``perf`` or by BOLT's own
+instrumentation. Both modes are supported by the CMake automation, with
+instrumentation being the default in ``BOLT.cmake`` and ``BOLT-PGO.cmake``.
+
+It's strongly recommended to use ``perf`` if the host system supports it, as
+it is a significantly faster and potentially more reliable method:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT_PERF=ON \
+      -C <path to source>/clang/cmake/caches/BOLT.cmake
+
+If the host system supports profiling branch stacks (e.g. AMD or Intel LBR
+(Last Branch Record), Armv9-A BRBE (Branch Record Buffer Extension)), enable
+it with ``-DCLANG_BOLT_PERF_LBR=ON`` to further improve the profile quality:
+
+.. code-block:: console
+
+  $ cmake <...> -DCLANG_BOLT_PERF=ON -DCLANG_BOLT_PERF_LBR=ON \
+      -C <path to source>/clang/cmake/caches/BOLT.cmake
+
+The following matrix describes supported profiling methods. Note that Linux/ELF
+is the only supported platform.
+
+============ =============== ========== ===================
+Architecture Instrumentation Linux perf Linux perf with LBR
+============ =============== ========== ===================
+x86_64       Yes             Yes        Yes
+AArch64      No              Yes        Not tested
+============ =============== ========== ===================
+
+Profiling targets
+-----------------
+The BOLT profile is collected by building one of the in-tree projects/targets
+with Clang as the workload. The following configuration options can be used to
+change the profiling build:
+
+**CLANG_BOLT_PROJECTS**
+  Projects to enable in the profiling build. Defaults to ``llvm``.
+
+**CLANG_BOLT_TARGETS**
+  Targets to build in the profiling build. Defaults to ``count`` in the
+  instrumentation build and ``FileCheck`` in the perf build.
+
+**CLANG_BOLT_EXTRA_CMAKE_FLAGS**
+  Extra CMake flags to pass to the profiling build at configuration time.
+
+
 3-Stage Non-Determinism
 =======================
 
Index: clang/cmake/caches/BOLT.cmake
===================================================================
--- clang/cmake/caches/BOLT.cmake
+++ clang/cmake/caches/BOLT.cmake
@@ -1,15 +1,17 @@
 set(CMAKE_BUILD_TYPE Release CACHE STRING "")
 set(CLANG_BOLT_INSTRUMENT ON CACHE BOOL "")
-set(CLANG_BOLT_INSTRUMENT_PROJECTS "llvm" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_TARGETS "count" CACHE STRING "")
+set(CLANG_BOLT_PERF OFF CACHE BOOL "")
+set(CLANG_BOLT_PERF_LBR OFF CACHE BOOL "")
+
+set(CLANG_BOLT_PROJECTS "llvm" CACHE STRING "")
+if (CLANG_BOLT_PERF)
+  set(CLANG_BOLT_INSTRUMENT OFF CACHE BOOL "" FORCE)
+  set(CLANG_BOLT_TARGETS "FileCheck" CACHE STRING "")
+else()
+  set(CLANG_BOLT_TARGETS "count" CACHE STRING "")
+endif()
 set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "")
-set(CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
+set(CLANG_BOLT_EXTRA_CMAKE_FLAGS "" CACHE STRING "")
 
 set(LLVM_ENABLE_PROJECTS "bolt;clang" CACHE STRING "")
 set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "")
-
-# Disable function splitting enabled by default in GCC8+
-if("${CMAKE_CXX_COMPILER_ID}" MATCHES "GNU")
-  set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fno-reorder-blocks-and-partition")
-  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-reorder-blocks-and-partition")
-endif()
Index: clang/CMakeLists.txt
===================================================================
--- clang/CMakeLists.txt
+++ clang/CMakeLists.txt
@@ -869,67 +869,106 @@
   endforeach()
 endif()
 
-if (CLANG_BOLT_INSTRUMENT AND NOT LLVM_BUILD_INSTRUMENTED)
+if ((CLANG_BOLT_INSTRUMENT OR CLANG_BOLT_PERF) AND NOT LLVM_BUILD_INSTRUMENTED)
   set(CLANG_PATH ${LLVM_RUNTIME_OUTPUT_INTDIR}/clang)
   set(CLANGXX_PATH ${CLANG_PATH}++)
-  set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
-  set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
   set(CLANG_OPTIMIZED ${CLANG_PATH}-bolt)
   set(CLANGXX_OPTIMIZED ${CLANGXX_PATH}-bolt)
 
-  # Instrument clang with BOLT
-  add_custom_target(clang-instrumented
-    DEPENDS ${CLANG_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
-    DEPENDS clang llvm-bolt
-    COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
-      -instrument --instrumentation-file-append-pid
-      --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-    COMMENT "Instrumenting clang binary with BOLT"
-    VERBATIM
-  )
+  if (CLANG_BOLT_INSTRUMENT)
+    set(CLANG_INSTRUMENTED ${CLANG_PATH}-bolt.inst)
+    set(CLANGXX_INSTRUMENTED ${CLANGXX_PATH}-bolt.inst)
 
-  # Make a symlink from clang-bolt.inst to clang++-bolt.inst
-  add_custom_target(clang++-instrumented
-    DEPENDS ${CLANGXX_INSTRUMENTED}
-  )
-  add_custom_command(OUTPUT ${CLANGXX_INSTRUMENTED}
-    DEPENDS clang-instrumented
-    COMMAND ${CMAKE_COMMAND} -E create_symlink
-      ${CLANG_INSTRUMENTED}
-      ${CLANGXX_INSTRUMENTED}
-    COMMENT "Creating symlink from BOLT instrumented clang to clang++"
-    VERBATIM
-  )
+    # Instrument clang with BOLT
+    add_custom_target(clang-instrumented
+      DEPENDS ${CLANG_INSTRUMENTED}
+    )
+    add_custom_command(OUTPUT ${CLANG_INSTRUMENTED}
+      DEPENDS clang llvm-bolt
+      COMMAND llvm-bolt ${CLANG_PATH} -o ${CLANG_INSTRUMENTED}
+        -instrument --instrumentation-file-append-pid
+        --instrumentation-file=${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      COMMAND ${CMAKE_COMMAND} -E create_symlink
+        ${CLANG_INSTRUMENTED}
+        ${CLANGXX_INSTRUMENTED}
+      COMMENT "Instrumenting clang binary with BOLT"
+      VERBATIM
+    )
+  endif()
+
+  # Set variables for profile collection step
+  if (CLANG_BOLT_INSTRUMENT)
+    set(CLANG_BOLT_CC ${CLANG_INSTRUMENTED})
+    set(CLANG_BOLT_CXX ${CLANGXX_INSTRUMENTED})
+  else() # CLANG_BOLT_PERF
+    set(CLANG_BOLT_CC ${CLANG_PATH})
+    set(CLANG_BOLT_CXX ${CLANGXX_PATH})
+
+    # Perf sampling:
+    # - use maximum frequency to reduce training time
+    # - use cycle events instead of branches - empirically found to produce
+    #   better results
+    # - if available, enable taken branch stack/LBR sampling
+    #   (-j/--branch-filter)
+    set(PERF_CMDLINE
+      perf record --event=cycles:u
+      --output=${CMAKE_CURRENT_BINARY_DIR}/prof.data
+      --freq=max
+    )
+    if (CLANG_BOLT_PERF_LBR)
+      list(APPEND PERF_CMDLINE --branch-filter=any,u)
+    endif()
+    list(APPEND PERF_CMDLINE --)
+  endif()
+
+  # Build specified targets to collect the profile
+  add_custom_target(bolt-profile-deps)
+  if (CLANG_BOLT_INSTRUMENT)
+    add_dependencies(bolt-profile-deps clang-instrumented)
+    set(CLANG_BOLT_PROFILE ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata)
+  else()
+    add_dependencies(bolt-profile-deps clang)
+    set(CLANG_BOLT_PROFILE ${CMAKE_CURRENT_BINARY_DIR}/prof.data)
+  endif()
+  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-stamps/)
+  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-bins/)
+  add_custom_target(bolt-clang-clear
+    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-cleared
+    )
+  add_custom_command(
+    OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/bolt-clang-cleared
+    DEPENDS bolt-profile-deps
+    COMMAND ${CMAKE_COMMAND} -E remove_directory ${BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${BINARY_DIR}
+    COMMAND ${CMAKE_COMMAND} -E remove_directory ${STAMP_DIR}
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${STAMP_DIR}
+    COMMENT "Clobberring bolt-clang build and stamp directories"
+    )
 
-  # Build specified targets with instrumented Clang to collect the profile
-  set(STAMP_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-stamps/)
-  set(BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/bolt-instrumented-clang-bins/)
   set(build_configuration "$<CONFIG>")
   include(ExternalProject)
-  ExternalProject_Add(bolt-instrumentation-profile
-    DEPENDS clang++-instrumented
-    PREFIX bolt-instrumentation-profile
+  ExternalProject_Add(bolt-profile
+    DEPENDS bolt-profile-deps
+    PREFIX bolt-profile
     SOURCE_DIR ${CMAKE_SOURCE_DIR}
     STAMP_DIR ${STAMP_DIR}
     BINARY_DIR ${BINARY_DIR}
     EXCLUDE_FROM_ALL 1
     CMAKE_ARGS
-                ${CLANG_BOLT_INSTRUMENT_EXTRA_CMAKE_FLAGS}
+                ${CLANG_BOLT_EXTRA_CMAKE_FLAGS}
                 # We shouldn't need to set this here, but INSTALL_DIR doesn't
                 # seem to work, so instead I'm passing this through
                 -DCMAKE_INSTALL_PREFIX=${CMAKE_INSTALL_PREFIX}
-                -DCMAKE_C_COMPILER=${CLANG_INSTRUMENTED}
-                -DCMAKE_CXX_COMPILER=${CLANGXX_INSTRUMENTED}
-                -DCMAKE_ASM_COMPILER=${CLANG_INSTRUMENTED}
+                -DCMAKE_C_COMPILER=${CLANG_BOLT_CC}
+                -DCMAKE_CXX_COMPILER=${CLANG_BOLT_CXX}
+                -DCMAKE_ASM_COMPILER=${CLANG_BOLT_CC}
                 -DCMAKE_ASM_COMPILER_ID=Clang
-                -DCMAKE_BUILD_TYPE=Release
-                -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_INSTRUMENT_PROJECTS}
+                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
+                -DLLVM_ENABLE_PROJECTS=${CLANG_BOLT_PROJECTS}
                 -DLLVM_TARGETS_TO_BUILD=${LLVM_TARGETS_TO_BUILD}
-    BUILD_COMMAND ${CMAKE_COMMAND} --build ${BINARY_DIR}
+    BUILD_COMMAND ${PERF_CMDLINE} ${CMAKE_COMMAND} --build ${BINARY_DIR}
                                    --config ${build_configuration}
-                                   --target ${CLANG_BOLT_INSTRUMENT_TARGETS}
+                                   --target ${CLANG_BOLT_TARGETS}
     INSTALL_COMMAND ""
     STEP_TARGETS configure build
     USES_TERMINAL_CONFIGURE 1
@@ -937,21 +976,31 @@
     USES_TERMINAL_INSTALL 1
   )
 
-  # Merge profiles into one using merge-fdata
   add_custom_target(clang-bolt-profile
-    DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-  )
-  add_custom_command(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-    DEPENDS merge-fdata bolt-instrumentation-profile-build
-    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-    COMMAND ${Python3_EXECUTABLE}
-      ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
-      $<TARGET_FILE:merge-fdata> ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
-      ${CMAKE_CURRENT_BINARY_DIR}
-    COMMENT "Preparing BOLT profile"
-    VERBATIM
+    DEPENDS ${CLANG_BOLT_PROFILE}
   )
+  if (CLANG_BOLT_INSTRUMENT)
+    # Merge profiles into one using merge-fdata
+    add_custom_command(OUTPUT ${CLANG_BOLT_PROFILE}
+      DEPENDS bolt-profile-build merge-fdata
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
+      COMMAND ${Python3_EXECUTABLE}
+        ${CMAKE_CURRENT_SOURCE_DIR}/utils/perf-training/perf-helper.py merge-fdata
+        $<TARGET_FILE:merge-fdata> prof.fdata .
+      COMMENT "Preparing BOLT profile"
+      VERBATIM
+    )
+  else() # CLANG_BOLT_PERF
+    # Profile is produced by perf by running the build
+    add_custom_command(OUTPUT ${CLANG_BOLT_PROFILE}
+      DEPENDS bolt-profile-build
+    )
+  endif()
 
+  # Pass extra flag in no-LBR mode
+  if (CLANG_BOLT_PERF AND NOT CLANG_BOLT_PERF_LBR)
+    set(CLANG_BOLT_NO_LBR "-nl")
+  endif()
   # Optimize original (pre-bolt) Clang using the collected profile
   add_custom_target(clang-bolt
     DEPENDS ${CLANG_OPTIMIZED}
@@ -960,9 +1009,10 @@
     DEPENDS clang-bolt-profile
     COMMAND llvm-bolt ${CLANG_PATH}
       -o ${CLANG_OPTIMIZED}
-      -data ${CMAKE_CURRENT_BINARY_DIR}/prof.fdata
+      -data ${CLANG_BOLT_PROFILE}
       -reorder-blocks=ext-tsp -reorder-functions=hfsort+ -split-functions
-      -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack
+      -split-all-cold -split-eh -dyno-stats -icf=1 -use-gnu-stack -plt=hot
+      ${CLANG_BOLT_NO_LBR}
     COMMAND ${CMAKE_COMMAND} -E rename ${CLANG_OPTIMIZED} ${CLANG_PATH}-${CLANG_VERSION_MAJOR}
     COMMENT "Optimizing Clang with BOLT"
     VERBATIM
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to