https://github.com/pawosm-arm updated https://github.com/llvm/llvm-project/pull/197947
>From c068a33916b7fdaedf69fe9c695b370f81e2d996 Mon Sep 17 00:00:00 2001 From: Paul Osmialowski <[email protected]> Date: Mon, 11 May 2026 08:11:25 +0000 Subject: [PATCH] [flang][cmake][perf-training] Optimize flang with PGO and BOLT This is an attempt to replicate a similar feature already available to clang. The changes in this patch were made with an intent to reuse as much of existing infrastructure as possible. Namely, two-stage build arrangement, perf-helper.py script and the means for building of the instrumented binaries have been all incorporated in this approach. It was deliberately chosen to optimize clang along with flang as they are mostly working together in the final toolchain. See the flang/cmake/caches/README.txt for more details. --- clang/CMakeLists.txt | 12 +- flang/CMakeLists.txt | 1 + flang/cmake/caches/BOLT-PGO.cmake | 21 +++ flang/cmake/caches/BOLT.cmake | 18 +++ .../caches/PGO-stage2-instrumented.cmake | 26 ++++ flang/cmake/caches/PGO-stage2.cmake | 3 + flang/cmake/caches/PGO.cmake | 36 +++++ flang/cmake/caches/README.txt | 97 ++++++++++++ flang/tools/flang-driver/CMakeLists.txt | 73 +++++++++ flang/utils/perf-training/CMakeLists.txt | 140 ++++++++++++++++++ flang/utils/perf-training/README.txt | 6 + flang/utils/perf-training/bolt.lit.cfg | 59 ++++++++ .../utils/perf-training/bolt.lit.site.cfg.in | 20 +++ .../utils/perf-training/f90/hello-openmp.f95 | 14 ++ flang/utils/perf-training/f90/hello-world.f95 | 8 + flang/utils/perf-training/f90/hello.f | 24 +++ .../perf-training/f90/module_torture.f95 | 47 ++++++ flang/utils/perf-training/f90/sincos.f95 | 24 +++ .../perf-training/flang-modules/build.test | 10 ++ flang/utils/perf-training/lit.cfg | 50 +++++++ flang/utils/perf-training/lit.site.cfg.in | 18 +++ flang/utils/perf-training/order-files.lit.cfg | 43 ++++++ .../perf-training/order-files.lit.site.cfg.in | 13 ++ 23 files changed, 759 insertions(+), 4 deletions(-) create mode 100644 flang/cmake/caches/BOLT-PGO.cmake create mode 100644 
flang/cmake/caches/BOLT.cmake create mode 100644 flang/cmake/caches/PGO-stage2-instrumented.cmake create mode 100644 flang/cmake/caches/PGO-stage2.cmake create mode 100644 flang/cmake/caches/PGO.cmake create mode 100644 flang/cmake/caches/README.txt create mode 100644 flang/utils/perf-training/CMakeLists.txt create mode 100644 flang/utils/perf-training/README.txt create mode 100644 flang/utils/perf-training/bolt.lit.cfg create mode 100644 flang/utils/perf-training/bolt.lit.site.cfg.in create mode 100644 flang/utils/perf-training/f90/hello-openmp.f95 create mode 100644 flang/utils/perf-training/f90/hello-world.f95 create mode 100644 flang/utils/perf-training/f90/hello.f create mode 100644 flang/utils/perf-training/f90/module_torture.f95 create mode 100644 flang/utils/perf-training/f90/sincos.f95 create mode 100644 flang/utils/perf-training/flang-modules/build.test create mode 100644 flang/utils/perf-training/lit.cfg create mode 100644 flang/utils/perf-training/lit.site.cfg.in create mode 100644 flang/utils/perf-training/order-files.lit.cfg create mode 100644 flang/utils/perf-training/order-files.lit.site.cfg.in diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 40506fc8a1546..022899cf67fe4 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -806,11 +806,15 @@ if (CLANG_ENABLE_BOOTSTRAP) if(LLVM_BUILD_INSTRUMENTED) string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_INSTRUMENTED) if (LLVM_BUILD_INSTRUMENTED STREQUAL "CSSPGO") - add_dependencies(clang-bootstrap-deps generate-sprofdata) - set(PGO_OPT -DLLVM_SPROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata) + set(PGO_OPT_SPROFDATA "${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.sprofdata" CACHE STRING "") + set(PGO_OPT_SPROFDATA_PROVIDER generate-sprofdata CACHE STRING "") + add_dependencies(clang-bootstrap-deps ${PGO_OPT_SPROFDATA_PROVIDER}) + set(PGO_OPT -DLLVM_SPROFDATA_FILE=${PGO_OPT_SPROFDATA}) else() - add_dependencies(clang-bootstrap-deps 
generate-profdata) - set(PGO_OPT -DLLVM_PROFDATA_FILE=${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata) + set(PGO_OPT_PROFDATA "${CMAKE_CURRENT_BINARY_DIR}/utils/perf-training/clang.profdata" CACHE STRING "") + set(PGO_OPT_PROFDATA_PROVIDER generate-profdata CACHE STRING "") + add_dependencies(clang-bootstrap-deps ${PGO_OPT_PROFDATA_PROVIDER}) + set(PGO_OPT -DLLVM_PROFDATA_FILE=${PGO_OPT_PROFDATA}) endif() # Use the current tools for LTO instead of the instrumented ones list(APPEND _BOOTSTRAP_DEFAULT_PASSTHROUGH diff --git a/flang/CMakeLists.txt b/flang/CMakeLists.txt index be0b1f3d9b270..ab776172ebc86 100644 --- a/flang/CMakeLists.txt +++ b/flang/CMakeLists.txt @@ -513,6 +513,7 @@ if (FLANG_INCLUDE_TESTS) if (FLANG_GTEST_AVAIL) add_subdirectory(unittests) endif () + add_subdirectory(utils/perf-training) endif() option(FLANG_INCLUDE_DOCS "Generate build targets for the Flang docs." diff --git a/flang/cmake/caches/BOLT-PGO.cmake b/flang/cmake/caches/BOLT-PGO.cmake new file mode 100644 index 0000000000000..95373275ef22b --- /dev/null +++ b/flang/cmake/caches/BOLT-PGO.cmake @@ -0,0 +1,21 @@ +set(BOLT_PGO_CMAKE_CACHE "PGO" CACHE STRING "") +set(LLVM_ENABLE_PROJECTS "bolt;clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") + +set(CLANG_BOOTSTRAP_TARGETS + stage2-clang-bolt + stage2-flang-bolt + stage2-distribution + stage2-install-distribution + CACHE STRING "") +set(BOOTSTRAP_CLANG_BOOTSTRAP_TARGETS + clang-bolt + flang-bolt + distribution + install-distribution + CACHE STRING "") + +set(PGO_BUILD_CONFIGURATION + ${CMAKE_CURRENT_LIST_DIR}/BOLT.cmake + CACHE STRING "") +include(${CMAKE_CURRENT_LIST_DIR}/${BOLT_PGO_CMAKE_CACHE}.cmake) diff --git a/flang/cmake/caches/BOLT.cmake b/flang/cmake/caches/BOLT.cmake new file mode 100644 index 0000000000000..b8c5bf4019cdc --- /dev/null +++ b/flang/cmake/caches/BOLT.cmake @@ -0,0 +1,18 @@ +set(CMAKE_BUILD_TYPE Release CACHE STRING "") +set(CLANG_BOLT 
"INSTRUMENT" CACHE STRING "") +set(FLANG_BOLT ${CLANG_BOLT} CACHE STRING "") +set(CMAKE_EXE_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") +set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--emit-relocs,-znow" CACHE STRING "") + +set(LLVM_ENABLE_PROJECTS "bolt;clang;flang" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libunwind;openmp" CACHE STRING "") +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") + +# setup toolchain +set(LLVM_INSTALL_TOOLCHAIN_ONLY ON CACHE BOOL "") +set(LLVM_DISTRIBUTION_COMPONENTS + clang + clang-resource-headers + flang + runtimes + CACHE STRING "") diff --git a/flang/cmake/caches/PGO-stage2-instrumented.cmake b/flang/cmake/caches/PGO-stage2-instrumented.cmake new file mode 100644 index 0000000000000..334ab4a0e188d --- /dev/null +++ b/flang/cmake/caches/PGO-stage2-instrumented.cmake @@ -0,0 +1,26 @@ +set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") +set(CLANG_BOOTSTRAP_TARGETS + distribution + install-distribution + install-distribution-toolchain + check-all + check-llvm + check-clang + check-flang + test-suite CACHE STRING "") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "") +set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "") +set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "") + +if(PGO_BUILD_CONFIGURATION) + include(${PGO_BUILD_CONFIGURATION}) + set(CLANG_BOOTSTRAP_CMAKE_ARGS + -C ${PGO_BUILD_CONFIGURATION} + CACHE STRING "") +else() + include(${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake) + + set(CLANG_BOOTSTRAP_CMAKE_ARGS + -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2.cmake + CACHE STRING "") +endif() diff --git a/flang/cmake/caches/PGO-stage2.cmake b/flang/cmake/caches/PGO-stage2.cmake new file mode 100644 index 0000000000000..1f92a6de4d22d --- /dev/null +++ b/flang/cmake/caches/PGO-stage2.cmake @@ -0,0 +1,3 @@ +set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "") +set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES 
"compiler-rt;flang-rt;libcxx;libcxxabi;libunwind;openmp" CACHE STRING "") diff --git a/flang/cmake/caches/PGO.cmake b/flang/cmake/caches/PGO.cmake new file mode 100644 index 0000000000000..b2c9379c89213 --- /dev/null +++ b/flang/cmake/caches/PGO.cmake @@ -0,0 +1,36 @@ +set(CMAKE_BUILD_TYPE RELEASE CACHE STRING "") +set(CLANG_ENABLE_BOOTSTRAP ON CACHE BOOL "") + +set(LLVM_ENABLE_PROJECTS "clang;flang;lld" CACHE STRING "") +set(LLVM_ENABLE_RUNTIMES "compiler-rt;flang-rt;libcxx;libcxxabi;libunwind;openmp" CACHE STRING "") + +set(LLVM_TARGETS_TO_BUILD Native CACHE STRING "") +set(BOOTSTRAP_LLVM_BUILD_INSTRUMENTED IR CACHE BOOL "") +set(CLANG_BOOTSTRAP_TARGETS + generate-flang-profdata + stage2 + stage2-distribution + stage2-install-distribution + stage2-install-distribution-toolchain + stage2-check-all + stage2-check-llvm + stage2-check-clang + stage2-check-flang + stage2-test-suite CACHE STRING "") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "") +set(PGO_OPT_PROFDATA "${CMAKE_BINARY_DIR}/flang.profdata" CACHE STRING "") +set(PGO_OPT_PROFDATA_PROVIDER generate-flang-profdata CACHE STRING "") + +if(PGO_INSTRUMENT_LTO) + set(BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "") + set(BOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LTO ${PGO_INSTRUMENT_LTO} CACHE BOOL "") +endif() + +if(PGO_BUILD_CONFIGURATION) + set(EXTRA_ARGS -DPGO_BUILD_CONFIGURATION=${PGO_BUILD_CONFIGURATION}) +endif() + +set(CLANG_BOOTSTRAP_CMAKE_ARGS + ${EXTRA_ARGS} + -C ${CMAKE_CURRENT_LIST_DIR}/PGO-stage2-instrumented.cmake + CACHE STRING "") diff --git a/flang/cmake/caches/README.txt b/flang/cmake/caches/README.txt new file mode 100644 index 0000000000000..52a0f762f68db --- /dev/null +++ b/flang/cmake/caches/README.txt @@ -0,0 +1,97 @@ +CMake Caches +============ + +This directory contains CMake cache scripts that pre-populate the CMakeCache in +a build directory with commonly used settings. 
+ +You can use the cache files with the following CMake invocation: + +cmake -G <build system> + -C <path to cache file> + [additional CMake options (e.g. -DCMAKE_INSTALL_PREFIX=<install path>)] + <path to llvm> + +Options specified on the command line will override options in the cache files. + +The following cache files exist. + +BOLT +---- + +Post-link binary optimization using runtime profiling. Note that the +dependencies for optimizing flang were deliberately set to optimize clang too. +You can configure your build directory with the following invocation of CMake: + +cmake -G <generator> -C <path_to_flang>/cmake/caches/BOLT.cmake <source dir> + +After configuration the following additional targets will be generated: + +clang-bolt: +After building clang, it will be executed over the training test suite and +optimized with BOLT. + +flang-bolt: +After building clang and flang, they will be executed over the training test +suite and optimized with BOLT. + +PGO +--- + +The PGO CMake cache can be used to generate a multi-stage instrumented compiler. +You can configure your build directory with the following invocation of CMake: + +cmake -G <generator> -C <path_to_flang>/cmake/caches/PGO.cmake <source dir> + +After configuration the following additional targets will be generated: + +stage2-instrumented: +Builds a stage1 compiler, runtime, and required tools (llvm-config, +llvm-profdata) then uses that compiler to build an instrumented stage2 compiler. + +stage2-instrumented-generate-profdata: +Depends on "stage2-instrumented" and will use the instrumented compiler to +generate profdata based on the training files in <compiler>/utils/perf-training + +stage2: +Depends on "stage2-instrumented-generate-profdata" and will use the stage1 +compiler with the stage2 profdata to build a PGO-optimized compiler. + +BOLT-PGO +-------- + +This combines both BOLT and PGO for better optimization. 
Note that the +dependencies for optimizing flang were deliberately set to optimize clang too. +You can configure your build directory with the following invocation of CMake: + +cmake -G <generator> <source dir> \ + -C <path_to_flang>/cmake/caches/BOLT-PGO.cmake \ + -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DPGO_INSTRUMENT_LTO=Thin + +After configuration the following additional targets will be generated: + +stage2-clang-bolt: +After doing PGO optimization, the PGO-optimized clang binary is being bolted. + +stage2-flang-bolt: +After doing PGO optimization, the PGO-optimized clang and flang binaries are +being bolted. + +Using an external test suite +---------------------------- + +An external test suite (namely, llvm-test-suite) can be used in order to provide +a larger set of training data. It can be configured for training clang, flang, +or both. For example: + +cmake -G Ninja $HOME/llvm-project/llvm \ + -C $HOME/llvm-project/flang/cmake/caches/BOLT-PGO.cmake \ + -DBOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DBOOTSTRAP_BOOTSTRAP_LLVM_ENABLE_LLD=ON \ + -DBOOTSTRAP_CLANG_PGO_TRAINING_DATA_SOURCE_DIR=$HOME/llvm-test-suite \ + -DBOOTSTRAP_CLANG_PGO_TRAINING_DEPS=llvm-size \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_DIR=$HOME/llvm-test-suite \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS="-DTEST_SUITE_SUBDIRS=Fortran" \ + -DBOOTSTRAP_FLANG_PGO_TRAINING_DEPS=llvm-size \ + -DPGO_INSTRUMENT_LTO=Thin diff --git a/flang/tools/flang-driver/CMakeLists.txt b/flang/tools/flang-driver/CMakeLists.txt index 4dfc0d40cd55d..9bedee7f0b799 100644 --- a/flang/tools/flang-driver/CMakeLists.txt +++ b/flang/tools/flang-driver/CMakeLists.txt @@ -12,9 +12,28 @@ set( LLVM_LINK_COMPONENTS TargetParser ) +set(FLANG_BOLT_ALLOWLIST INSTRUMENT PERF LBR) +set(FLANG_BOLT OFF CACHE STRING "Apply BOLT optimization to flang. 
\ +May be specified as one of ${FLANG_BOLT_ALLOWLIST} to use a particular profiling \ + mechanism.") +string(TOUPPER "${FLANG_BOLT}" FLANG_BOLT) +if (FLANG_BOLT AND NOT FLANG_BOLT IN_LIST FLANG_BOLT_ALLOWLIST) + message(FATAL_ERROR "Specified FLANG_BOLT value '${FLANG_BOLT}' is not one of ${FLANG_BOLT_ALLOWLIST}.") +endif() + +if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) + set(FLANG_BOLT_DEPS clear-flang-bolt-fdata llvm-bolt llvm-readobj clang-bolt) + if (NOT FLANG_BOLT STREQUAL "INSTRUMENT") + list(APPEND FLANG_BOLT_DEPS clear-flang-perf-data) + endif() +endif() + add_flang_tool(flang driver.cpp fc1_main.cpp + + DEPENDS + ${FLANG_BOLT_DEPS} ) target_link_libraries(flang @@ -46,6 +65,60 @@ endif() install(TARGETS flang DESTINATION "${CMAKE_INSTALL_BINDIR}") +if (FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) + # Add a flang-bolt target for backwards compatibility. + add_custom_target(flang-bolt DEPENDS flang) + + set(FLANG_BOLT_INSTRUMENTED "flang-bolt.inst" CACHE STRING + "Name of BOLT-instrumented flang binary") + set(FLANG_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${FLANG_BOLT_INSTRUMENTED}) + set(PERF_TRAINING_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../utils/perf-training) + set(FLANG_BOLT_FDATA ${PERF_TRAINING_BINARY_DIR}/flang-prof.fdata) + get_llvm_lit_path( + lit_base_dir + lit_file_name + ALLOW_EXTERNAL + ) + set(LIT_COMMAND "${lit_base_dir}/${lit_file_name}") + + set(FLANG_BOLT_INPUTS $<TARGET_FILE:flang>) + set(FLANG_INSTRUMENTED_OUTPUTS ${FLANG_INSTRUMENTED}) + + # Add in dynamically linked libraries, if needs be. Currently only supported + # on Linux because it relies on LD_PRELOAD for instrumentation. 
+ if (CMAKE_SYSTEM_NAME STREQUAL "Linux") + if (LLVM_LINK_LLVM_DYLIB) + set(LLVM_BOLT_INSTRUMENTED "LLVM-bolt.inst" CACHE STRING + "Name of BOLT-instrumented LLVM library") + set(LLVM_INSTRUMENTED ${LLVM_RUNTIME_OUTPUT_INTDIR}/${LLVM_BOLT_INSTRUMENTED}) + list(APPEND FLANG_BOLT_INPUTS $<TARGET_FILE:LLVM>) + list(APPEND FLANG_INSTRUMENTED_OUTPUTS ${LLVM_INSTRUMENTED}) + endif() + endif() + + # This POST_BUILD command is executed unconditionally even if the flang target + # is already built. We need to wrap the whole bolt optimization process in + # a single python wrapper, so that we can first check if the binary has + # already been optimized and then exit early with a 0 status if it has. + add_custom_command( + TARGET flang POST_BUILD + COMMAND "${Python3_EXECUTABLE}" ${LLVM_MAIN_SRC_DIR}/../clang/utils/perf-training/perf-helper.py + bolt-optimize + --method ${FLANG_BOLT} + --input "${FLANG_BOLT_INPUTS}" + --instrumented-output "${FLANG_INSTRUMENTED_OUTPUTS}" + --fdata ${FLANG_BOLT_FDATA} + --perf-training-binary-dir ${PERF_TRAINING_BINARY_DIR} + --readelf $<TARGET_FILE:llvm-readobj> + --bolt $<TARGET_FILE:llvm-bolt> + --lit "${LIT_COMMAND}" + --merge-fdata $<TARGET_FILE:merge-fdata> + COMMENT "Optimizing flang with BOLT" + USES_TERMINAL + VERBATIM + ) +endif() + # Keep "flang-new" as a symlink for backwards compatiblity. Remove once "flang" # is a widely adopted name. 
add_flang_symlink(flang-new flang) diff --git a/flang/utils/perf-training/CMakeLists.txt b/flang/utils/perf-training/CMakeLists.txt new file mode 100644 index 0000000000000..d2b42ec432f71 --- /dev/null +++ b/flang/utils/perf-training/CMakeLists.txt @@ -0,0 +1,140 @@ +include(LLVMExternalProjectUtils) + +set(FLANG_PGO_TRAINING_DATA "${CMAKE_CURRENT_SOURCE_DIR}" CACHE PATH + "The path to a lit testsuite containing samples for PGO and order file generation" + ) +set(FLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating flang pgo data") +set(FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake flags to pass to the cmake project with source files to use for generating flang pgo data") +set(FLANG_PGO_TRAINING_DEPS "" CACHE STRING "Extra dependencies needed to build the PGO training data.") +set(FLANG_PGO_TRAINING_CLANG_COUPLING ON CACHE BOOL "Train clang and flang together") +if(FLANG_PGO_TRAINING_CLANG_COUPLING) + set(CLANG_PGO_TRAINING_DATA_SOURCE_DIR OFF CACHE STRING "Path to source directory containing cmake project with source files to use for generating clang pgo data") + set(CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS "" CACHE STRING "Extra CMake flags to pass to the cmake project with source files to use for generating clang pgo data") +endif() + +set(CLANG_CURRENT_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR}/../../../clang/utils/perf-training) + +set(PERF_HELPER ${LLVM_MAIN_SRC_DIR}/../clang/utils/perf-training/perf-helper.py) + +add_custom_target(clear-flang-perf-data + COMMAND "${Python3_EXECUTABLE}" ${PERF_HELPER} clean ${CMAKE_CURRENT_BINARY_DIR} perf.data + COMMENT "Clearing old flang perf data") + +option(FLANG_PGO_TRAINING_USE_LLVM_BUILD "Use LLVM build for generating PGO data" ON) + +llvm_canonicalize_cmake_booleans( + FLANG_PGO_TRAINING_USE_LLVM_BUILD +) + +if(LLVM_BUILD_INSTRUMENTED) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/lit.site.cfg.in 
+ ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/lit.site.cfg + ) + + add_lit_testsuite(generate-flang-profraw "Generating flang PGO data" + ${CMAKE_CURRENT_BINARY_DIR}/pgo-data/ + EXCLUDE_FROM_CHECK_ALL + DEPENDS flang flang-rt + ) + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + add_lit_testsuite(generate-clang-profraw "Generating clang PGO data" + ${CLANG_CURRENT_BINARY_DIR}/pgo-data/ + EXCLUDE_FROM_CHECK_ALL + DEPENDS clang + ) + endif() + + add_custom_target(clear-flang-profraw + COMMAND "${Python3_EXECUTABLE}" ${PERF_HELPER} clean ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_BINARY_DIR}/profiles/ profraw + COMMENT "Clearing old flang profraw data") + if (FLANG_PGO_TRAINING_CLANG_COUPLING) + add_dependencies(clear-flang-profraw clear-profraw) + endif() + + if(NOT LLVM_PROFDATA) + find_program(LLVM_PROFDATA llvm-profdata) + endif() + + if(NOT LLVM_PROFDATA) + message(STATUS "To enable merging PGO data LLVM_PROFDATA has to point to llvm-profdata") + else() + set(PROFDATA ${CMAKE_BINARY_DIR}/flang.profdata) + set(PROFRAW_TARGETS "") + set(PROFRAW_DIRS "") + set(PROFRAW_DEPS "") + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + list(APPEND PROFRAW_TARGETS generate-clang-profraw) + list(APPEND PROFRAW_DIRS ${CLANG_CURRENT_BINARY_DIR}) + list(APPEND PROFRAW_DEPS clang) + if (CLANG_PGO_TRAINING_DATA_SOURCE_DIR) + llvm_ExternalProject_Add(generate-clang-profraw-external + ${CLANG_PGO_TRAINING_DATA_SOURCE_DIR} + USE_TOOLCHAIN + EXCLUDE_FROM_ALL + NO_INSTALL + CMAKE_ARGS "${CLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}") + list(APPEND PROFRAW_TARGETS generate-clang-profraw-external) + endif() + endif() + list(APPEND PROFRAW_TARGETS generate-flang-profraw) + list(APPEND PROFRAW_DIRS + ${CMAKE_CURRENT_BINARY_DIR} + ${CMAKE_BINARY_DIR}/profiles/) + list(APPEND PROFRAW_DEPS + flang + flang-rt) + if (FLANG_PGO_TRAINING_DATA_SOURCE_DIR) + llvm_ExternalProject_Add(generate-flang-profraw-external + ${FLANG_PGO_TRAINING_DATA_SOURCE_DIR} + USE_TOOLCHAIN + ENABLE_FORTRAN + EXCLUDE_FROM_ALL + NO_INSTALL + CMAKE_ARGS 
"${FLANG_PGO_TRAINING_DATA_SOURCE_CMAKE_ARGS}") + list(APPEND PROFRAW_TARGETS generate-flang-profraw-external) + endif() + add_custom_command( + OUTPUT ${PROFDATA} + # PROFRAW_TARGETS are custom targets which are always considered stale. + # If we add them here to 'DEPENDS', then it will always execute and running + # ninja install && ninja check-all will result in the profile data being + # generated twice, and cause the ninja check-all build to fail with errors like: + # `ld.lld: error: Function Import: link error: linking module flags 'ProfileSummary': IDs have conflicting values in` + # Therefore we call these targets manually as part of this custom command, + # which will only run if flang or ${FLANG_PGO_TRAINING_DEPS} are updated. + COMMAND ${CMAKE_COMMAND} --build ${CMAKE_BINARY_DIR} --target ${PROFRAW_TARGETS} + COMMAND "${Python3_EXECUTABLE}" ${PERF_HELPER} merge ${LLVM_PROFDATA} ${PROFDATA} ${PROFRAW_DIRS} + COMMENT "Merging flang profdata" + DEPENDS ${PROFRAW_DEPS} ${FLANG_PGO_TRAINING_DEPS} clear-flang-profraw + ) + add_custom_target(generate-flang-profdata DEPENDS ${PROFDATA}) + + if(FLANG_PGO_TRAINING_CLANG_COUPLING) + string(TOUPPER "${LLVM_BUILD_INSTRUMENTED}" LLVM_BUILD_UPPER_INSTRUMENTED) + if (LLVM_BUILD_UPPER_INSTRUMENTED STREQUAL "CSSPGO") + message(STATUS "CSSPGO of flang is not supported") + else() + add_dependencies(clang-bootstrap-deps generate-flang-profdata) + endif() + endif() + endif() +endif() + +if(FLANG_BOLT AND NOT LLVM_BUILD_INSTRUMENTED) + configure_lit_site_cfg( + ${CMAKE_CURRENT_SOURCE_DIR}/bolt.lit.site.cfg.in + ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/lit.site.cfg + ) + + add_lit_testsuite(generate-flang-bolt-fdata "Generating BOLT profile for flang" + ${CMAKE_CURRENT_BINARY_DIR}/bolt-fdata/ + EXCLUDE_FROM_CHECK_ALL + DEPENDS clear-flang-bolt-fdata clear-flang-perf-data + ) + + add_custom_target(clear-flang-bolt-fdata + COMMAND "${Python3_EXECUTABLE}" ${PERF_HELPER} clean ${CMAKE_CURRENT_BINARY_DIR} fdata + COMMENT "Clearing old 
flang BOLT fdata") + +endif() diff --git a/flang/utils/perf-training/README.txt b/flang/utils/perf-training/README.txt new file mode 100644 index 0000000000000..31e1ec0a5e570 --- /dev/null +++ b/flang/utils/perf-training/README.txt @@ -0,0 +1,6 @@ +========================== + Performance Training Data +========================== + +This directory contains simple source files for use as training data for +generating PGO data and linker order files. diff --git a/flang/utils/perf-training/bolt.lit.cfg b/flang/utils/perf-training/bolt.lit.cfg new file mode 100644 index 0000000000000..a795effc8aca0 --- /dev/null +++ b/flang/utils/perf-training/bolt.lit.cfg @@ -0,0 +1,59 @@ +# -*- Python -*- + +from lit import Test +import lit.formats +import lit.util +import os +import re +import subprocess + +flang_bolt_mode = config.flang_bolt_mode.lower() +flang_binary = "flang" +perf_wrapper = f"{config.python_exe} {config.perf_helper_dir}/perf-helper.py perf " + +if flang_bolt_mode == "instrument": + perf_wrapper = "" + flang_binary = config.flang_bolt_name +elif flang_bolt_mode == "lbr": + perf_wrapper += " --lbr -- " +elif flang_bolt_mode == "perf": + perf_wrapper += " -- " +else: + assert 0, "Unsupported flang bolt mode" + +flang_nowrapper = os.path.realpath( + lit.util.which(flang_binary, config.flang_tools_dir) +).replace("\\", "/") +config.flang = perf_wrapper + flang_nowrapper +config.cmake_compiler_args = "-DCMAKE_Fortran_COMPILER='{0}'".format( + re.sub(r"\s+", ";", flang_nowrapper) +) + +config.name = "Flang Perf Training" +config.suffixes = [ + ".f", + ".f90", + ".f95", + ".f03", + ".f08", + ".f18", + ".F", + ".F90", + ".F95", + ".F03", + ".F08", + ".F18", + ".test", +] + +use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") +config.test_format = lit.formats.ShTest(use_lit_shell == "0") +config.substitutions.append(("%flang_skip_driver", config.flang)) +config.substitutions.append(("%flang", config.flang)) +config.substitutions.append(("%test_root", 
config.test_exec_root)) +config.substitutions.append(("%cmake_compiler_args", config.cmake_compiler_args)) +config.substitutions.append(('%cmake_generator', config.cmake_generator)) +config.substitutions.append(('%cmake', config.cmake_exe)) +config.substitutions.append(('%llvm_src_dir', config.llvm_src_dir)) +config.substitutions.append(('%module_src_dir', config.module_src_dir)) +config.substitutions.append(('%perf_wrapper', perf_wrapper)) diff --git a/flang/utils/perf-training/bolt.lit.site.cfg.in b/flang/utils/perf-training/bolt.lit.site.cfg.in new file mode 100644 index 0000000000000..f5236dbe856e6 --- /dev/null +++ b/flang/utils/perf-training/bolt.lit.site.cfg.in @@ -0,0 +1,20 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") +config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training" +config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.test_source_root = "@FLANG_PGO_TRAINING_DATA@" +config.target_triple = "@LLVM_TARGET_TRIPLE@" +config.python_exe = "@Python3_EXECUTABLE@" +config.flang_obj_root = path(r"@FLANG_BINARY_DIR@") +config.flang_bolt_mode = "@FLANG_BOLT@" +config.flang_bolt_name = "@FLANG_BOLT_INSTRUMENTED@" +config.cmake_exe = "@CMAKE_COMMAND@" +config.llvm_src_dir ="@CMAKE_SOURCE_DIR@" +config.module_src_dir = "@FLANG_SOURCE_DIR@/module" +config.cmake_generator ="@CMAKE_GENERATOR@" + +# Let the main config do the real work. +lit_config.load_config(config, "@FLANG_SOURCE_DIR@/utils/perf-training/bolt.lit.cfg") diff --git a/flang/utils/perf-training/f90/hello-openmp.f95 b/flang/utils/perf-training/f90/hello-openmp.f95 new file mode 100644 index 0000000000000..ea13bab62cf7d --- /dev/null +++ b/flang/utils/perf-training/f90/hello-openmp.f95 @@ -0,0 +1,14 @@ +! RUN: %flang -fopenmp -c %s +! 
RUN: %flang_skip_driver -fopenmp -c %s + +program hello_openmp + implicit none + integer :: i + + !$omp parallel private(i) + do i = 0, 64 + print *, "Hello ", i + !$omp barrier + end do + !$omp end parallel +end program diff --git a/flang/utils/perf-training/f90/hello-world.f95 b/flang/utils/perf-training/f90/hello-world.f95 new file mode 100644 index 0000000000000..85d431c5e07c6 --- /dev/null +++ b/flang/utils/perf-training/f90/hello-world.f95 @@ -0,0 +1,8 @@ +! RUN: %flang -c %s +! RUN: %flang_skip_driver -c %s + +program hello_world + implicit none + + print *, 'Hello, World!' +end program diff --git a/flang/utils/perf-training/f90/hello.f b/flang/utils/perf-training/f90/hello.f new file mode 100644 index 0000000000000..d55602840449e --- /dev/null +++ b/flang/utils/perf-training/f90/hello.f @@ -0,0 +1,24 @@ +! RUN: %flang -c %s +! RUN: %flang_skip_driver -c %s + + PROGRAM HELLO + IMPLICIT NONE + INTEGER I + INTEGER NUM + CHARACTER ARG * 32 + + NUM = 0 + CALL GETARG(1, ARG) + IF (LEN_TRIM(ARG) .GT. 0) THEN + READ (ARG, *, IOSTAT = I) NUM + END IF + IF (NUM .GT. 0) THEN + DO 10 I = 1, NUM + WRITE (*, 100) I +10 CONTINUE + ELSE + WRITE (*, 200) + END IF +100 FORMAT(' ', I3, '. Hello') +200 FORMAT(' Hello, world!') + END PROGRAM HELLO diff --git a/flang/utils/perf-training/f90/module_torture.f95 b/flang/utils/perf-training/f90/module_torture.f95 new file mode 100644 index 0000000000000..ea2f5d51ebef7 --- /dev/null +++ b/flang/utils/perf-training/f90/module_torture.f95 @@ -0,0 +1,47 @@ +! RUN: %flang -c %s +! 
RUN: %flang_skip_driver -c %s + +module example_module + implicit none + + abstract interface + + subroutine sub_i + implicit none + end subroutine + + end interface + +contains + + subroutine call_internal(string) + implicit none + character(len=*), intent(in) :: string + + call call_it(print_it) + + contains + + subroutine print_it + implicit none + + print *, string + end subroutine + + end subroutine + + subroutine call_it(sub) + implicit none + procedure(sub_i) :: sub + + call sub + end subroutine + +end module + +program module_torture + use example_module + implicit none + + call call_internal("Hello, World!") +end program diff --git a/flang/utils/perf-training/f90/sincos.f95 b/flang/utils/perf-training/f90/sincos.f95 new file mode 100644 index 0000000000000..ad7a53c42b6e7 --- /dev/null +++ b/flang/utils/perf-training/f90/sincos.f95 @@ -0,0 +1,24 @@ +! RUN: %flang -O3 -c %s +! RUN: %flang_skip_driver -O3 -c %s + +program sincos_example + implicit none + integer, parameter :: size = 16 + integer :: i, max_iter + real(8), dimension(size) :: dresult1, dresult2 + character(len = 32) :: arg + + max_iter = 10 + if (command_argument_count() .gt. 0) then + call get_command_argument(1, arg) + read(arg, *), max_iter + end if + if (max_iter .gt. 
size) stop + do i = 1, max_iter + dresult1(i) = dsin(dble(i)) + dresult2(i) = dcos(dble(i)) + end do + do i = 1, max_iter + print *, dresult1(i), dresult2(i) + end do +end program diff --git a/flang/utils/perf-training/flang-modules/build.test b/flang/utils/perf-training/flang-modules/build.test new file mode 100644 index 0000000000000..83ebcfef4db55 --- /dev/null +++ b/flang/utils/perf-training/flang-modules/build.test @@ -0,0 +1,10 @@ +RUN: rm -rf %t && mkdir -p %t +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/__fortran_builtins.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/__fortran_ieee_exceptions.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/flang_debug.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/ieee_arithmetic.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/ieee_exceptions.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/ieee_features.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/iso_c_binding.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/iso_fortran_env_impl.f90 +RUN: %perf_wrapper %flang -DFLANG_SUPPORT_R16 -I%t -cpp -fsyntax-only -module-dir %t %module_src_dir/iso_fortran_env.f90 diff --git a/flang/utils/perf-training/lit.cfg b/flang/utils/perf-training/lit.cfg new file mode 100644 index 0000000000000..f379425d996f8 --- /dev/null +++ b/flang/utils/perf-training/lit.cfg @@ -0,0 +1,50 @@ +# -*- Python -*- + +from lit import Test +import lit.formats +import lit.util +import subprocess + +def getSysrootFlagsOnDarwin(config, lit_config): + # On Darwin, support relocatable SDKs by providing flang 
with a + # default system root path. + if 'darwin' in config.target_triple: + try: + out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip().decode() + res = 0 + except OSError: + res = -1 + if res == 0 and out: + sdk_path = out + lit_config.note('using SDKROOT: %r' % sdk_path) + return '-isysroot %s' % sdk_path + return '' + +sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config) + +config.flang = lit.util.which('flang', config.flang_tools_dir).replace('\\', '/') + +config.name = 'Flang Perf Training' +config.suffixes = ['.f', '.f90', '.f95', '.f03', '.f08', '.f18', '.F', '.F90', '.F95', '.F03', '.F08', '.F18', '.test'] + +if not config.use_llvm_build: + config.excludes = ['llvm-support'] + +fc1_wrapper = '%s %s/perf-helper.py cc1' % (config.python_exe, config.perf_helper_dir) + +use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") +config.test_format = lit.formats.ShTest(use_lit_shell == "0") +config.cmake_compiler_args = '-DCMAKE_Fortran_COMPILER="{0}"'.format( + config.flang.replace(' ', ';') +) +config.substitutions.append( ('%flang_skip_driver', ' %s %s %s ' % (fc1_wrapper, config.flang, sysroot_flags))) +config.substitutions.append( ('%flang', '%s %s ' % (config.flang, sysroot_flags) ) ) +config.substitutions.append( ('%test_root', config.test_exec_root ) ) +config.substitutions.append( ('%cmake_compiler_args', config.cmake_compiler_args)) +config.substitutions.append( ('%cmake_generator', config.cmake_generator ) ) +config.substitutions.append( ('%cmake', config.cmake_exe ) ) +config.substitutions.append( ('%llvm_src_dir', config.llvm_src_dir ) ) +config.substitutions.append( ('%module_src_dir', config.module_src_dir ) ) +config.substitutions.append( ('%perf_wrapper', '' ) ) + +config.environment['LLVM_PROFILE_FILE'] = 'perf-training-%4m.profraw' diff --git a/flang/utils/perf-training/lit.site.cfg.in b/flang/utils/perf-training/lit.site.cfg.in new file mode 100644 index 0000000000000..74494a37d13c8 --- /dev/null +++ 
b/flang/utils/perf-training/lit.site.cfg.in @@ -0,0 +1,18 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") +config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training" +config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.test_source_root = "@FLANG_PGO_TRAINING_DATA@" +config.target_triple = "@LLVM_TARGET_TRIPLE@" +config.python_exe = "@Python3_EXECUTABLE@" +config.cmake_exe = "@CMAKE_COMMAND@" +config.llvm_src_dir ="@CMAKE_SOURCE_DIR@" +config.module_src_dir ="@FLANG_SOURCE_DIR@/module" +config.cmake_generator ="@CMAKE_GENERATOR@" +config.use_llvm_build = @FLANG_PGO_TRAINING_USE_LLVM_BUILD@ + +# Let the main config do the real work. +lit_config.load_config(config, "@FLANG_SOURCE_DIR@/utils/perf-training/lit.cfg") diff --git a/flang/utils/perf-training/order-files.lit.cfg b/flang/utils/perf-training/order-files.lit.cfg new file mode 100644 index 0000000000000..eae90cceb7f05 --- /dev/null +++ b/flang/utils/perf-training/order-files.lit.cfg @@ -0,0 +1,43 @@ +# -*- Python -*- + +from lit import Test +import lit.formats +import lit.util +import os +import subprocess + +def getSysrootFlagsOnDarwin(config, lit_config): + # On Darwin, support relocatable SDKs by providing flang with a + # default system root path. 
+ if 'darwin' in config.target_triple: + try: + out = subprocess.check_output(['xcrun', '--show-sdk-path']).strip() + res = 0 + except OSError: + res = -1 + if res == 0 and out: + sdk_path = out + lit_config.note('using SDKROOT: %r' % sdk_path) + return '-isysroot %s' % sdk_path + return '' + +sysroot_flags = getSysrootFlagsOnDarwin(config, lit_config) + +config.flang = os.path.realpath(lit.util.which('flang', config.flang_tools_dir)).replace('\\', '/') + +config.name = 'Flang Perf Training' +config.suffixes = ['.f', '.f90', '.f95', '.f03', '.f08', '.f18', '.F', '.F90', '.F95', '.F03', '.F08', '.F18', '.test'] + +dtrace_wrapper = '%s %s/perf-helper.py dtrace' % (config.python_exe, config.perf_helper_dir) +dtrace_wrapper_fc1 = '%s %s/perf-helper.py dtrace --cc1' % (config.python_exe, config.perf_helper_dir) + +if 'darwin' in config.target_triple: + lit_config.note('using DTrace oneshot probe') + dtrace_wrapper = '%s --use-oneshot' % dtrace_wrapper + dtrace_wrapper_fc1 = '%s --use-oneshot' % dtrace_wrapper_fc1 + +use_lit_shell = os.environ.get("LIT_USE_INTERNAL_SHELL") +config.test_format = lit.formats.ShTest(use_lit_shell == "0") +config.substitutions.append( ('%flang_skip_driver', ' %s %s %s ' % (dtrace_wrapper_fc1, config.flang, sysroot_flags))) +config.substitutions.append( ('%flang', ' %s %s %s ' % (dtrace_wrapper, config.flang, sysroot_flags) ) ) +config.substitutions.append( ('%test_root', config.test_exec_root ) ) diff --git a/flang/utils/perf-training/order-files.lit.site.cfg.in b/flang/utils/perf-training/order-files.lit.site.cfg.in new file mode 100644 index 0000000000000..7646f1504cd63 --- /dev/null +++ b/flang/utils/perf-training/order-files.lit.site.cfg.in @@ -0,0 +1,13 @@ +@LIT_SITE_CFG_IN_HEADER@ + +import sys + +config.flang_tools_dir = lit_config.substitute("@CURRENT_TOOLS_DIR@") +config.perf_helper_dir = "@LLVM_MAIN_SRC_DIR@/../clang/utils/perf-training" +config.test_exec_root = "@CMAKE_CURRENT_BINARY_DIR@" +config.test_source_root = 
"@FLANG_PGO_TRAINING_DATA@" +config.target_triple = "@LLVM_TARGET_TRIPLE@" +config.python_exe = "@Python3_EXECUTABLE@" + +# Let the main config do the real work. +lit_config.load_config(config, "@FLANG_SOURCE_DIR@/utils/perf-training/order-files.lit.cfg") _______________________________________________ cfe-commits mailing list [email protected] https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
