[libclc] [CMake][libclc] Improve dependencies to avoid build errors (PR #95018)

2024-06-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel edited 
https://github.com/llvm/llvm-project/pull/95018
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libclc] [CMake][libclc] Improve dependencies to avoid build errors (PR #95018)

2024-06-11 Thread Tim Creech via cfe-commits

tcreech-intel wrote:

Yes, the article you link to is highly relevant. Without this PR we're hitting 
the problem described in example #4.

We can promote the issue to a build failure by choosing a custom command which 
can't run concurrently with itself:
```cmake
# Minimal reproducer: a single custom command whose OUTPUT file is listed as a
# dependency of two independent ALL targets.
cmake_minimum_required(VERSION 3.2)

# Rule that produces the file "gen".
add_custom_command(
OUTPUT gen
# Try to grab a lock and fail immediately if we can't:
COMMAND flock -x -n gen.lock sleep 1
COMMAND cmake -E echo Hello > gen
)

# First target that depends directly on the generated file.
add_custom_target(
my-all-1 ALL DEPENDS gen
)

# Second, independent target that depends on the same generated file. Because
# nothing orders it relative to my-all-1, a parallel Makefile build may run the
# rule above for both targets at once, and the non-blocking lock then fails.
add_custom_target(
my-all-2 ALL DEPENDS gen
)
```

With the above, `make` will succeed, but `make -j2` will fail. The issue is not 
exposed with Ninja.
This is the same behavior I see in libclc today, except more jobs are needed.
Adding an intermediate target and depending on both the new target and the file 
does seem to fix all of our problems.

I've added the file dependencies back in b707fc7dea2e, and I think it fixes the 
build issue while preserving the incremental build behavior you described.
Could you please give it a try?


https://github.com/llvm/llvm-project/pull/95018
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libclc] [CMake][libclc] Improve dependencies to avoid build errors (PR #95018)

2024-06-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/95018

>From 3e85695cc62abf8fe0943421708b5db67750b4ea Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 10 Jun 2024 11:07:55 -0400
Subject: [PATCH 1/2] [libclc] Improve dependencies to avoid build errors

With the Makefile generator and particularly high build parallelism some
intermediate dependencies may be generated redundantly and concurrently,
leading to build failures.

To fix this, arrange for libclc's add_custom_commands to depend on
targets rather than directly on files.

This follows CMake documentation's [1] guidance on add_custom_command:

> Do not list the output in more than one independent target that may
> build in parallel or the instances of the rule may conflict. Instead,
> use the add_custom_target() command to drive the command and make the
> other targets depend on that one.

Eliminating the redundant commands also improves build times.

1. https://cmake.org/cmake/help/v3.29/command/add_custom_command.html
---
 libclc/CMakeLists.txt| 14 ++
 libclc/cmake/modules/AddLibclc.cmake |  6 --
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 9858ae905983f..ba4561d941e90 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -374,15 +374,21 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 OUTPUT ${output_file}
 EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib
"${build_flags}" -I${PROJECT_SOURCE_DIR}/${file_dir}
+DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
   )
   list( APPEND bytecode_files ${output_file} )
 endforeach()
 
-set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
+set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} )
+add_custom_target( ${builtins_comp_lib_tgt}
+  DEPENDS ${bytecode_files}
+)
 
+set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
 link_bc(
   TARGET ${builtins_link_lib_tgt}
   INPUTS ${bytecode_files}
+  DEPENDENCIES ${builtins_comp_lib_tgt}
 )
 
 set( builtins_link_lib 
$<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
@@ -391,7 +397,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   set( spv_suffix ${arch_suffix}.spv )
   add_custom_command( OUTPUT ${spv_suffix}
 COMMAND libclc::llvm-spirv ${spvflags} -o ${spv_suffix} 
${builtins_link_lib}
-DEPENDS ${builtins_link_lib}
+DEPENDS ${builtins_link_lib_tgt}
   )
   add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
   install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
@@ -403,7 +409,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
 COMMAND libclc::opt ${opt_flags} -o ${builtins_opt_lib_tgt}.bc
   ${builtins_link_lib}
-DEPENDS libclc::opt ${builtins_link_lib}
+DEPENDS libclc::opt ${builtins_link_lib_tgt}
   )
   add_custom_target( ${builtins_opt_lib_tgt}
 ALL DEPENDS ${builtins_opt_lib_tgt}.bc
@@ -418,7 +424,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   set( obj_suffix ${arch_suffix}.bc )
   add_custom_command( OUTPUT ${obj_suffix}
 COMMAND prepare_builtins -o ${obj_suffix} ${builtins_opt_lib}
-DEPENDS ${builtins_opt_lib} prepare_builtins )
+DEPENDS ${builtins_opt_lib_tgt} prepare_builtins )
   add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
 
   # nvptx-- targets don't include workitem builtins
diff --git a/libclc/cmake/modules/AddLibclc.cmake 
b/libclc/cmake/modules/AddLibclc.cmake
index 7f4620fa6a21d..e70be31f4480b 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -80,11 +80,13 @@ endfunction()
 # Custom target to create
 # * INPUT <string> ...
 # List of bytecode files to link together
+# * DEPENDENCIES <string> ...
+# List of extra dependencies to inject
 function(link_bc)
   cmake_parse_arguments(ARG
 ""
 "TARGET"
-"INPUTS"
+"INPUTS;DEPENDENCIES"
 ${ARGN}
   )
 
@@ -106,7 +108,7 @@ function(link_bc)
   add_custom_command(
 OUTPUT ${ARG_TARGET}.bc
 COMMAND libclc::llvm-link -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
-DEPENDS libclc::llvm-link ${ARG_INPUTS} ${RSP_FILE}
+DEPENDS libclc::llvm-link ${ARG_DEPENDENCIES} ${RSP_FILE}
   )
 
   add_custom_target( ${ARG_TARGET} ALL DEPENDS ${ARG_TARGET}.bc )

>From b707fc7dea2e5e57bc3e4f01e27b9ca7bbf0e398 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Tue, 11 Jun 2024 10:02:12 -0400
Subject: [PATCH 2/2] fixup: specify both file and target dependencies

---
 libclc/CMakeLists.txt| 6 +++---
 libclc/cmake/modules/AddLibclc.cmake | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index ba4561d941e90..ef8d21b167623 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -397,7 +397,7 @@ foreach( t 

[libclc] [CMake][libclc] Improve dependencies to avoid build errors (PR #95018)

2024-06-10 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel edited 
https://github.com/llvm/llvm-project/pull/95018
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[libclc] [libclc] Improve dependencies to avoid build errors (PR #95018)

2024-06-10 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel created 
https://github.com/llvm/llvm-project/pull/95018

With the Makefile generator and particularly high build parallelism some 
intermediate dependencies may be generated redundantly and concurrently, 
leading to build failures.

To fix this, arrange for libclc's add_custom_commands to depend on targets 
rather than directly on files.

This follows CMake documentation's[^1] guidance on add_custom_command:

> Do not list the output in more than one independent target that may
> build in parallel or the instances of the rule may conflict. Instead,
> use the add_custom_target() command to drive the command and make the
> other targets depend on that one.

Eliminating the redundant commands also improves build times.

[^1]: https://cmake.org/cmake/help/v3.29/command/add_custom_command.html

>From 3e85695cc62abf8fe0943421708b5db67750b4ea Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 10 Jun 2024 11:07:55 -0400
Subject: [PATCH] [libclc] Improve dependencies to avoid build errors

With the Makefile generator and particularly high build parallelism some
intermediate dependencies may be generated redundantly and concurrently,
leading to build failures.

To fix this, arrange for libclc's add_custom_commands to depend on
targets rather than directly on files.

This follows CMake documentation's [1] guidance on add_custom_command:

> Do not list the output in more than one independent target that may
> build in parallel or the instances of the rule may conflict. Instead,
> use the add_custom_target() command to drive the command and make the
> other targets depend on that one.

Eliminating the redundant commands also improves build times.

1. https://cmake.org/cmake/help/v3.29/command/add_custom_command.html
---
 libclc/CMakeLists.txt| 14 ++
 libclc/cmake/modules/AddLibclc.cmake |  6 --
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt
index 9858ae905983f..ba4561d941e90 100644
--- a/libclc/CMakeLists.txt
+++ b/libclc/CMakeLists.txt
@@ -374,15 +374,21 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
 OUTPUT ${output_file}
 EXTRA_OPTS "${mcpu}" -fno-builtin -nostdlib
"${build_flags}" -I${PROJECT_SOURCE_DIR}/${file_dir}
+DEPENDENCIES generate_convert.cl clspv-generate_convert.cl
   )
   list( APPEND bytecode_files ${output_file} )
 endforeach()
 
-set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
+set( builtins_comp_lib_tgt builtins.comp.${arch_suffix} )
+add_custom_target( ${builtins_comp_lib_tgt}
+  DEPENDS ${bytecode_files}
+)
 
+set( builtins_link_lib_tgt builtins.link.${arch_suffix} )
 link_bc(
   TARGET ${builtins_link_lib_tgt}
   INPUTS ${bytecode_files}
+  DEPENDENCIES ${builtins_comp_lib_tgt}
 )
 
 set( builtins_link_lib 
$<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )
@@ -391,7 +397,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   set( spv_suffix ${arch_suffix}.spv )
   add_custom_command( OUTPUT ${spv_suffix}
 COMMAND libclc::llvm-spirv ${spvflags} -o ${spv_suffix} 
${builtins_link_lib}
-DEPENDS ${builtins_link_lib}
+DEPENDS ${builtins_link_lib_tgt}
   )
   add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" )
   install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix}
@@ -403,7 +409,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
 COMMAND libclc::opt ${opt_flags} -o ${builtins_opt_lib_tgt}.bc
   ${builtins_link_lib}
-DEPENDS libclc::opt ${builtins_link_lib}
+DEPENDS libclc::opt ${builtins_link_lib_tgt}
   )
   add_custom_target( ${builtins_opt_lib_tgt}
 ALL DEPENDS ${builtins_opt_lib_tgt}.bc
@@ -418,7 +424,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
   set( obj_suffix ${arch_suffix}.bc )
   add_custom_command( OUTPUT ${obj_suffix}
 COMMAND prepare_builtins -o ${obj_suffix} ${builtins_opt_lib}
-DEPENDS ${builtins_opt_lib} prepare_builtins )
+DEPENDS ${builtins_opt_lib_tgt} prepare_builtins )
   add_custom_target( prepare-${obj_suffix} ALL DEPENDS ${obj_suffix} )
 
   # nvptx-- targets don't include workitem builtins
diff --git a/libclc/cmake/modules/AddLibclc.cmake 
b/libclc/cmake/modules/AddLibclc.cmake
index 7f4620fa6a21d..e70be31f4480b 100644
--- a/libclc/cmake/modules/AddLibclc.cmake
+++ b/libclc/cmake/modules/AddLibclc.cmake
@@ -80,11 +80,13 @@ endfunction()
 # Custom target to create
 # * INPUT <string> ...
 # List of bytecode files to link together
+# * DEPENDENCIES <string> ...
+# List of extra dependencies to inject
 function(link_bc)
   cmake_parse_arguments(ARG
 ""
 "TARGET"
-"INPUTS"
+"INPUTS;DEPENDENCIES"
 ${ARGN}
   )
 
@@ -106,7 +108,7 @@ function(link_bc)
   add_custom_command(
 OUTPUT ${ARG_TARGET}.bc
 COMMAND libclc::llvm-link -o ${ARG_TARGET}.bc ${LINK_INPUT_ARG}
-  

[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-27 Thread Tim Creech via cfe-commits


@@ -2547,22 +2547,40 @@ usual build cycle when using sample profilers for 
optimization:
used in the first step. The only requirement is that you build the code
with the same debug info options and ``-fprofile-sample-use``.
 
+   On Linux:
+
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only \
-fdebug-info-for-profiling -funique-internal-linkage-names \
-fprofile-sample-use=code.prof code.cc -o code
 
-  [OPTIONAL] Sampling-based profiles can have inaccuracies or missing block/
-  edge counters. The profile inference algorithm (profi) can be used to infer
-  missing blocks and edge counts, and improve the quality of profile data.
-  Enable it with ``-fsample-profile-use-profi``.
+   On Windows:
 
-  .. code-block:: console
+   .. code-block:: winbatch
+
+ > clang-cl -O2 -gdwarf -gline-tables-only ^

tcreech-intel wrote:

Good idea. I've updated the clang-cl examples to use cl-style forward-slash 
options when possible. There are still a few cases (`-gdwarf 
-gline-tables-only`) where only the hyphen version is understood, and also some 
cases (`/clang:-fdebug-info-for-profiling 
/clang:-funique-internal-linkage-names`) where the hyphen version is understood 
only with `/clang:`.

https://github.com/llvm/llvm-project/pull/88438
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-27 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/88438

>From fe3404cbdf78b434f16f8351dc242175b4543112 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Thu, 11 Apr 2024 16:03:52 -0400
Subject: [PATCH 1/4] Improve documented sampling profiler steps to best known
 methods

1. Add `-fdebug-info-for-profiling -funique-internal-linkage-names`,
   which improve the usefulness of debug info for profiling.

2. Recommend the use of `br_inst_retired.near_taken:uppp`, which
   provides the most precise results on supporting hardware.  Mention
   `branches:u` as a more portable backup.

   Both should portray execution counts better than the default event
   (`cycles`) and have a better chance of working as an unprivileged
   user due to the `:u` modifier.
---
 clang/docs/UsersManual.rst | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index c464bc3a69adc5..818841285cfae5 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2443,13 +2443,15 @@ usual build cycle when using sample profilers for 
optimization:
usual build flags that you always build your application with. The only
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
-   to map instructions back to source line locations.
+   to map instructions back to source line locations. The usefulness of this
+   DWARF information can be improved with the ``-fdebug-info-for-profiling``
+   and ``-funique-internal-linkage-names`` options.
 
-   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
+   On Linux:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only code.cc -o code
+ $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
@@ -2457,13 +2459,13 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+ $ clang-cl -O2 -gdwarf -gline-tables-only 
/clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names 
code.cc -o code -fuse-ld=lld -link -debug:dwarf
 
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
into the format that the LLVM optimizer understands.
 
-   Two such profilers are the the Linux Perf profiler
+   Two such profilers are the Linux Perf profiler
(https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
available as part of `Intel VTune

`_.
@@ -2477,7 +2479,9 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ perf record -b ./code
+ $ perf record -b -e BR_INST_RETIRED.NEAR_TAKEN:uppp ./code
+
+   If the event above is unavailable, ``branches:u`` is probably next-best.
 
Note the use of the ``-b`` flag. This tells Perf to use the Last Branch
Record (LBR) to record call chains. While this is not strictly required,

>From add91ec329f60eef6ecf79d6d5c9a548a8d6bcfe Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 22 Apr 2024 11:11:36 -0400
Subject: [PATCH 2/4] fixup: add uniqueing note, match debug flags

---
 clang/docs/UsersManual.rst | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 818841285cfae5..b87fc7f2aaa4dd 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2314,6 +2314,8 @@ are listed below.
on ELF targets when using the integrated assembler. This flag currently
only has an effect on ELF targets.
 
+.. _funique_internal_linkage_names:
+
 .. option:: -f[no]-unique-internal-linkage-names
 
Controls whether Clang emits a unique (best-effort) symbol name for internal
@@ -2451,15 +2453,27 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
+ $ clang++ -O2 -gline-tables-only \
+   -fdebug-info-for-profiling -funique-internal-linkage-names \
+   code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
``-gdwarf`` to include DWARF debug information:
 
-   .. code-block:: console
+   .. code-block:: winbatch
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only ^
+   

[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-25 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/88438

>From fe3404cbdf78b434f16f8351dc242175b4543112 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Thu, 11 Apr 2024 16:03:52 -0400
Subject: [PATCH 1/3] Improve documented sampling profiler steps to best known
 methods

1. Add `-fdebug-info-for-profiling -funique-internal-linkage-names`,
   which improve the usefulness of debug info for profiling.

2. Recommend the use of `br_inst_retired.near_taken:uppp`, which
   provides the most precise results on supporting hardware.  Mention
   `branches:u` as a more portable backup.

   Both should portray execution counts better than the default event
   (`cycles`) and have a better chance of working as an unprivileged
   user due to the `:u` modifier.
---
 clang/docs/UsersManual.rst | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index c464bc3a69adc5..818841285cfae5 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2443,13 +2443,15 @@ usual build cycle when using sample profilers for 
optimization:
usual build flags that you always build your application with. The only
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
-   to map instructions back to source line locations.
+   to map instructions back to source line locations. The usefulness of this
+   DWARF information can be improved with the ``-fdebug-info-for-profiling``
+   and ``-funique-internal-linkage-names`` options.
 
-   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
+   On Linux:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only code.cc -o code
+ $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
@@ -2457,13 +2459,13 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+ $ clang-cl -O2 -gdwarf -gline-tables-only 
/clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names 
code.cc -o code -fuse-ld=lld -link -debug:dwarf
 
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
into the format that the LLVM optimizer understands.
 
-   Two such profilers are the the Linux Perf profiler
+   Two such profilers are the Linux Perf profiler
(https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
available as part of `Intel VTune

`_.
@@ -2477,7 +2479,9 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ perf record -b ./code
+ $ perf record -b -e BR_INST_RETIRED.NEAR_TAKEN:uppp ./code
+
+   If the event above is unavailable, ``branches:u`` is probably next-best.
 
Note the use of the ``-b`` flag. This tells Perf to use the Last Branch
Record (LBR) to record call chains. While this is not strictly required,

>From add91ec329f60eef6ecf79d6d5c9a548a8d6bcfe Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 22 Apr 2024 11:11:36 -0400
Subject: [PATCH 2/3] fixup: add uniqueing note, match debug flags

---
 clang/docs/UsersManual.rst | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 818841285cfae5..b87fc7f2aaa4dd 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2314,6 +2314,8 @@ are listed below.
on ELF targets when using the integrated assembler. This flag currently
only has an effect on ELF targets.
 
+.. _funique_internal_linkage_names:
+
 .. option:: -f[no]-unique-internal-linkage-names
 
Controls whether Clang emits a unique (best-effort) symbol name for internal
@@ -2451,15 +2453,27 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
+ $ clang++ -O2 -gline-tables-only \
+   -fdebug-info-for-profiling -funique-internal-linkage-names \
+   code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
``-gdwarf`` to include DWARF debug information:
 
-   .. code-block:: console
+   .. code-block:: winbatch
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only ^
+   

[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-22 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/88438

>From fe3404cbdf78b434f16f8351dc242175b4543112 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Thu, 11 Apr 2024 16:03:52 -0400
Subject: [PATCH 1/2] Improve documented sampling profiler steps to best known
 methods

1. Add `-fdebug-info-for-profiling -funique-internal-linkage-names`,
   which improve the usefulness of debug info for profiling.

2. Recommend the use of `br_inst_retired.near_taken:uppp`, which
   provides the most precise results on supporting hardware.  Mention
   `branches:u` as a more portable backup.

   Both should portray execution counts better than the default event
   (`cycles`) and have a better chance of working as an unprivileged
   user due to the `:u` modifier.
---
 clang/docs/UsersManual.rst | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index c464bc3a69adc5..818841285cfae5 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2443,13 +2443,15 @@ usual build cycle when using sample profilers for 
optimization:
usual build flags that you always build your application with. The only
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
-   to map instructions back to source line locations.
+   to map instructions back to source line locations. The usefulness of this
+   DWARF information can be improved with the ``-fdebug-info-for-profiling``
+   and ``-funique-internal-linkage-names`` options.
 
-   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
+   On Linux:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only code.cc -o code
+ $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
@@ -2457,13 +2459,13 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+ $ clang-cl -O2 -gdwarf -gline-tables-only 
/clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names 
code.cc -o code -fuse-ld=lld -link -debug:dwarf
 
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
into the format that the LLVM optimizer understands.
 
-   Two such profilers are the the Linux Perf profiler
+   Two such profilers are the Linux Perf profiler
(https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
available as part of `Intel VTune

`_.
@@ -2477,7 +2479,9 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ perf record -b ./code
+ $ perf record -b -e BR_INST_RETIRED.NEAR_TAKEN:uppp ./code
+
+   If the event above is unavailable, ``branches:u`` is probably next-best.
 
Note the use of the ``-b`` flag. This tells Perf to use the Last Branch
Record (LBR) to record call chains. While this is not strictly required,

>From add91ec329f60eef6ecf79d6d5c9a548a8d6bcfe Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 22 Apr 2024 11:11:36 -0400
Subject: [PATCH 2/2] fixup: add uniqueing note, match debug flags

---
 clang/docs/UsersManual.rst | 27 ++-
 1 file changed, 22 insertions(+), 5 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 818841285cfae5..b87fc7f2aaa4dd 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2314,6 +2314,8 @@ are listed below.
on ELF targets when using the integrated assembler. This flag currently
only has an effect on ELF targets.
 
+.. _funique_internal_linkage_names:
+
 .. option:: -f[no]-unique-internal-linkage-names
 
Controls whether Clang emits a unique (best-effort) symbol name for internal
@@ -2451,15 +2453,27 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
+ $ clang++ -O2 -gline-tables-only \
+   -fdebug-info-for-profiling -funique-internal-linkage-names \
+   code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
``-gdwarf`` to include DWARF debug information:
 
-   .. code-block:: console
+   .. code-block:: winbatch
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only ^
+   

[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-19 Thread Tim Creech via cfe-commits


@@ -2443,27 +2443,29 @@ usual build cycle when using sample profilers for 
optimization:
usual build flags that you always build your application with. The only
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
-   to map instructions back to source line locations.
+   to map instructions back to source line locations. The usefulness of this
+   DWARF information can be improved with the ``-fdebug-info-for-profiling``
+   and ``-funique-internal-linkage-names`` options.

tcreech-intel wrote:

Thanks, @chrulski-intel -- good point. I'll add a brief note.

@williamweixiao, I think you're right that they should match. I'll update those 
steps.

https://github.com/llvm/llvm-project/pull/88438
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-11 Thread Tim Creech via cfe-commits

tcreech-intel wrote:

@williamweixiao, @HaohaiWen, this updates the docs to describe best practices 
given #83972.

It seems `-fdebug-info-for-profiling` can be particularly important. Without it 
we were discarding nearly half of the samples in some cases.

https://github.com/llvm/llvm-project/pull/88438
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel ready_for_review 
https://github.com/llvm/llvm-project/pull/88438
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] Improve documented sampling profiler steps to best known methods (PR #88438)

2024-04-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel created 
https://github.com/llvm/llvm-project/pull/88438

1. Add `-fdebug-info-for-profiling -funique-internal-linkage-names`, which 
improve the usefulness of debug info for profiling.

2. Recommend the use of `br_inst_retired.near_taken:uppp`, which provides the 
most precise results on supporting hardware.  Mention `branches:u` as a more 
portable backup.

   Both should portray execution counts better than the default event 
(`cycles`) and have a better chance of working as an unprivileged user due to 
the `:u` modifier.

>From fe3404cbdf78b434f16f8351dc242175b4543112 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Thu, 11 Apr 2024 16:03:52 -0400
Subject: [PATCH] Improve documented sampling profiler steps to best known
 methods

1. Add `-fdebug-info-for-profiling -funique-internal-linkage-names`,
   which improve the usefulness of debug info for profiling.

2. Recommend the use of `br_inst_retired.near_taken:uppp`, which
   provides the most precise results on supporting hardware.  Mention
   `branches:u` as a more portable backup.

   Both should portray execution counts better than the default event
   (`cycles`) and have a better chance of working as an unprivileged
   user due to the `:u` modifier.
---
 clang/docs/UsersManual.rst | 16 ++--
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index c464bc3a69adc5..818841285cfae5 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2443,13 +2443,15 @@ usual build cycle when using sample profilers for 
optimization:
usual build flags that you always build your application with. The only
requirement is that DWARF debug info including source line information is
generated. This DWARF information is important for the profiler to be able
-   to map instructions back to source line locations.
+   to map instructions back to source line locations. The usefulness of this
+   DWARF information can be improved with the ``-fdebug-info-for-profiling``
+   and ``-funique-internal-linkage-names`` options.
 
-   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
+   On Linux:
 
.. code-block:: console
 
- $ clang++ -O2 -gline-tables-only code.cc -o code
+ $ clang++ -O2 -gline-tables-only -fdebug-info-for-profiling 
-funique-internal-linkage-names code.cc -o code
 
While MSVC-style targets default to CodeView debug information, DWARF debug
information is required to generate source-level LLVM profiles. Use
@@ -2457,13 +2459,13 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+ $ clang-cl -O2 -gdwarf -gline-tables-only 
/clang:-fdebug-info-for-profiling /clang:-funique-internal-linkage-names 
code.cc -o code -fuse-ld=lld -link -debug:dwarf
 
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
into the format that the LLVM optimizer understands.
 
-   Two such profilers are the the Linux Perf profiler
+   Two such profilers are the Linux Perf profiler
(https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
available as part of `Intel VTune
<https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
@@ -2477,7 +2479,9 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ perf record -b ./code
+ $ perf record -b -e BR_INST_RETIRED.NEAR_TAKEN:uppp ./code
+
+   If the event above is unavailable, ``branches:u`` is probably next-best.
 
Note the use of the ``-b`` flag. This tells Perf to use the Last Branch
Record (LBR) to record call chains. While this is not strictly required,

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-21 Thread Tim Creech via cfe-commits

tcreech-intel wrote:

> please resolve the conflicts.

Thanks for the review, @williamweixiao. I've resolved the conflicts in 2312821.

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-21 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/84864

>From 4dc108d0d290ee5fd6a73c029c051fdb2215d00a Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 11 Mar 2024 22:35:59 -0400
Subject: [PATCH 1/5] Update documentation and release notes for llvm-profgen
 COFF support

This change:
- Updates the existing Clang User's Manual section on SPGO so that it
  describes how to use llvm-profgen to perform SPGO on Windows. This is
  new functionality implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.
---
 clang/docs/UsersManual.rst | 47 +++---
 llvm/docs/ReleaseNotes.rst |  5 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 7391e4cf3a9aeb..9cf313c3727125 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   <https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.
+
+   When using Perf:
 
.. code-block:: console
 
@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code
+
+   This produces a ``perf.data.script`` output which can be used with
+   ``llvm-profgen``'s ``--perfscript`` input option.
+
 3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
@@ -2454,7 +2478,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   `llvm-profgen --perfscript`. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script
 
 
 4. Build the code again using the collected profile. This step feeds
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b34a5f31c5eb0a..c2bbc647bc18e6 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ Changes to the LLVM tools
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
 
+* llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO
+  on Windows using Intel VTune's SEP. For details on usage, see the `end-user
+  documentation for SPGO
+  <https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers>`_.
+
 Changes to LLDB
 -
 

>From 53f4c5dc84d71fd4efa5384818ecfc3401a0e7f6 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Tue, 12 Mar 2024 09:14:27 -0400

[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2454,7 +2481,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   ``llvm-profgen --perfscript``. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script

tcreech-intel wrote:

Thanks. Fixed.

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/84864

>From 4dc108d0d290ee5fd6a73c029c051fdb2215d00a Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 11 Mar 2024 22:35:59 -0400
Subject: [PATCH 1/5] Update documentation and release notes for llvm-profgen
 COFF support

This change:
- Updates the existing Clang User's Manual section on SPGO so that it
  describes how to use llvm-profgen to perform SPGO on Windows. This is
  new functionality implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.
---
 clang/docs/UsersManual.rst | 47 +++---
 llvm/docs/ReleaseNotes.rst |  5 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 7391e4cf3a9aeb..9cf313c3727125 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   <https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.
+
+   When using Perf:
 
.. code-block:: console
 
@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code
+
+   This produces a ``perf.data.script`` output which can be used with
+   ``llvm-profgen``'s ``--perfscript`` input option.
+
 3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
@@ -2454,7 +2478,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   `llvm-profgen --perfscript`. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script
 
 
 4. Build the code again using the collected profile. This step feeds
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b34a5f31c5eb0a..c2bbc647bc18e6 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ Changes to the LLVM tools
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
 
+* llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO
+  on Windows using Intel VTune's SEP. For details on usage, see the `end-user
+  documentation for SPGO
+  <https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers>`_.
+
 Changes to LLDB
 -
 

>From 53f4c5dc84d71fd4efa5384818ecfc3401a0e7f6 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Tue, 12 Mar 2024 09:14:27 -0400

[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel ready_for_review 
https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:

tcreech-intel wrote:

I've expanded on this a little in 
[a5e879c](https://github.com/llvm/llvm-project/pull/84864/commits/a5e879ce5016fee9cf3109bb9fc7785c396ac509).

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   <https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.

tcreech-intel wrote:

Yes, probably a good idea to include a link here now that it's the first 
mention of AutoFDO. Fixed in 
[a5e879c](https://github.com/llvm/llvm-project/pull/84864/commits/a5e879ce5016fee9cf3109bb9fc7785c396ac509).

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),

tcreech-intel wrote:

Thanks -- I've made this more clear in 
[a5e879c](https://github.com/llvm/llvm-project/pull/84864/commits/a5e879ce5016fee9cf3109bb9fc7785c396ac509).

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code

tcreech-intel wrote:

Fixed in 
[53f4c5d](https://github.com/llvm/llvm-project/pull/84864/commits/53f4c5dc84d71fd4efa5384818ecfc3401a0e7f6).

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel updated 
https://github.com/llvm/llvm-project/pull/84864

>From 4dc108d0d290ee5fd6a73c029c051fdb2215d00a Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 11 Mar 2024 22:35:59 -0400
Subject: [PATCH 1/4] Update documentation and release notes for llvm-profgen
 COFF support

This change:
- Updates the existing Clang User's Manual section on SPGO so that it
  describes how to use llvm-profgen to perform SPGO on Windows. This is
  new functionality implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.
---
 clang/docs/UsersManual.rst | 47 +++---
 llvm/docs/ReleaseNotes.rst |  5 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 7391e4cf3a9aeb..9cf313c3727125 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   <https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.
+
+   When using Perf:
 
.. code-block:: console
 
@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code
+
+   This produces a ``perf.data.script`` output which can be used with
+   ``llvm-profgen``'s ``--perfscript`` input option.
+
 3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
@@ -2454,7 +2478,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   `llvm-profgen --perfscript`. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script
 
 
 4. Build the code again using the collected profile. This step feeds
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b34a5f31c5eb0a..c2bbc647bc18e6 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ Changes to the LLVM tools
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
 
+* llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO
+  on Windows using Intel VTune's SEP. For details on usage, see the `end-user
+  documentation for SPGO
+  <https://clang.llvm.org/docs/UsersManual.html#using-sampling-profilers>`_.
+
 Changes to LLDB
 -
 

>From 53f4c5dc84d71fd4efa5384818ecfc3401a0e7f6 Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Tue, 12 Mar 2024 09:14:27 -0400

[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-12 Thread Tim Creech via cfe-commits


@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code

tcreech-intel wrote:

I had omitted `-out` for the sake of brevity, but maybe this is confusing. I'll 
update it and name a specific perf.data.script file below.

https://github.com/llvm/llvm-project/pull/84864
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[clang] [llvm] Update documentation and release notes for llvm-profgen COFF support (PR #84864)

2024-03-11 Thread Tim Creech via cfe-commits

https://github.com/tcreech-intel created 
https://github.com/llvm/llvm-project/pull/84864

This change:
- Updates the existing Clang User's Manual section on SPGO so that it describes 
how to use llvm-profgen to perform SPGO on Windows. This is new functionality 
implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.

>From 4dc108d0d290ee5fd6a73c029c051fdb2215d00a Mon Sep 17 00:00:00 2001
From: Tim Creech 
Date: Mon, 11 Mar 2024 22:35:59 -0400
Subject: [PATCH] Update documentation and release notes for llvm-profgen COFF
 support

This change:
- Updates the existing Clang User's Manual section on SPGO so that it
  describes how to use llvm-profgen to perform SPGO on Windows. This is
  new functionality implemented in #83972.
- Fixes a minor typo in the existing llvm-profgen invocation example.
- Adds an LLVM release note on this new functionality in llvm-profgen.
---
 clang/docs/UsersManual.rst | 47 +++---
 llvm/docs/ReleaseNotes.rst |  5 
 2 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/clang/docs/UsersManual.rst b/clang/docs/UsersManual.rst
index 7391e4cf3a9aeb..9cf313c3727125 100644
--- a/clang/docs/UsersManual.rst
+++ b/clang/docs/UsersManual.rst
@@ -2410,20 +2410,35 @@ usual build cycle when using sample profilers for 
optimization:
 
 1. Build the code with source line table information. You can use all the
usual build flags that you always build your application with. The only
-   requirement is that you add ``-gline-tables-only`` or ``-g`` to the
-   command line. This is important for the profiler to be able to map
-   instructions back to source line locations.
+   requirement is that DWARF debug info including source line information is
+   generated. This DWARF information is important for the profiler to be able
+   to map instructions back to source line locations.
+
+   On Linux, ``-g`` or just ``-gline-tables-only`` is sufficient:
 
.. code-block:: console
 
  $ clang++ -O2 -gline-tables-only code.cc -o code
 
+   It is also possible to include DWARF in Windows binaries:
+
+   .. code-block:: console
+
+ $ clang-cl -O2 -gdwarf -gline-tables-only coff-profile.cpp -fuse-ld=lld 
-link -debug:dwarf
+
 2. Run the executable under a sampling profiler. The specific profiler
you use does not really matter, as long as its output can be converted
-   into the format that the LLVM optimizer understands. Currently, there
-   exists a conversion tool for the Linux Perf profiler
-   (https://perf.wiki.kernel.org/), so these examples assume that you
-   are using Linux Perf to profile your code.
+   into the format that the LLVM optimizer understands.
+
+   Two such profilers are the the Linux Perf profiler
+   (https://perf.wiki.kernel.org/) and Intel's Sampling Enabling Product (SEP),
+   available as part of `Intel VTune
+   <https://software.intel.com/content/www/us/en/develop/tools/oneapi/components/vtune-profiler.html>`_.
+
+   The LLVM tool ``llvm-profgen`` can convert output of either Perf or SEP. An
+   external tool, AutoFDO, also supports Linux Perf output.
+
+   When using Perf:
 
.. code-block:: console
 
@@ -2434,6 +2449,15 @@ usual build cycle when using sample profilers for 
optimization:
it provides better call information, which improves the accuracy of
the profile data.
 
+   When using SEP:
+
+   .. code-block:: console
+
+ $ sep -start -ec BR_INST_RETIRED.NEAR_TAKEN:precise=yes:pdir -lbr 
no_filter:usr -perf-script ip,brstack -app ./code
+
+   This produces a ``perf.data.script`` output which can be used with
+   ``llvm-profgen``'s ``--perfscript`` input option.
+
 3. Convert the collected profile data to LLVM's sample profile format.
This is currently supported via the AutoFDO converter ``create_llvm_prof``.
It is available at https://github.com/google/autofdo. Once built and
@@ -2454,7 +2478,14 @@ usual build cycle when using sample profilers for 
optimization:
 
.. code-block:: console
 
- $ llvm-profgen --binary=./code --output=code.prof--perfdata=perf.data
+ $ llvm-profgen --binary=./code --output=code.prof --perfdata=perf.data
+
+   When using SEP the output is in the textual format corresponding to
+   `llvm-profgen --perfscript`. For example:
+
+   .. code-block:: console
+
+ $ llvm-profgen --binary=./code --output=code.prof 
--perfscript=perf.data.script
 
 
 4. Build the code again using the collected profile. This step feeds
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index b34a5f31c5eb0a..c2bbc647bc18e6 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -157,6 +157,11 @@ Changes to the LLVM tools
   ``--set-symbols-visibility`` options for ELF input to change the
   visibility of symbols.
 
+* llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO
+  on Windows using Intel VTune's