Re: [PATCH 6/7] lto: squash order of symbols in partitions

2024-05-14 Thread Jan Hubicka
> This patch squashes order of symbols in individual partitions, so that
> their relative order is conserved, but is not influenced by symbols in
> other partitions.
> Order of cloned symbols is set to 0. This should be fine because order
> specifies order of symbols in input files, which cloned symbols are not
> part of.

The current use of order is somewhat broken (and has been since cgraph was
converted to C++, which is a while ago).
The original code was setting order at the time a function was finalized,
which made functions be output in the same order as their bodies appear in
the source code (with a -fno-toplevel-reorder build at least).

With this logic the clones should have the same order as the originals, so
they appear next to them.

Later, initialization of order was moved to register_symbol, which
is kind of wrong since frontends are allowed to produce symbols early.
So it would be nice to fix this problem and make sure that the order of
clones is sane.

I guess this is a bit independent of the rest of the caching work, so maybe
we can first get the other patches in and then worry about order?
> 
> This is important for incremental LTO because if there is a new symbol,
> it otherwise shifts order of all symbols with higher order, which would
> diverge them all.
> 
> Bootstrapped/regtested on x86_64-pc-linux-gnu
> 
> gcc/ChangeLog:
> 
>   * lto-cgraph.cc (lto_output_node): Add and use order_remap.
>   (lto_output_varpool_node): Likewise.
>   (output_symtab): Likewise.
>   * lto-streamer-out.cc (produce_asm): Likewise.
>   (output_function): Likewise.
>   (output_constructor): Likewise.
>   (copy_function_or_variable): Likewise.
>   (cmp_int): New.
>   (lto_output): Generate order_remap.
>   * lto-streamer.h (produce_asm): Add order_remap.
>   (output_symtab): Likewise.
> ---
>  gcc/lto-cgraph.cc   | 20 
>  gcc/lto-streamer-out.cc | 71 +
>  gcc/lto-streamer.h  |  5 +--
>  3 files changed, 73 insertions(+), 23 deletions(-)
> 
> diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
> index 32c0f5ac6db..a7530290fba 100644
> --- a/gcc/lto-cgraph.cc
> +++ b/gcc/lto-cgraph.cc
> @@ -381,7 +381,8 @@ reachable_from_this_partition_p (struct cgraph_node 
> *node, lto_symtab_encoder_t
>  
>  static void
>  lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node 
> *node,
> -  lto_symtab_encoder_t encoder)
> +  lto_symtab_encoder_t encoder,
> +  hash_map, int>* order_remap)
>  {
>unsigned int tag;
>struct bitpack_d bp;
> @@ -405,7 +406,9 @@ lto_output_node (struct lto_simple_output_block *ob, 
> struct cgraph_node *node,
>  
>streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
>  tag);
> -  streamer_write_hwi_stream (ob->main_stream, node->order);
> +
> +  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
> +  streamer_write_hwi_stream (ob->main_stream, order);
>  
>/* In WPA mode, we only output part of the call-graph.  Also, we
>   fake cgraph node attributes.  There are two cases that we care.
> @@ -585,7 +588,8 @@ lto_output_node (struct lto_simple_output_block *ob, 
> struct cgraph_node *node,
>  
>  static void
>  lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node 
> *node,
> -  lto_symtab_encoder_t encoder)
> +  lto_symtab_encoder_t encoder,
> +  hash_map, int>* order_remap)
>  {
>bool boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node);
>bool encode_initializer_p
> @@ -602,7 +606,8 @@ lto_output_varpool_node (struct lto_simple_output_block 
> *ob, varpool_node *node,
>  
>streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
>  LTO_symtab_variable);
> -  streamer_write_hwi_stream (ob->main_stream, node->order);
> +  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
> +  streamer_write_hwi_stream (ob->main_stream, order);
>lto_output_var_decl_ref (ob->decl_state, ob->main_stream, node->decl);
>bp = bitpack_create (ob->main_stream);
>bp_pack_value (&bp, node->externally_visible, 1);
> @@ -967,7 +972,7 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
>  /* Output the part of the symtab in SET and VSET.  */
>  
>  void
> -output_symtab (void)
> +output_symtab (hash_map, int>* order_remap)
>  {
>struct cgraph_node *node;
>struct lto_simple_output_block *ob;
> @@ -994,9 +999,10 @@ output_symtab (void)
>  {
>symtab_node *node = lto_symtab_encoder_deref (encoder, i);
>if (cgraph_node *cnode = dyn_cast  (node))
> -lto_output_node (ob, cnode, encoder);
> + lto_output_node (ob, cnode, encoder, order_remap);
>else
> - lto_output_varpool_node (ob, dyn_cast (node), encoder);
> + lto_output_varpool_node (ob, dyn_cast (node), encoder,
> +  order_remap);
>  }
>  
>  

[PATCH 6/7] lto: squash order of symbols in partitions

2023-11-17 Thread Michal Jires
This patch squashes order of symbols in individual partitions, so that
their relative order is conserved, but is not influenced by symbols in
other partitions.
Order of cloned symbols is set to 0. This should be fine because order
specifies order of symbols in input files, which cloned symbols are not
part of.

This is important for incremental LTO because if there is a new symbol,
it otherwise shifts order of all symbols with higher order, which would
diverge them all.

Bootstrapped/regtested on x86_64-pc-linux-gnu

gcc/ChangeLog:

* lto-cgraph.cc (lto_output_node): Add and use order_remap.
(lto_output_varpool_node): Likewise.
(output_symtab): Likewise.
* lto-streamer-out.cc (produce_asm): Likewise.
(output_function): Likewise.
(output_constructor): Likewise.
(copy_function_or_variable): Likewise.
(cmp_int): New.
(lto_output): Generate order_remap.
* lto-streamer.h (produce_asm): Add order_remap.
(output_symtab): Likewise.
---
 gcc/lto-cgraph.cc   | 20 
 gcc/lto-streamer-out.cc | 71 +
 gcc/lto-streamer.h  |  5 +--
 3 files changed, 73 insertions(+), 23 deletions(-)

diff --git a/gcc/lto-cgraph.cc b/gcc/lto-cgraph.cc
index 32c0f5ac6db..a7530290fba 100644
--- a/gcc/lto-cgraph.cc
+++ b/gcc/lto-cgraph.cc
@@ -381,7 +381,8 @@ reachable_from_this_partition_p (struct cgraph_node *node, 
lto_symtab_encoder_t
 
 static void
 lto_output_node (struct lto_simple_output_block *ob, struct cgraph_node *node,
-lto_symtab_encoder_t encoder)
+lto_symtab_encoder_t encoder,
+hash_map, int>* order_remap)
 {
   unsigned int tag;
   struct bitpack_d bp;
@@ -405,7 +406,9 @@ lto_output_node (struct lto_simple_output_block *ob, struct 
cgraph_node *node,
 
   streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
   tag);
-  streamer_write_hwi_stream (ob->main_stream, node->order);
+
+  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
+  streamer_write_hwi_stream (ob->main_stream, order);
 
   /* In WPA mode, we only output part of the call-graph.  Also, we
  fake cgraph node attributes.  There are two cases that we care.
@@ -585,7 +588,8 @@ lto_output_node (struct lto_simple_output_block *ob, struct 
cgraph_node *node,
 
 static void
 lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node 
*node,
-lto_symtab_encoder_t encoder)
+lto_symtab_encoder_t encoder,
+hash_map, int>* order_remap)
 {
   bool boundary_p = !lto_symtab_encoder_in_partition_p (encoder, node);
   bool encode_initializer_p
@@ -602,7 +606,8 @@ lto_output_varpool_node (struct lto_simple_output_block 
*ob, varpool_node *node,
 
   streamer_write_enum (ob->main_stream, LTO_symtab_tags, LTO_symtab_last_tag,
   LTO_symtab_variable);
-  streamer_write_hwi_stream (ob->main_stream, node->order);
+  int order = flag_wpa ? *order_remap->get (node->order) : node->order;
+  streamer_write_hwi_stream (ob->main_stream, order);
   lto_output_var_decl_ref (ob->decl_state, ob->main_stream, node->decl);
   bp = bitpack_create (ob->main_stream);
   bp_pack_value (&bp, node->externally_visible, 1);
@@ -967,7 +972,7 @@ compute_ltrans_boundary (lto_symtab_encoder_t in_encoder)
 /* Output the part of the symtab in SET and VSET.  */
 
 void
-output_symtab (void)
+output_symtab (hash_map, int>* order_remap)
 {
   struct cgraph_node *node;
   struct lto_simple_output_block *ob;
@@ -994,9 +999,10 @@ output_symtab (void)
 {
   symtab_node *node = lto_symtab_encoder_deref (encoder, i);
   if (cgraph_node *cnode = dyn_cast  (node))
-lto_output_node (ob, cnode, encoder);
+   lto_output_node (ob, cnode, encoder, order_remap);
   else
-   lto_output_varpool_node (ob, dyn_cast (node), encoder);
+   lto_output_varpool_node (ob, dyn_cast (node), encoder,
+order_remap);
 }
 
   /* Go over the nodes in SET again to write edges.  */
diff --git a/gcc/lto-streamer-out.cc b/gcc/lto-streamer-out.cc
index a1bbea8fc68..9448ab195d5 100644
--- a/gcc/lto-streamer-out.cc
+++ b/gcc/lto-streamer-out.cc
@@ -2212,7 +2212,8 @@ output_cfg (struct output_block *ob, struct function *fn)
a function, set FN to the decl for that function.  */
 
 void
-produce_asm (struct output_block *ob, tree fn)
+produce_asm (struct output_block *ob, tree fn,
+hash_map, int>* order_remap)
 {
   enum lto_section_type section_type = ob->section_type;
   struct lto_function_header header;
@@ -2221,9 +,11 @@ produce_asm (struct output_block *ob, tree fn)
   if (section_type == LTO_section_function_body)
 {
   const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (fn));
-  section_name = lto_get_section_name (section_type, name,
-