https://gcc.gnu.org/g:c731d295897ad0c1d0117068033e05954b9215ca
commit r16-6793-gc731d295897ad0c1d0117068033e05954b9215ca Author: Kugan Vivekanandarajah <[email protected]> Date: Thu Jan 15 14:40:32 2026 +1100 [Autofdo] Add hierarchical discriminator for loop unrolling Add hierarchical discriminator support for loop unrolling. Assigns multiplicity and copyid discriminators to distinguish unrolled iterations. gcc/ChangeLog: * cfgloopmanip.cc (duplicate_loop_body_to_header_edge): Assign hierarchical discriminators for loop unrolling. * cfgloopmanip.h (DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR): New flag. * tree-ssa-loop-ivcanon.cc (try_unroll_loop_completely): Pass flag to enable hierarchical discriminator assignment. (try_peel_loop): Likewise. gcc/testsuite/ChangeLog: * gcc.dg/hierarchical-discriminator-unroll.c: New test. Signed-off-by: Kugan Vivekanandarajah <[email protected]> Diff: --- gcc/cfgloopmanip.cc | 78 ++++++++++++++++++++++ gcc/cfgloopmanip.h | 4 ++ .../gcc.dg/hierarchical-discriminator-unroll.c | 37 ++++++++++ gcc/tree-ssa-loop-ivcanon.cc | 7 +- 4 files changed, 124 insertions(+), 2 deletions(-) diff --git a/gcc/cfgloopmanip.cc b/gcc/cfgloopmanip.cc index 81b64f7d063c..d8acbad6e55f 100644 --- a/gcc/cfgloopmanip.cc +++ b/gcc/cfgloopmanip.cc @@ -34,6 +34,7 @@ along with GCC; see the file COPYING3. If not see #include "sreal.h" #include "tree-cfg.h" #include "tree-pass.h" +#include "hierarchical_discriminator.h" static void copy_loops_to (class loop **, int, class loop *); @@ -1405,6 +1406,40 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e, spec_edges[SE_LATCH] = latch_edge; place_after = e->src; + location_t loop_loc = UNKNOWN_LOCATION; + unsigned int loop_copyid_base = 0; + + /* Find a location from the loop header - works for both GIMPLE and RTL. */ + if (current_ir_type () == IR_GIMPLE) + { + gimple *last = last_nondebug_stmt (loop->header); + loop_loc = last ? gimple_location (last) : UNKNOWN_LOCATION; + } + else + { + /* For RTL, try to find an instruction with a valid location. */ + rtx_insn *insn = BB_END (loop->header); + while (insn && insn != BB_HEAD (loop->header)) + { + /* Only check location if this is a valid insn. */ + if (INSN_P (insn)) + { + location_t loc = INSN_LOCATION (insn); + if (loc != UNKNOWN_LOCATION) + { + loop_loc = get_pure_location (loc); + break; + } + } + insn = PREV_INSN (insn); + } + } + + /* Allocate copyid base for this loop duplication - works for both + GIMPLE and RTL since allocator is per-function. */ + if (loop_loc != UNKNOWN_LOCATION) + loop_copyid_base = allocate_copyid_base (loop_loc, ndupl); + for (j = 0; j < ndupl; j++) { /* Copy loops. */ @@ -1422,6 +1457,49 @@ duplicate_loop_body_to_header_edge (class loop *loop, edge e, new_bbs[i]->aux = (void *)(size_t)(j + 1); } + /* Assign hierarchical discriminators to distinguish loop iterations. */ + if (flags & DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR + && loop_copyid_base > 0) + { + /* Calculate copyid for this iteration. */ + unsigned int copyid = loop_copyid_base + j; + if (copyid > DISCR_COPYID_MAX) + copyid = DISCR_COPYID_MAX; + + if (current_ir_type () == IR_GIMPLE) + { + /* Update all basic blocks created in this iteration. */ + for (i = 0; i < n; i++) + assign_discriminators_to_bb (new_bbs[i], 0, copyid); + } + else + { + /* For RTL, manually update instruction locations. */ + for (i = 0; i < n; i++) + { + basic_block bb = new_bbs[i]; + rtx_insn *insn; + + /* Iterate through all instructions in the block. */ + FOR_BB_INSNS (bb, insn) + { + if (INSN_HAS_LOCATION (insn)) + { + location_t loc = INSN_LOCATION (insn); + /* Get existing discriminator components. */ + discriminator_components comp + = get_discriminator_components_from_loc (loc); + comp.copyid = copyid; + + /* Apply hierarchical discriminator format. */ + INSN_LOCATION (insn) + = location_with_discriminator_components (loc, comp); + } + } + } + } + } + /* Note whether the blocks and edges belong to an irreducible loop. */ if (add_irreducible_flag) { diff --git a/gcc/cfgloopmanip.h b/gcc/cfgloopmanip.h index 8cc81e1c9d4b..396e6d69e8c4 100644 --- a/gcc/cfgloopmanip.h +++ b/gcc/cfgloopmanip.h @@ -34,6 +34,10 @@ enum a complete peeling. */ #define DLTHE_FLAG_FLAT_PROFILE 8 /* Profile is flat; do not reduce count by unroll factor. */ +#define DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR 16 /* Assign hierarchical + discriminators to + distinguish loop + iterations. */ extern edge mfb_kj_edge; extern bool remove_path (edge, bool * = NULL, bitmap = NULL); diff --git a/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c new file mode 100644 index 000000000000..c22baaeee38f --- /dev/null +++ b/gcc/testsuite/gcc.dg/hierarchical-discriminator-unroll.c @@ -0,0 +1,37 @@ +/* Test that loop unrolling assigns copyid discriminators. + { dg-do compile } + { dg-options "-S -O2 -g -fno-tree-vectorize" } */ + +int a[100]; +int +test_unroll (void) +{ + int sum = 0; + int i; + + /* Small fixed-count loop that should be completely unrolled */ + #pragma GCC unroll 4 + for (i = 0; i < 4; i++) + { + /* Each unrolled iteration gets a distinct copyid (1, 2, 3, 4) */ + asm ("nop"); + sum += a[i] * 2; + } + + return sum; +} + +/* Expected discriminators from the assembly (hierarchical format: [Base:8][Multiplicity:7][CopyID:11][Unused:6]): + Loop unrolling with ndupl=4: + - allocate_copyid_base(loc, 4) returns base=1 (first time) + - Iteration 0: copyid = 1+0 = 1, multiplicity=0 → 0|(0<<8)|(1<<15) = 32768 + - Iteration 1: copyid = 1+1 = 2, multiplicity=0 → 0|(0<<8)|(2<<15) = 65536 + - Iteration 2: copyid = 1+2 = 3, multiplicity=0 → 0|(0<<8)|(3<<15) = 98304 + - Iteration 3: copyid = 1+3 = 4, multiplicity=0 → 0|(0<<8)|(4<<15) = 131072 +*/ + +/* Each unrolled iteration should have a different discriminator */ +/* { dg-final { scan-assembler "\\.loc 1 17 7 is_stmt 0 discriminator 32768" } } */ +/* { dg-final { scan-assembler "\\.loc 1 17 7 is_stmt 0 discriminator 65536" } } */ +/* { dg-final { scan-assembler "\\.loc 1 17 7 is_stmt 0 discriminator 98304" } } */ +/* { dg-final { scan-assembler "\\.loc 1 17 7 is_stmt 0 discriminator 131072" } } */ diff --git a/gcc/tree-ssa-loop-ivcanon.cc b/gcc/tree-ssa-loop-ivcanon.cc index 86906a05e5a4..fb2ef44a3d3c 100644 --- a/gcc/tree-ssa-loop-ivcanon.cc +++ b/gcc/tree-ssa-loop-ivcanon.cc @@ -65,6 +65,7 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-sccvn.h" #include "tree-vectorizer.h" /* For find_loop_location */ #include "dbgcnt.h" +#include "hierarchical_discriminator.h" /* Specifies types of loops that may be unrolled. */ @@ -980,7 +981,8 @@ try_unroll_loop_completely (class loop *loop, if (!gimple_duplicate_loop_body_to_header_edge ( loop, loop_preheader_edge (loop), n_unroll, wont_exit, exit, &edges_to_remove, - DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL)) + DLTHE_FLAG_UPDATE_FREQ | DLTHE_FLAG_COMPLETTE_PEEL + | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR)) { free_original_copy_tables (); if (dump_file && (dump_flags & TDF_DETAILS)) @@ -1222,7 +1224,8 @@ try_peel_loop (class loop *loop, if (!gimple_duplicate_loop_body_to_header_edge ( loop, loop_preheader_edge (loop), npeel, wont_exit, exit, - &edges_to_remove, DLTHE_FLAG_UPDATE_FREQ)) + &edges_to_remove, + DLTHE_FLAG_UPDATE_FREQ | DLTHE_RECORD_HIERARCHICAL_DISCRIMINATOR)) { free_original_copy_tables (); return false;
