Module: Mesa
Branch: main
Commit: 65237f8bbcaae12929fa4c35a71626f3c53f5b56
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=65237f8bbcaae12929fa4c35a71626f3c53f5b56

Author: Ian Romanick <[email protected]>
Date:   Wed Nov 29 11:15:26 2023 -0800

intel/fs: Don't add MOV instructions to DO blocks in combine constants

There was a subtle bug related to CFG tracking. Namely, some branch
instructions may point *only* to the block after the DO instruction
for the loop. If the MOV instructions are in the DO block, the may not
have liveness properly tracked.

Like in !25132, having the MOV instructions in blocks that might
contain other instructions helps scheduling.

shader-db:

All Broadwell and newer Intel GPUs had similar results (Ice Lake shown)
total cycles in shared programs: 848577248 -> 848557268 (<.01%)
cycles in affected programs: 78256396 -> 78236416 (-0.03%)
helped: 361 / HURT: 18

fossil-db:

All Skylake and newer Intel GPUs had similar results (Ice Lake shown)
Totals:
Cycles: 15021501924 -> 15021372904 (-0.00%); split: -0.00%, +0.00%

Totals from 735 (0.11% of 656080) affected shaders:
Cycles: 676429502 -> 676300482 (-0.02%); split: -0.02%, +0.00%

Reviewed-by: Francisco Jerez <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/26439>

---

 src/intel/compiler/brw_fs_combine_constants.cpp | 63 +++++++++++++++++++++++--
 1 file changed, 59 insertions(+), 4 deletions(-)

diff --git a/src/intel/compiler/brw_fs_combine_constants.cpp 
b/src/intel/compiler/brw_fs_combine_constants.cpp
index f2bec5e5f92..0dd8769f6bb 100644
--- a/src/intel/compiler/brw_fs_combine_constants.cpp
+++ b/src/intel/compiler/brw_fs_combine_constants.cpp
@@ -1591,14 +1591,61 @@ fs_visitor::opt_combine_constants()
       }
    }
 
+   bool rebuild_cfg = false;
+
    /* Insert MOVs to load the constant values into GRFs. */
    for (int i = 0; i < table.len; i++) {
       struct imm *imm = &table.imm[i];
+
       /* Insert it either before the instruction that generated the immediate
        * or after the last non-control flow instruction of the common ancestor.
        */
-      exec_node *n = (imm->inst ? imm->inst :
-                      imm->block->last_non_control_flow_inst()->next);
+      exec_node *n;
+      bblock_t *insert_block;
+      if (imm->inst != nullptr) {
+         n = imm->inst;
+         insert_block = imm->block;
+      } else {
+         if (imm->block->start()->opcode == BRW_OPCODE_DO) {
+            /* DO blocks are weird. They can contain only the single DO
+             * instruction. As a result, MOV instructions cannot be added to
+             * the DO block.
+             */
+            bblock_t *next_block = imm->block->next();
+            if (next_block->starts_with_control_flow()) {
+               /* This is the difficult case. This occurs for code like
+                *
+                *    do {
+                *       do {
+                *          ...
+                *       } while (...);
+                *    } while (...);
+                *
+                * when the MOV instructions need to be inserted between the
+                * two DO instructions.
+                *
+                * To properly handle this scenario, a new block would need to
+                * be inserted. Doing so would require modifying arbitrary many
+                * CONTINUE, BREAK, and WHILE instructions to point to the new
+                * block.
+                *
+                * It is unlikely that this would ever be correct. Instead,
+                * insert the MOV instructions in the known wrong place and
+                * rebuild the CFG at the end of the pass.
+                */
+               insert_block = imm->block;
+               n = insert_block->last_non_control_flow_inst()->next;
+
+               rebuild_cfg = true;
+            } else {
+               insert_block = next_block;
+               n = insert_block->start();
+            }
+         } else {
+            insert_block = imm->block;
+            n = insert_block->last_non_control_flow_inst()->next;
+         }
+      }
 
       /* From the BDW and CHV PRM, 3D Media GPGPU, Special Restrictions:
        *
@@ -1613,7 +1660,7 @@ fs_visitor::opt_combine_constants()
        * both HF slots within a DWord with the constant.
        */
       const uint32_t width = devinfo->ver == 8 && imm->is_half_float ? 2 : 1;
-      const fs_builder ibld = bld.at(imm->block, n).exec_all().group(width, 0);
+      const fs_builder ibld = bld.at(insert_block, n).exec_all().group(width, 
0);
 
       fs_reg reg(VGRF, imm->nr);
       reg.offset = imm->subreg_offset;
@@ -1785,8 +1832,16 @@ fs_visitor::opt_combine_constants()
       }
    }
 
+   if (rebuild_cfg) {
+      delete cfg;
+      cfg = NULL;
+      calculate_cfg();
+   }
+
    ralloc_free(const_ctx);
-   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES);
+
+   invalidate_analysis(DEPENDENCY_INSTRUCTIONS | DEPENDENCY_VARIABLES |
+                       (rebuild_cfg ? DEPENDENCY_BLOCKS : DEPENDENCY_NOTHING));
 
    return true;
 }

Reply via email to