Module: Mesa
Branch: master
Commit: cc8613c8d5ea4e5497a1c4d6cdcd201a250eb519
URL:    
http://cgit.freedesktop.org/mesa/mesa/commit/?id=cc8613c8d5ea4e5497a1c4d6cdcd201a250eb519

Author: Rhys Perry <[email protected]>
Date:   Wed Dec  9 12:52:21 2020 +0000

aco: add fallback algorithm in get_reg()

The generated code is often terrible, but the situations where this is
needed are rare.

Signed-off-by: Rhys Perry <[email protected]>
Reviewed-by: Tony Wasserka <[email protected]>
Reviewed-by: Daniel Schürmann <[email protected]>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8713>

---

 src/amd/compiler/aco_register_allocation.cpp | 148 ++++++++++++++++++++++++---
 1 file changed, 135 insertions(+), 13 deletions(-)

diff --git a/src/amd/compiler/aco_register_allocation.cpp 
b/src/amd/compiler/aco_register_allocation.cpp
index 2929ba29b00..16c341a55b1 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -849,8 +849,8 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
 }
 
 /* collect variables from a register area and clear reg_file */
-std::set<std::pair<unsigned, unsigned>> collect_vars(ra_ctx& ctx, 
RegisterFile& reg_file,
-                                                     const PhysRegInterval 
reg_interval)
+std::set<std::pair<unsigned, unsigned>> find_vars(ra_ctx& ctx, RegisterFile& 
reg_file,
+                                                  const PhysRegInterval 
reg_interval)
 {
    std::set<std::pair<unsigned, unsigned>> vars;
    for (PhysReg j : reg_interval) {
@@ -862,21 +862,29 @@ std::set<std::pair<unsigned, unsigned>> 
collect_vars(ra_ctx& ctx, RegisterFile&
             if (id) {
                assignment& var = ctx.assignments[id];
                vars.emplace(var.rc.bytes(), id);
-               reg_file.clear(var.reg, var.rc);
-               if (!reg_file[j])
-                  break;
             }
          }
       } else if (reg_file[j] != 0) {
          unsigned id = reg_file[j];
          assignment& var = ctx.assignments[id];
          vars.emplace(var.rc.bytes(), id);
-         reg_file.clear(var.reg, var.rc);
       }
    }
    return vars;
 }
 
+/* collect variables from a register area and clear reg_file */
+std::set<std::pair<unsigned, unsigned>> collect_vars(ra_ctx& ctx, 
RegisterFile& reg_file,
+                                                     const PhysRegInterval 
reg_interval)
+{
+   std::set<std::pair<unsigned, unsigned>> vars = find_vars(ctx, reg_file, 
reg_interval);
+   for (std::pair<unsigned, unsigned> size_id : vars) {
+      assignment& var = ctx.assignments[size_id.second];
+      reg_file.clear(var.reg, var.rc);
+   }
+   return vars;
+}
+
 bool get_regs_for_copies(ra_ctx& ctx,
                          RegisterFile& reg_file,
                          std::vector<std::pair<Operand, Definition>>& 
parallelcopies,
@@ -1232,7 +1240,7 @@ bool get_reg_specified(ra_ctx& ctx,
    return true;
 }
 
-void increase_register_file(ra_ctx& ctx, RegType type) {
+bool increase_register_file(ra_ctx& ctx, RegType type) {
    uint16_t max_addressible_sgpr = ctx.program->sgpr_limit;
    uint16_t max_addressible_vgpr = ctx.program->vgpr_limit;
    if (type == RegType::vgpr && ctx.program->max_reg_demand.vgpr < 
max_addressible_vgpr) {
@@ -1240,10 +1248,82 @@ void increase_register_file(ra_ctx& ctx, RegType type) {
    } else if (type == RegType::sgpr && ctx.program->max_reg_demand.sgpr < 
max_addressible_sgpr) {
       update_vgpr_sgpr_demand(ctx.program,  
RegisterDemand(ctx.program->max_reg_demand.vgpr, 
ctx.program->max_reg_demand.sgpr + 1));
    } else {
-      //FIXME: if nothing helps, shift-rotate the registers to make space
-      aco_err(ctx.program, "Failed to allocate registers during shader 
compilation.");
-      abort();
+      return false;
+   }
+   return true;
+}
+
+struct IDAndRegClass {
+   IDAndRegClass(unsigned id_, RegClass rc_) : id(id_), rc(rc_) {}
+
+   unsigned id;
+   RegClass rc;
+};
+
+struct IDAndInfo {
+   IDAndInfo(unsigned id_, DefInfo info_) : id(id_), info(info_) {}
+
+   unsigned id;
+   DefInfo info;
+};
+
+/* Reallocates vars by sorting them and placing each variable after the 
previous
+ * one. If one of the variables has 0xffffffff as an ID, the register assigned
+ * for that variable will be returned.
+ */
+PhysReg compact_relocate_vars(ra_ctx& ctx, const std::vector<IDAndRegClass>& 
vars,
+                              std::vector<std::pair<Operand, Definition>>& 
parallelcopies,
+                              PhysReg start)
+{
+   /* This function assumes RegisterDemand/live_var_analysis rounds up 
sub-dword
+    * temporary sizes to dwords.
+    */
+   std::vector<IDAndInfo> sorted;
+   for (IDAndRegClass var : vars) {
+      DefInfo info(ctx, ctx.pseudo_dummy, var.rc, -1);
+      sorted.emplace_back(var.id, info);
+   }
+
+   std::sort(sorted.begin(), sorted.end(), [&ctx](const IDAndInfo& a,
+                                                  const IDAndInfo& b) {
+      unsigned a_stride = a.info.stride * (a.info.rc.is_subdword() ? 1 : 4);
+      unsigned b_stride = b.info.stride * (b.info.rc.is_subdword() ? 1 : 4);
+      if (a_stride > b_stride)
+         return true;
+      if (a_stride < b_stride)
+         return false;
+      if (a.id == 0xffffffff || b.id == 0xffffffff)
+         return a.id == 0xffffffff; /* place 0xffffffff before others if 
possible, not for any reason */
+      return ctx.assignments[a.id].reg < ctx.assignments[b.id].reg;
+   });
+
+   PhysReg next_reg = start;
+   PhysReg space_reg;
+   for (IDAndInfo& var : sorted) {
+      unsigned stride = var.info.rc.is_subdword() ? var.info.stride : 
var.info.stride * 4;
+      next_reg.reg_b = align(next_reg.reg_b, MAX2(stride, 4));
+
+      /* 0xffffffff is a special variable ID used reserve a space for killed
+       * operands and definitions.
+       */
+      if (var.id != 0xffffffff) {
+         if (next_reg != ctx.assignments[var.id].reg) {
+            RegClass rc = ctx.assignments[var.id].rc;
+            Temp tmp(var.id, rc);
+
+            Operand pc_op(tmp);
+            pc_op.setFixed(ctx.assignments[var.id].reg);
+            Definition pc_def(next_reg, rc);
+            parallelcopies.emplace_back(pc_op, pc_def);
+         }
+      } else {
+         space_reg = next_reg;
+      }
+
+      next_reg = next_reg.advance(var.info.rc.size() * 4);
    }
+
+   return space_reg;
 }
 
 bool is_mimg_vaddr_intact(ra_ctx& ctx, RegisterFile& reg_file, Instruction 
*instr)
@@ -1369,9 +1449,48 @@ PhysReg get_reg(ra_ctx& ctx,
     * too many moves. */
    assert(reg_file.count_zero(info.bounds) >= info.size);
 
-   //FIXME: if nothing helps, shift-rotate the registers to make space
+   if (!increase_register_file(ctx, info.rc.type())) {
+      /* fallback algorithm: reallocate all variables at once */
+      unsigned def_size = info.rc.size();
+      for (Definition def : instr->definitions) {
+         if (ctx.assignments[def.tempId()].assigned && def.regClass().type() 
== info.rc.type())
+            def_size += def.regClass().size();
+      }
+
+      unsigned killed_op_size = 0;
+      for (Operand op : instr->operands) {
+         if (op.isTemp() && op.isKillBeforeDef() && op.regClass().type() == 
info.rc.type())
+            killed_op_size += op.regClass().size();
+      }
+
+      const PhysRegInterval regs = get_reg_bounds(ctx.program, info.rc.type());
+
+      /* reallocate passthrough variables and non-killed operands */
+      std::vector<IDAndRegClass> vars;
+      for (const std::pair<unsigned, unsigned>& var : find_vars(ctx, reg_file, 
regs))
+         vars.emplace_back(var.second, ctx.assignments[var.second].rc);
+      vars.emplace_back(0xffffffff, RegClass(info.rc.type(), MAX2(def_size, 
killed_op_size)));
+
+      PhysReg space = compact_relocate_vars(ctx, vars, parallelcopies, 
regs.lo());
+
+      /* reallocate killed operands */
+      std::vector<IDAndRegClass> killed_op_vars;
+      for (Operand op : instr->operands) {
+         if (op.isKillBeforeDef() && op.regClass().type() == info.rc.type())
+            killed_op_vars.emplace_back(op.tempId(), op.regClass());
+      }
+      compact_relocate_vars(ctx, killed_op_vars, parallelcopies, space);
+
+      /* reallocate definitions */
+      std::vector<IDAndRegClass> def_vars;
+      for (Definition def : instr->definitions) {
+         if (ctx.assignments[def.tempId()].assigned && def.regClass().type() 
== info.rc.type())
+            def_vars.emplace_back(def.tempId(), def.regClass());
+      }
+      def_vars.emplace_back(0xffffffff, info.rc);
+      return compact_relocate_vars(ctx, def_vars, parallelcopies, space);
+   }
 
-   increase_register_file(ctx, info.rc.type());
    return get_reg(ctx, reg_file, temp, parallelcopies, instr, operand_index);
 }
 
@@ -1504,7 +1623,10 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
    success = get_regs_for_copies(ctx, tmp_file, pc, vars, bounds, instr, 
PhysRegInterval { best_pos, size });
 
    if (!success) {
-      increase_register_file(ctx, temp.type());
+      if (!increase_register_file(ctx, temp.type())) {
+         /* use the fallback algorithm in get_reg() */
+         return get_reg(ctx, reg_file, temp, parallelcopies, instr);
+      }
       return get_reg_create_vector(ctx, reg_file, temp, parallelcopies, instr);
    }
 

_______________________________________________
mesa-commit mailing list
[email protected]
https://lists.freedesktop.org/mailman/listinfo/mesa-commit

Reply via email to