Improves register pressure, since otherwise we end up emitting
loads for all the elements in the RHS and them emitting
stores for all elements in the LHS.
---
 src/glsl/lower_ubo_reference.cpp | 56 ++++++++++++++++++++++++++++++++++++++--
 1 file changed, 54 insertions(+), 2 deletions(-)

diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp
index b82d800..8ec8346 100644
--- a/src/glsl/lower_ubo_reference.cpp
+++ b/src/glsl/lower_ubo_reference.cpp
@@ -155,6 +155,7 @@ public:
                       ir_rvalue *offset);
 
    bool check_for_buffer_array_copy(ir_assignment *ir);
+   bool check_for_buffer_struct_copy(ir_assignment *ir);
    void check_for_ssbo_store(ir_assignment *ir);
    void write_to_memory(ir_dereference *deref,
                         ir_variable *var,
@@ -1190,12 +1191,60 @@ 
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
    return true;
 }
 
+bool
+lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
+{
+   if (!ir || !ir->lhs || !ir->rhs)
+      return false;
+
+   /* LHS and RHS must be records */
+   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
+      return false;
+
+   /* RHS must be a buffer-backed variable. This is what can cause the problem
+    * since it would lead to a series of loads that need to live until we
+    * see the writes to the LHS.
+    */
+   ir_variable *rhs_var = ir->rhs->variable_referenced();
+   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
+      return false;
+
+   /* Split the struct copy into individual element copies to reduce
+    * register pressure
+    */
+   ir_dereference *rhs_deref = ir->rhs->as_dereference();
+   if (!rhs_deref)
+      return false;
+
+   ir_dereference *lhs_deref = ir->lhs->as_dereference();
+   if (!lhs_deref)
+      return false;
+
+   assert(lhs_deref->type->record_compare(rhs_deref->type));
+   mem_ctx = ralloc_parent(shader->ir);
+
+   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
+      const char *field_name = lhs_deref->type->fields.structure[i].name;
+      ir_dereference *lhs_field =
+         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
+                                            field_name);
+      ir_dereference *rhs_field =
+         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
+                                            field_name);
+      ir->insert_after(assign(lhs_field, rhs_field));
+   }
+
+   ir->remove();
+   progress = true;
+   return true;
+}
+
 ir_visitor_status
 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
 {
-   /* Array copies could involve large amounts of load/store
+   /* Array and struct copies could involve large amounts of load/store
     * operations. To improve register pressure we want to special-case
-    * these and split array copies into individual element copies.
+    * these and split them into individual element copies.
     * This way we avoid emitting all the loads for the RHS first and
     * all the writes for the LHS second and register usage is more
     * efficient.
@@ -1203,6 +1252,9 @@ lower_ubo_reference_visitor::visit_enter(ir_assignment 
*ir)
    if (check_for_buffer_array_copy(ir))
       return visit_continue_with_parent;
 
+   if (check_for_buffer_struct_copy(ir))
+      return visit_continue_with_parent;
+
    check_ssbo_unsized_array_length_assignment(ir);
    check_for_ssbo_store(ir);
    return rvalue_visit(ir);
-- 
1.9.1

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to