duplicates when and indirect is used as an array index. This patch
assumes that NIR will eventually be able to clean this up.
---
src/glsl/lower_packed_varyings.cpp | 421
+++++++++++++++++++++++++++++++++++++
1 file changed, 421 insertions(+)
diff --git a/src/glsl/lower_packed_varyings.cpp
b/src/glsl/lower_packed_varyings.cpp
index b606cc8..9522969 100644
--- a/src/glsl/lower_packed_varyings.cpp
+++ b/src/glsl/lower_packed_varyings.cpp
@@ -148,10 +148,28 @@
#include "ir.h"
#include "ir_builder.h"
#include "ir_optimization.h"
+#include "ir_rvalue_visitor.h"
#include "program/prog_instruction.h"
+#include "util/hash_table.h"
using namespace ir_builder;
+/**
+ * Creates new type for and array when the base type changes.
+ */
+static const glsl_type *
+update_packed_array_type(const glsl_type *type, const glsl_type
*packed_type)
+{
+ const glsl_type *element_type = type->fields.array;
+ if (element_type->is_array()) {
+ const glsl_type *new_array_type =
+ update_packed_array_type(element_type, packed_type);
+ return glsl_type::get_array_instance(new_array_type,
type->length);
+ } else {
+ return glsl_type::get_array_instance(packed_type, type->length);
+ }
+}
+
static bool
needs_lowering(ir_variable *var, bool has_enhanced_layouts,
bool disable_varying_packing)
@@ -205,6 +223,51 @@ create_packed_var(void * const mem_ctx, const
char *packed_name,
return packed_var;
}
+/**
+ * Creates a packed varying for the tessellation packing.
+ */
+static ir_variable *
+create_tess_packed_var(void *mem_ctx, ir_variable *unpacked_var)
+{
+ /* create packed varying name using location */
+ char location_str[11];
+ snprintf(location_str, 11, "%d", unpacked_var->data.location);
+ char *packed_name;
+ if ((ir_variable_mode) unpacked_var->data.mode ==
ir_var_shader_out)
+ packed_name = ralloc_asprintf(mem_ctx, "packed_out:%s",
location_str);
+ else
+ packed_name = ralloc_asprintf(mem_ctx, "packed_in:%s",
location_str);
+
+ const glsl_type *packed_type;
+ switch (unpacked_var->type->without_array()->base_type) {
+ case GLSL_TYPE_UINT:
+ packed_type = glsl_type::uvec4_type;
+ break;
+ case GLSL_TYPE_INT:
+ packed_type = glsl_type::ivec4_type;
+ break;
+ case GLSL_TYPE_FLOAT:
+ packed_type = glsl_type::vec4_type;
+ break;
+ case GLSL_TYPE_DOUBLE:
+ packed_type = glsl_type::dvec4_type;
+ break;
+ default:
+ assert(!"Unexpected type in tess varying packing");
+ return NULL;
+ }
+
+ /* Create array new array type */
+ if (unpacked_var->type->is_array()) {
+ packed_type = update_packed_array_type(unpacked_var->type,
packed_type);
+ }
+
+ return create_packed_var(mem_ctx, packed_name, packed_type,
unpacked_var,
+ (ir_variable_mode)
unpacked_var->data.mode,
+ unpacked_var->data.location,
+ unpacked_var->type->is_array());
+}
+
namespace {
/**
@@ -763,6 +826,296 @@
lower_packed_varyings_gs_splicer::visit_leave(ir_emit_vertex *ev)
}
+/**
+ * For tessellation shaders we cannot just copy everything to the
packed
+ * varyings like we do in other stages as tessellation uses shared
memory for
+ * varyings, therefore it is only safe to copy array elements that the
shader
+ * actually uses.
+ *
+ * This class searches the IR for uses of varyings and then creates
+ * instructions that copy those vars to a packed varying. This means
it is
+ * easy to end up with duplicate copies if the varying is used more
than once,
+ * also arrays of arrays create a duplicate copy for each dimension
that
+ * exists. These issues are not easily resolved without breaking
various
+ * corner cases so we leave it to a later IR stage to clean up the
mess.
+ */
+class lower_packed_varyings_tess_visitor : public ir_rvalue_visitor
+{
+public:
+ lower_packed_varyings_tess_visitor(void *mem_ctx, hash_table
*varyings,
+ ir_variable_mode mode)
+ : mem_ctx(mem_ctx), varyings(varyings), mode(mode)
+ {
+ }
+
+ virtual ~lower_packed_varyings_tess_visitor()
+ {
+ }
+
+ virtual ir_visitor_status visit_leave(ir_assignment *);
+ virtual ir_visitor_status visit_leave(ir_dereference_array *);
+
+ ir_dereference *create_dereference(ir_dereference *deref,
+ unsigned *dimensions);
+ unsigned create_extra_array_dereference(unsigned inner_dimension,
+ const glsl_type
**types_list,
+ ir_dereference
**packed_deref_list,
+ ir_dereference
**deref_list);
+ ir_variable *get_packed_var(ir_variable *var);
+ void handle_rvalue(ir_rvalue **rvalue);
+
+ /**
+ * Exec list into which the visitor should insert the packing
instructions.
+ * Caller provides this list; it should insert the instructions
into the
+ * appropriate place in the shader once the visitor has finished
running.
+ */
+ exec_list new_instructions;
+
+private:
+ /**
+ * Memory context used to allocate new instructions for the shader.
+ */
+ void * const mem_ctx;
+
+ hash_table *varyings;
+
+ ir_variable_mode mode;
+};
+
+/**
+ * Search the hash table for a packed varying for this variable.
+ */
+ir_variable *
+lower_packed_varyings_tess_visitor::get_packed_var(ir_variable *var)
+{
+ assert(var);
+
+ const struct hash_entry *entry =
+ _mesa_hash_table_search(varyings, var);
+
+ return entry ? (ir_variable *) entry->data : NULL;
+}
+
+ir_dereference *
+lower_packed_varyings_tess_visitor::create_dereference(ir_dereference
*deref,
+ unsigned
*dimension)
+{
+ ir_dereference_array *deref_array = deref->as_dereference_array();
+ if (deref_array) {
+ (*dimension)--;
+ ir_dereference *array =
+ create_dereference(deref_array->array->as_dereference(),
dimension);
+ return new(this->mem_ctx)
+ ir_dereference_array(array, deref_array->array_index);
+ } else {
+ ir_variable *unpacked_var = deref->variable_referenced();
+ ir_variable *packed_var = get_packed_var(unpacked_var);
+ return new(this->mem_ctx) ir_dereference_variable(packed_var);
+ }
+}
+/**
+ * This creates the extra derefs needed to copy the full array. For
example if
+ * we have:
+ *
+ * layout(location = 0, component = 3) in float b[][6];
+ * layout(location = 0, component = 3) out float b_tcs[][6];
+ * ...
+ * b_tcs[gl_InvocationID] = b[gl_InvocationID];
+ *
+ * We need to copy all the inner array elements to the new packed
varying:
+ *
+ * packed_out:26[gl_InvocationID][0].yzw =
b_tcs[gl_InvocationID][0];
+ * ...
+ * packed_out:26[gl_InvocationID][5].yzw =
b_tcs[gl_InvocationID][5];
+ */
+unsigned
+lower_packed_varyings_tess_visitor::create_extra_array_dereference(unsigned
inner_dimension,
+
const glsl_type **types_list,
+
ir_dereference **packed_deref_list,
+
ir_dereference **deref_list)
+{
+ unsigned outer_deref_array_size;
+ if (inner_dimension != 0)
+ outer_deref_array_size =
+ create_extra_array_dereference(inner_dimension - 1,
types_list,
+ packed_deref_list,
deref_list);
+ else {
+ assert(types_list[inner_dimension]->length > 0);
+ outer_deref_array_size = 1;
+ }
+
+ unsigned deref_array_size =
+ types_list[inner_dimension]->length * outer_deref_array_size;
+
+ /* Create new lists to store the new instructions in */
+ ir_dereference **new_packed_deref_list = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
deref_array_size);
+ ir_dereference **new_deref_list = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
deref_array_size);
+
+ unsigned list_count = 0;
+ for (unsigned i = 0; i < types_list[inner_dimension]->length; i++)
{
+ for (unsigned j = 0; j < outer_deref_array_size; j++) {
+ /* Clone the outer dimension derefs */
+ ir_dereference *deref_clone =
deref_list[j]->clone(this->mem_ctx, NULL);
+ ir_dereference *packed_deref_clone =
packed_deref_list[j]->clone(this->mem_ctx, NULL);
+
+ /* Create new derefs for the inner dimiension */
+ ir_constant *constant = new(this->mem_ctx) ir_constant(i);
+ new_packed_deref_list[list_count] = new(this->mem_ctx)
+ ir_dereference_array(packed_deref_clone, constant);
+
+ ir_constant *constant2 = new(this->mem_ctx) ir_constant(i);
+ new_deref_list[list_count] = new(this->mem_ctx)
+ ir_dereference_array(deref_clone, constant2);
+ list_count++;
+ }
+ }
+
+ /* Copy the new derefs so the caller can access them */
+ for (unsigned j = 0; j < list_count; j++) {
+ packed_deref_list[j] = new_packed_deref_list[j];
+ deref_list[j] = new_deref_list[j];
+ }
+ return deref_array_size;
+}
+
+void
+lower_packed_varyings_tess_visitor::handle_rvalue(ir_rvalue **rvalue)
+{
+ if (!*rvalue)
+ return;
+
+ ir_dereference *deref = (*rvalue)->as_dereference();
+
+ if (!deref)
+ return;
+
+ ir_variable *unpacked_var = deref->variable_referenced();
+ ir_variable *packed_var = get_packed_var(unpacked_var);
+
+ /* If the variable is packed then create a new dereference and wrap
it in
+ * a swizzle to get the correct values as specified by the
component
+ * qualifier.
+ */
+ if (packed_var) {
+ /* Count array dimensions */
+ const glsl_type *type = packed_var->type;
+ unsigned dimensions = 0;
+ while (type->is_array()) {
+ type = type->fields.array;
+ dimensions++;
+ }
+
+ /* Create a type list in reverse order (inner -> outer arrays)
as this
+ * is the order the IR works in.
+ */
+ const glsl_type **types_list = (const glsl_type **)
+ rzalloc_array_size(mem_ctx, sizeof(glsl_type *), dimensions);
+ unsigned order = dimensions;
+ type = unpacked_var->type;
+ while (type->is_array()) {
+ types_list[--order] = type;
+ type = type->fields.array;
+ }
+
+ /* Create a derefence for the packed var and clone the unpacked
deref */
+ unsigned inner_dimension = dimensions;
+ ir_dereference *packed = create_dereference(deref,
&inner_dimension);
+ ir_dereference *cloned_deref = deref->clone(this->mem_ctx,
NULL);
+
+ /* If needed create extra derefs so we can copy all inner array
elements
+ * of a multi-dimensional array.
+ */
+ unsigned instruction_count;
+ ir_dereference **packed_deref;
+ ir_dereference **unpacked_deref;
+ if (inner_dimension != 0) {
+ instruction_count =
+ types_list[inner_dimension - 1]->arrays_of_arrays_size();
+
+ /* Create new lists to store the new instructions in */
+ packed_deref = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
+ instruction_count);
+ unpacked_deref = (ir_dereference **)
+ rzalloc_array_size(mem_ctx, sizeof(ir_dereference *),
+ instruction_count);
+
+ /* Pass in the outer array derefs that already exist */
+ packed_deref[0] = packed;
+ unpacked_deref[0] = cloned_deref;
+
+ instruction_count =
+ create_extra_array_dereference(inner_dimension - 1,
types_list,
+ packed_deref,
unpacked_deref);
+ } else {
+ instruction_count = 1;
+ packed_deref = &packed;
+ unpacked_deref = &cloned_deref;
+ }
+
+ /* Wrap packed derefs in a swizzle and the create assignment */
+ unsigned swizzle_values[4] = { 0, 0, 0, 0 };
+ unsigned components =
+ unpacked_var->type->without_array()->vector_elements;
+ for (unsigned i = 0; i < components; ++i) {
+ swizzle_values[i] = i + unpacked_var->data.location_frac;
+ }
+
+ for (unsigned i = 0; i < instruction_count; i++) {
+ ir_swizzle *swiz = new(this->mem_ctx)
ir_swizzle(packed_deref[i], swizzle_values,
+ components);
+ ir_assignment *assign;
+ if (mode == ir_var_shader_out) {
+ assign = new (this->mem_ctx) ir_assignment(swiz,
unpacked_deref[i]);
+ } else {
+ assign = new (this->mem_ctx)
ir_assignment(unpacked_deref[i], swiz);
+ }
+ new_instructions.push_tail(assign);
+ }
+ }
+}
+
+ir_visitor_status
+lower_packed_varyings_tess_visitor::visit_leave(ir_dereference_array
*ir)
+{
+ /* The array index is not the target of the assignment, so clear
the
+ * 'in_assignee' flag. Restore it after returning from the array
index.
+ */
+ const bool was_in_assignee = this->in_assignee;
+ this->in_assignee = false;
+ handle_rvalue(&ir->array_index);
+ this->in_assignee = was_in_assignee;
+
+ ir_rvalue *rvalue = ir;
+ handle_rvalue(&rvalue);
+
+ return visit_continue;
+}
+
+ir_visitor_status
+lower_packed_varyings_tess_visitor::visit_leave(ir_assignment *ir)
+{
+ handle_rvalue(&ir->rhs);
+ ir->rhs->accept(this);
+
+ /* The normal rvalue visitor skips the LHS of assignments, but we
+ * need to process those just the same.
+ */
+ ir_rvalue *lhs = ir->lhs;
+ handle_rvalue(&lhs);
+ ir->lhs->accept(this);
+
+ if (ir->condition) {
+ handle_rvalue(&ir->condition);
+ ir->condition->accept(this);
+ }
+
+ return visit_continue;
+}
+
+
void
lower_packed_varyings(void *mem_ctx, unsigned locations_used,
ir_variable_mode mode, gl_shader *shader,
@@ -817,5 +1170,73 @@ lower_packed_varyings(void *mem_ctx, unsigned
locations_used,
main_func_sig->body.head->insert_before(&new_instructions);
main_func_sig->body.head->insert_before(&new_variables);
}
+ } else {
+ /* Build a hash table with all the varyings we can pack. For the
+ * tessellation stages we only pack varyings that have location
+ * and component layout qualifiers as packing varying without
these
+ * makes things much more difficult.
+ */
+ hash_table *varyings = _mesa_hash_table_create(NULL,
_mesa_hash_pointer,
+
_mesa_key_pointer_equal);
+
+ ir_variable **packed_varyings = (ir_variable **)
+ rzalloc_array_size(mem_ctx, sizeof(*packed_varyings),
+ locations_used);
+
+ foreach_in_list(ir_instruction, node, shader->ir) {
+ ir_variable *var = node->as_variable();
+ if (var == NULL)
+ continue;
+
+ if (var->data.mode != mode ||
+ var->data.location < (int) base_location ||
+ !needs_lowering(var, has_enhanced_layouts, true))
+ continue;
+
+ /* Clone the variable for program resource list before
+ * it gets modified and lost.
+ */
+ if (!shader->packed_varyings)
+ shader->packed_varyings = new (shader) exec_list;
+
+ shader->packed_varyings->push_tail(var->clone(shader, NULL));
+
+ /* Get the packed varying for this location or create a new
one. */
+ ir_variable *packed_var;
+ if (packed_varyings[var->data.location - base_location]) {
+ packed_var = packed_varyings[var->data.location -
base_location];
+ } else {
+ /* Create the new packed varying */
+ packed_var = create_tess_packed_var(mem_ctx, var);
+ var->insert_before(packed_var);
+ packed_varyings[var->data.location - base_location] =
packed_var;
+ }
+
+ /* Add to varyings the hash table with the old varying as a
key, and
+ * the packed varying as the data. This will be used later in
the
+ * visitor to look-up variables that need to be replaced.
+ */
+ _mesa_hash_table_insert(varyings, var, packed_var);
+
+ /* Change the old varying into an ordinary global, dead code
+ * elimination will clean this up for us later on.
+ */
+ assert(var->data.mode != ir_var_temporary);
+ var->data.mode = ir_var_auto;
+ }
+
+ /* Find varying dereferences */
+ /* Create instructions that copy varyings to/from temporaries */
+ lower_packed_varyings_tess_visitor visitor(mem_ctx, varyings,
mode);
+ visitor.run(shader->ir);
+
+ /* Insert instructions that copy varyings to/from temporaries */
+ if (mode == ir_var_shader_out) {
+ main_func_sig->body.append_list(&visitor.new_instructions);
+ } else {
+
main_func_sig->body.head->insert_before(&visitor.new_instructions);
+ }
+
+ _mesa_hash_table_destroy(varyings, NULL);
}
}