On 10/07/15 22:47, Jordan Justen wrote: > On 2015-07-10 03:13:44, Iago Toral Quiroga wrote: >> From: Samuel Iglesias Gonsalvez <sigles...@igalia.com> >> >> Extend the existing lower_ubo_reference pass to also detect SSBO writes >> and lower them to __intrinsic_store_ssbo intrinsics. >> >> Signed-off-by: Samuel Iglesias Gonsalvez <sigles...@igalia.com> >> --- >> src/glsl/lower_ubo_reference.cpp | 441 >> +++++++++++++++++++++++++++------------ >> 1 file changed, 311 insertions(+), 130 deletions(-) >> >> diff --git a/src/glsl/lower_ubo_reference.cpp >> b/src/glsl/lower_ubo_reference.cpp >> index a61ff29..460b490 100644 >> --- a/src/glsl/lower_ubo_reference.cpp >> +++ b/src/glsl/lower_ubo_reference.cpp >> @@ -37,6 +37,7 @@ >> #include "ir_builder.h" >> #include "ir_rvalue_visitor.h" >> #include "main/macros.h" >> +#include "glsl_parser_extras.h" >> >> using namespace ir_builder; >> >> @@ -139,12 +140,31 @@ public: >> } >> >> void handle_rvalue(ir_rvalue **rvalue); >> - void emit_ubo_loads(ir_dereference *deref, ir_variable *base_offset, >> - unsigned int deref_offset, bool row_major, >> - int matrix_columns); >> + ir_visitor_status visit_enter(ir_assignment *ir); >> + >> + void setup_for_load_or_write(ir_variable *var, > > How about setup_for_load_or_store, or setup_for_buffer_access? >
I prefer the former (setup_for_load_or_store), as it is also called for UBO reads. >> + ir_dereference *deref, >> + ir_rvalue **offset, >> + unsigned *const_offset, >> + bool *row_major, >> + int *matrix_columns); >> ir_expression *ubo_load(const struct glsl_type *type, >> ir_rvalue *offset); >> >> + >> + void check_for_ssbo_write(ir_assignment *ir); > > Since we are using load elsewhere, I guess 'store' is better than > 'write'. > OK >> + void write_to_memory(ir_dereference *deref, >> + ir_variable *var, >> + ir_variable *write_var, >> + unsigned write_mask); >> + ir_call *ssbo_write(ir_rvalue *deref, ir_rvalue *offset, >> + unsigned write_mask); > > store > >> + >> + void emit_reads_or_writes(bool is_write, ir_dereference *deref, >> + ir_variable *base_offset, unsigned int >> deref_offset, >> + bool row_major, int matrix_columns, >> + unsigned write_mask); > > What about emit_access? > OK >> + >> void *mem_ctx; >> struct gl_shader *shader; >> struct gl_uniform_buffer_variable *ubo_var; >> @@ -218,26 +238,20 @@ interface_field_name(void *mem_ctx, char *base_name, >> ir_dereference *d, >> } >> >> void >> -lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) >> +lower_ubo_reference_visitor::setup_for_load_or_write(ir_variable *var, >> + ir_dereference *deref, >> + ir_rvalue **offset, >> + unsigned *const_offset, >> + bool *row_major, >> + int *matrix_columns) >> { >> - if (!*rvalue) >> - return; >> - >> - ir_dereference *deref = (*rvalue)->as_dereference(); >> - if (!deref) >> - return; >> - >> - ir_variable *var = deref->variable_referenced(); >> - if (!var || !var->is_in_buffer_block()) >> - return; >> - >> - mem_ctx = ralloc_parent(*rvalue); >> - >> + /* Fix out the name of the interface block */ > > Maybe 'Fix out' => 'Determine'? 
> OK >> ir_rvalue *nonconst_block_index; >> const char *const field_name = >> interface_field_name(mem_ctx, (char *) >> var->get_interface_type()->name, >> deref, &nonconst_block_index); >> >> + /* Locate the ubo block by interface name */ >> this->uniform_block = NULL; >> for (unsigned i = 0; i < shader->NumUniformBlocks; i++) { >> if (strcmp(field_name, shader->UniformBlocks[i].Name) == 0) { >> @@ -263,10 +277,10 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> >> assert(this->uniform_block); >> >> - ir_rvalue *offset = new(mem_ctx) ir_constant(0u); >> - unsigned const_offset = 0; >> - bool row_major = is_dereferenced_thing_row_major(deref); >> - int matrix_columns = 1; >> + *offset = new(mem_ctx) ir_constant(0u); >> + *const_offset = 0; >> + *row_major = is_dereferenced_thing_row_major(deref); >> + *matrix_columns = 1; >> >> /* Calculate the offset to the start of the region of the UBO >> * dereferenced by *rvalue. This may be a variable offset if an >> @@ -275,76 +289,76 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> while (deref) { >> switch (deref->ir_type) { >> case ir_type_dereference_variable: { >> - const_offset += ubo_var->Offset; >> - deref = NULL; >> - break; >> + *const_offset += ubo_var->Offset; >> + deref = NULL; >> + break; >> } >> >> case ir_type_dereference_array: { >> - ir_dereference_array *deref_array = (ir_dereference_array *)deref; >> - unsigned array_stride; >> - if (deref_array->array->type->is_matrix() && row_major) { >> - /* When loading a vector out of a row major matrix, the >> - * step between the columns (vectors) is the size of a >> - * float, while the step between the rows (elements of a >> - * vector) is handled below in emit_ubo_loads. 
>> - */ >> - array_stride = 4; >> + ir_dereference_array *deref_array = (ir_dereference_array *) deref; >> + unsigned array_stride; >> + if (deref_array->array->type->is_matrix() && *row_major) { >> + /* When loading a vector out of a row major matrix, the >> + * step between the columns (vectors) is the size of a >> + * float, while the step between the rows (elements of a >> + * vector) is handled below in emit_ubo_loads. >> + */ >> + array_stride = 4; >> if (deref_array->array->type->is_double()) >> array_stride *= 2; >> - matrix_columns = deref_array->array->type->matrix_columns; >> + *matrix_columns = deref_array->array->type->matrix_columns; >> } else if (deref_array->type->is_interface()) { >> /* We're processing an array dereference of an interface >> instance >> - * array. The thing being dereferenced *must* be a variable >> - * dereference because intefaces cannot be embedded an other >> - * types. In terms of calculating the offsets for the lowering >> - * pass, we don't care about the array index. All elements of an >> - * interface instance array will have the same offsets relative >> to >> - * the base of the block that backs them. >> + * array. The thing being dereferenced *must* be a variable >> + * dereference because interfaces cannot be embedded in other >> + * types. In terms of calculating the offsets for the lowering >> + * pass, we don't care about the array index. All elements of an >> + * interface instance array will have the same offsets relative >> to >> + * the base of the block that backs them. >> */ >> assert(deref_array->array->as_dereference_variable()); >> deref = deref_array->array->as_dereference(); >> break; >> - } else { >> + } else { >> /* Whether or not the field is row-major (because it might be a >> - * bvec2 or something) does not affect the array itself. We >> need >> + * bvec2 or something) does not affect the array itself. We need >> * to know whether an array element in its entirety is >> row-major. 
>> */ >> const bool array_row_major = >> is_dereferenced_thing_row_major(deref_array); >> >> - array_stride = deref_array->type->std140_size(array_row_major); >> - array_stride = glsl_align(array_stride, 16); >> - } >> + array_stride = deref_array->type->std140_size(array_row_major); >> + array_stride = glsl_align(array_stride, 16); >> + } >> >> ir_rvalue *array_index = deref_array->array_index; >> if (array_index->type->base_type == GLSL_TYPE_INT) >> array_index = i2u(array_index); >> >> - ir_constant *const_index = >> + ir_constant *const_index = >> array_index->constant_expression_value(NULL); >> - if (const_index) { >> - const_offset += array_stride * const_index->value.u[0]; >> - } else { >> - offset = add(offset, >> - mul(array_index, >> - new(mem_ctx) ir_constant(array_stride))); >> - } >> - deref = deref_array->array->as_dereference(); >> - break; >> + if (const_index) { >> + *const_offset += array_stride * const_index->value.u[0]; >> + } else { >> + *offset = add(*offset, >> + mul(array_index, >> + new(mem_ctx) ir_constant(array_stride))); >> + } >> + deref = deref_array->array->as_dereference(); >> + break; >> } >> >> case ir_type_dereference_record: { >> - ir_dereference_record *deref_record = (ir_dereference_record >> *)deref; >> - const glsl_type *struct_type = deref_record->record->type; >> - unsigned intra_struct_offset = 0; >> + ir_dereference_record *deref_record = (ir_dereference_record *) >> deref; >> + const glsl_type *struct_type = deref_record->record->type; >> + unsigned intra_struct_offset = 0; >> >> - for (unsigned int i = 0; i < struct_type->length; i++) { >> - const glsl_type *type = struct_type->fields.structure[i].type; >> + for (unsigned int i = 0; i < struct_type->length; i++) { >> + const glsl_type *type = struct_type->fields.structure[i].type; >> >> - ir_dereference_record *field_deref = >> - new(mem_ctx) ir_dereference_record(deref_record->record, >> - >> struct_type->fields.structure[i].name); >> + ir_dereference_record 
*field_deref = new(mem_ctx) >> + ir_dereference_record(deref_record->record, >> + struct_type->fields.structure[i].name); >> const bool field_row_major = >> is_dereferenced_thing_row_major(field_deref); >> >> @@ -352,11 +366,12 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> >> unsigned field_align = >> type->std140_base_alignment(field_row_major); >> >> - intra_struct_offset = glsl_align(intra_struct_offset, >> field_align); >> + intra_struct_offset = glsl_align(intra_struct_offset, >> field_align); >> + >> + if (strcmp(struct_type->fields.structure[i].name, >> + deref_record->field) == 0) >> + break; >> >> - if (strcmp(struct_type->fields.structure[i].name, >> - deref_record->field) == 0) >> - break; >> intra_struct_offset += type->std140_size(field_row_major); >> >> /* If the field just examined was itself a structure, apply rule >> @@ -371,19 +386,49 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> field_align); >> >> } >> - } >> - >> - const_offset += intra_struct_offset; >> + } >> >> - deref = deref_record->record->as_dereference(); >> - break; >> + *const_offset += intra_struct_offset; >> + deref = deref_record->record->as_dereference(); >> + break; >> } >> + >> default: >> - assert(!"not reached"); >> - deref = NULL; >> - break; >> + assert(!"not reached"); >> + deref = NULL; >> + break; >> } >> } >> +} >> + >> +void >> +lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) >> +{ >> + if (!*rvalue) >> + return; >> + >> + ir_dereference *deref = (*rvalue)->as_dereference(); >> + if (!deref) >> + return; >> + >> + ir_variable *var = deref->variable_referenced(); >> + if (!var || !var->is_in_buffer_block()) >> + return; >> + >> + mem_ctx = ralloc_parent(*rvalue); >> + >> + ir_rvalue *offset = NULL; >> + unsigned const_offset; >> + bool row_major; >> + int matrix_columns; >> + >> + /* Compute the offset to the start if the dereference as well as other >> + * information we need to configure the write >> + 
*/ >> + setup_for_load_or_write(var, deref, >> + &offset, &const_offset, >> + &row_major, &matrix_columns); >> + assert(offset); >> >> /* Now that we've calculated the offset to the start of the >> * dereference, walk over the type and emit loads into a temporary. >> @@ -401,7 +446,8 @@ lower_ubo_reference_visitor::handle_rvalue(ir_rvalue >> **rvalue) >> base_ir->insert_before(assign(load_offset, offset)); >> >> deref = new(mem_ctx) ir_dereference_variable(load_var); >> - emit_ubo_loads(deref, load_offset, const_offset, row_major, >> matrix_columns); >> + emit_reads_or_writes(false, deref, load_offset, const_offset, >> + row_major, matrix_columns, 0); >> *rvalue = deref; >> >> progress = true; >> @@ -420,74 +466,127 @@ lower_ubo_reference_visitor::ubo_load(const glsl_type >> *type, >> >> } >> >> +static bool >> +shader_storage_buffer_object(const _mesa_glsl_parse_state *state) >> +{ >> + return state->ARB_shader_storage_buffer_object_enable; >> +} >> + >> +ir_call * >> +lower_ubo_reference_visitor::ssbo_write(ir_rvalue *deref, >> + ir_rvalue *offset, >> + unsigned write_mask) >> +{ >> + exec_list sig_params; >> + >> + ir_variable *block_ref = new(mem_ctx) >> + ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); >> + sig_params.push_tail(block_ref); >> + >> + ir_variable *offset_ref = new(mem_ctx) >> + ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); >> + sig_params.push_tail(offset_ref); >> + >> + ir_variable *val_ref = new(mem_ctx) >> + ir_variable(deref->type, "value" , ir_var_function_in); >> + sig_params.push_tail(val_ref); >> + >> + ir_variable *writemask_ref = new(mem_ctx) >> + ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); >> + sig_params.push_tail(writemask_ref); >> + >> + ir_function_signature *sig = new(mem_ctx) >> + ir_function_signature(glsl_type::void_type, >> shader_storage_buffer_object); >> + assert(sig); >> + sig->replace_parameters(&sig_params); >> + sig->is_intrinsic = true; >> + >> + 
ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); >> + f->add_signature(sig); >> + >> + exec_list call_params; >> + call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); >> + call_params.push_tail(offset->clone(mem_ctx, NULL)); >> + call_params.push_tail(deref->clone(mem_ctx, NULL)); >> + call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); >> + return new(mem_ctx) ir_call(sig, NULL, &call_params); >> +} >> + >> +static inline int >> +writemask_for_size(unsigned n) >> +{ >> + return ((1 << n) - 1); >> +} >> + >> /** >> - * Takes LHS and emits a series of assignments into its components >> - * from the UBO variable at variable_offset + deref_offset. >> - * >> - * Recursively calls itself to break the deref down to the point that >> - * the ir_binop_ubo_load expressions generated are contiguous scalars >> - * or vectors. >> + * Takes a deref and recursively calls itself to break the deref down to the >> + * point that the reads or writes generated are contiguous scalars or >> vectors. 
>> */ >> void >> -lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, >> - ir_variable *base_offset, >> - unsigned int deref_offset, >> - bool row_major, >> - int matrix_columns) >> +lower_ubo_reference_visitor::emit_reads_or_writes(bool is_write, >> + ir_dereference *deref, >> + ir_variable *base_offset, >> + unsigned int deref_offset, >> + bool row_major, >> + int matrix_columns, >> + unsigned write_mask) >> { >> if (deref->type->is_record()) { >> unsigned int field_offset = 0; >> >> for (unsigned i = 0; i < deref->type->length; i++) { >> - const struct glsl_struct_field *field = >> - &deref->type->fields.structure[i]; >> - ir_dereference *field_deref = >> - new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), >> - field->name); >> - >> - field_offset = >> - glsl_align(field_offset, >> + const struct glsl_struct_field *field = >> + &deref->type->fields.structure[i]; >> + ir_dereference *field_deref = >> + new(mem_ctx) ir_dereference_record(deref->clone(mem_ctx, NULL), >> + field->name); >> + >> + field_offset = >> + glsl_align(field_offset, >> field->type->std140_base_alignment(row_major)); >> >> - emit_ubo_loads(field_deref, base_offset, deref_offset + >> field_offset, >> - row_major, 1); >> + emit_reads_or_writes(is_write, field_deref, base_offset, >> + deref_offset + field_offset, >> + row_major, 1, >> + >> writemask_for_size(field_deref->type->vector_elements)); >> >> - field_offset += field->type->std140_size(row_major); >> + field_offset += field->type->std140_size(row_major); > > Tab issue here? Yeah, it seems. > > With changes Reviewed-by: Jordan Justen <jordan.l.jus...@intel.com> > Thanks! 
Sam >> } >> return; >> } >> >> if (deref->type->is_array()) { >> unsigned array_stride = >> - glsl_align(deref->type->fields.array->std140_size(row_major), >> - 16); >> + glsl_align(deref->type->fields.array->std140_size(row_major), 16); >> >> for (unsigned i = 0; i < deref->type->length; i++) { >> - ir_constant *element = new(mem_ctx) ir_constant(i); >> - ir_dereference *element_deref = >> - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), >> - element); >> - emit_ubo_loads(element_deref, base_offset, >> - deref_offset + i * array_stride, >> - row_major, 1); >> + ir_constant *element = new(mem_ctx) ir_constant(i); >> + ir_dereference *element_deref = >> + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), >> + element); >> + emit_reads_or_writes(is_write, element_deref, base_offset, >> + deref_offset + i * array_stride, >> + row_major, 1, >> + >> writemask_for_size(element_deref->type->vector_elements)); >> } >> return; >> } >> >> if (deref->type->is_matrix()) { >> for (unsigned i = 0; i < deref->type->matrix_columns; i++) { >> - ir_constant *col = new(mem_ctx) ir_constant(i); >> - ir_dereference *col_deref = >> - new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), >> - col); >> + ir_constant *col = new(mem_ctx) ir_constant(i); >> + ir_dereference *col_deref = >> + new(mem_ctx) ir_dereference_array(deref->clone(mem_ctx, NULL), >> col); >> >> if (row_major) { >> /* For a row-major matrix, the next column starts at the next >> * element. >> */ >> int size_mul = deref->type->is_double() ? 
8 : 4; >> - emit_ubo_loads(col_deref, base_offset, deref_offset + i * >> size_mul, >> - row_major, deref->type->matrix_columns); >> + emit_reads_or_writes(is_write, col_deref, base_offset, >> + deref_offset + i * size_mul, >> + row_major, deref->type->matrix_columns, >> + >> writemask_for_size(col_deref->type->vector_elements)); >> } else { >> /* std140 always rounds the stride of arrays (and matrices) to a >> * vec4, so matrices are always 16 between columns/rows. With >> @@ -495,21 +594,25 @@ >> lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, >> */ >> int size_mul = (deref->type->is_double() && >> deref->type->vector_elements > 2) ? 32 : 16; >> - emit_ubo_loads(col_deref, base_offset, deref_offset + i * >> size_mul, >> - row_major, deref->type->matrix_columns); >> + emit_reads_or_writes(is_write, col_deref, base_offset, >> + deref_offset + i * size_mul, >> + row_major, deref->type->matrix_columns, >> + >> writemask_for_size(col_deref->type->vector_elements)); >> } >> } >> return; >> } >> >> - assert(deref->type->is_scalar() || >> - deref->type->is_vector()); >> + assert(deref->type->is_scalar() || deref->type->is_vector()); >> >> if (!row_major) { >> - ir_rvalue *offset = add(base_offset, >> - new(mem_ctx) ir_constant(deref_offset)); >> - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), >> - ubo_load(deref->type, offset))); >> + ir_rvalue *offset = >> + add(base_offset, new(mem_ctx) ir_constant(deref_offset)); >> + if (is_write) >> + base_ir->insert_after(ssbo_write(deref, offset, write_mask)); >> + else >> + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), >> + ubo_load(deref->type, offset))); >> } else { >> unsigned N = deref->type->is_double() ? 
8 : 4; >> >> @@ -527,22 +630,100 @@ >> lower_ubo_reference_visitor::emit_ubo_loads(ir_dereference *deref, >> assert(matrix_columns <= 4); >> unsigned matrix_stride = glsl_align(matrix_columns * N, 16); >> >> - const glsl_type *ubo_type = deref->type->base_type == GLSL_TYPE_FLOAT >> ? >> + const glsl_type *deref_type = deref->type->base_type == >> GLSL_TYPE_FLOAT ? >> glsl_type::float_type : glsl_type::double_type; >> >> for (unsigned i = 0; i < deref->type->vector_elements; i++) { >> - ir_rvalue *chan_offset = >> - add(base_offset, >> - new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); >> - >> - base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), >> - ubo_load(ubo_type, >> - chan_offset), >> - (1U << i))); >> + ir_rvalue *chan_offset = >> + add(base_offset, >> + new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); >> + if (is_write) { >> + base_ir->insert_after(ssbo_write(swizzle(deref, i, 1), >> chan_offset, 1)); >> + } else { >> + base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), >> + ubo_load(deref_type, chan_offset), >> + (1U << i))); >> + } >> } >> } >> } >> >> +void >> +lower_ubo_reference_visitor::write_to_memory(ir_dereference *deref, >> + ir_variable *var, >> + ir_variable *write_var, >> + unsigned write_mask) >> +{ >> + ir_rvalue *offset = NULL; >> + unsigned const_offset; >> + bool row_major; >> + int matrix_columns; >> + >> + /* Compute the offset to the start if the dereference as well as other >> + * information we need to configure the write >> + */ >> + setup_for_load_or_write(var, deref, >> + &offset, &const_offset, >> + &row_major, &matrix_columns); >> + assert(offset); >> + >> + /* Now emit writes from the temporary to memory */ >> + ir_variable *write_offset = >> + new(mem_ctx) ir_variable(glsl_type::uint_type, >> + "ssbo_write_temp_offset", >> + ir_var_temporary); >> + >> + base_ir->insert_before(write_offset); >> + base_ir->insert_before(assign(write_offset, offset)); >> + >> + deref = new(mem_ctx) 
ir_dereference_variable(write_var); >> + emit_reads_or_writes(true, deref, write_offset, const_offset, >> + row_major, matrix_columns, write_mask); >> +} >> + >> +void >> +lower_ubo_reference_visitor::check_for_ssbo_write(ir_assignment *ir) >> +{ >> + if (!ir || !ir->lhs) >> + return; >> + >> + ir_rvalue *rvalue = ir->lhs->as_rvalue(); >> + if (!rvalue) >> + return; >> + >> + ir_dereference *deref = ir->lhs->as_dereference(); >> + if (!deref) >> + return; >> + >> + ir_variable *var = ir->lhs->variable_referenced(); >> + if (!var || !var->is_in_buffer_block()) >> + return; >> + >> + /* We have a write to a buffer variable, so declare a temporary and >> rewrite >> + * the assignment so that the temporary is the LHS. >> + */ >> + mem_ctx = ralloc_parent(shader->ir); >> + >> + const glsl_type *type = rvalue->type; >> + ir_variable *write_var = new(mem_ctx) ir_variable(type, >> + "ssbo_write_temp", >> + ir_var_temporary); >> + base_ir->insert_before(write_var); >> + ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); >> + >> + /* Now we have to write the value assigned to the temporary back to >> memory */ >> + write_to_memory(deref, var, write_var, ir->write_mask); >> + progress = true; >> +} >> + >> + >> +ir_visitor_status >> +lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) >> +{ >> + check_for_ssbo_write(ir); >> + return rvalue_visit(ir); >> +} >> + >> } /* unnamed namespace */ >> >> void >> -- >> 1.9.1 >> > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev