--- src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 392 ++++++++++++++++++++-------- 1 files changed, 287 insertions(+), 105 deletions(-)
diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp index cecceca..b4cf76e 100644 --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp @@ -230,14 +230,16 @@ public: class variable_storage : public exec_node { public: variable_storage(ir_variable *var, gl_register_file file, int index) - : file(file), index(index), var(var) + : file(file), index(index), type(var->type), is_array(var->type->is_array() || var->type->is_record() || var->type->is_matrix()), is_reladdressed(false) { /* empty */ } gl_register_file file; int index; - ir_variable *var; /* variable that maps to this, if any */ + const glsl_type *type; /* GLSL type of the stored value, copied from the variable's type; type_size(type) gives the number of registers the entry spans */ + bool is_array; /* set by the constructor for array, record and matrix types. NOTE(review): get_temp() only ever assigns true to this field on entries it obtains from reralloc; confirm the false case is initialised */ + bool is_reladdressed; /* false on construction; the reindex passes place flagged entries in the next_temp_array range and unflagged ones in the next_temp range */ }; class immediate_storage : public exec_node { @@ -286,6 +288,220 @@ public: st_src_reg return_reg; }; +static int type_size(const glsl_type *type); +static int swizzle_for_size(int size); + +class variable_store { /* owns the hash table from ir_variable to variable_storage and the counters used to number PROGRAM_TEMPORARY registers */ + friend class glsl_to_tgsi_variable_allocator; +protected: + void *mem_ctx; /* ralloc context created in the constructor and freed in the destructor */ + hash_table* variables; + unsigned next_temp; /* next unused temporary index; starts at 1 and grows by type_size() per allocation */ + unsigned next_temp_array; + static void reindex_reladdress(const void *, void *, void *); + static void reindex_non_reladdress(const void *, void *, void *); + void reindex_rvalue(); + void reindex_rvalue_reladdressed(); + variable_storage* rvalue_regs; /* grown by one entry on every get_temp() call */ + unsigned rvalue_regs_count; + +public: + bool native_integers; /* read by get_temp() to choose the register type; not set by the constructor, callers assign it */ + unsigned temp_amount() const; + unsigned temp_array_amount() const; + variable_store(); + ~variable_store(); + variable_storage *find_variable_storage(class ir_variable *var) const; + variable_storage *push(class ir_variable *, gl_register_file, int); + variable_storage *push(class ir_variable *); + variable_storage *retrieve_anonymous_temp(unsigned); + st_src_reg get_temp(const glsl_type *type); + void optimise_access(void); + unsigned *reindex_table; /* allocated by optimise_access(); maps an old temporary index to its new index */ +}; + +unsigned +variable_store::temp_amount() const +{ + return next_temp; +} + 
+unsigned +variable_store::temp_array_amount() const +{ + return next_temp_array; +} + +variable_store::variable_store():mem_ctx(ralloc_context(NULL)),next_temp(1),next_temp_array(1),rvalue_regs_count(0),rvalue_regs(NULL) +{ + variables = hash_table_ctor(0,hash_table_pointer_hash,hash_table_pointer_compare); +} + +variable_store::~variable_store() +{ + hash_table_dtor(variables); + ralloc_free(mem_ctx); +} + +variable_storage * +variable_store::find_variable_storage(ir_variable *var) const +{ + return (class variable_storage *) hash_table_find(variables,var); +} + +variable_storage* +variable_store::push(class ir_variable *var, gl_register_file file, int index) +{ + variable_storage *storage = new (mem_ctx) variable_storage(var,file,index); + hash_table_insert(variables,storage,var); + return storage; +} + +variable_storage* +variable_store::push(ir_variable *ir) +{ + variable_storage* retval = push(ir, PROGRAM_TEMPORARY, next_temp); + next_temp += type_size(ir->type); + if (ir->type->is_array() || ir->type->is_record() || ir->type->is_matrix()) { + retval->is_array = true; + } + return retval; +} + +variable_storage* +variable_store::retrieve_anonymous_temp(unsigned reg) +{ + for (unsigned i = 0; i < rvalue_regs_count; i++) { + unsigned range_start = rvalue_regs[i].index; + unsigned range_end = range_start + type_size(rvalue_regs[i].type) - 1; + if (reg >= range_start && reg <= range_end) { + return rvalue_regs + i; + } + } + printf ("Failed to get storage"); + exit(1); +} + +/** + * In the initial pass of codegen, we assign temporary numbers to + * intermediate results. (not SSA -- variable assignments will reuse + * storage). + */ +st_src_reg +variable_store::get_temp(const glsl_type *type) +{ + st_src_reg src; + rvalue_regs_count++; + rvalue_regs = reralloc(mem_ctx,rvalue_regs,variable_storage,rvalue_regs_count); + variable_storage &entry = rvalue_regs[rvalue_regs_count - 1]; + + src.type = native_integers ? 
type->base_type : GLSL_TYPE_FLOAT; + src.file = PROGRAM_TEMPORARY; + src.index = next_temp; + src.reladdr = NULL; + next_temp += type_size(type); + + entry.file = PROGRAM_TEMPORARY; + entry.index = src.index; + entry.type = type; + + if (type->is_array() || type->is_record() || type->is_matrix()) { + entry.is_array = true; + } + + if (type->is_array() || type->is_record()) { + src.swizzle = SWIZZLE_NOOP; + } else { + src.swizzle = swizzle_for_size(type->vector_elements); + } + src.negate = 0; + + return src; +} + +void variable_store::reindex_reladdress(const void *key, void *data, void *closure) +{ + ir_variable *var = (ir_variable *) key; + variable_storage *storage = (variable_storage *) data; + variable_store *store = (variable_store *) closure; + + if (storage->file == PROGRAM_TEMPORARY && storage->is_array) { + + if (storage->is_reladdressed) { + unsigned old_index = storage->index; + storage->index = store->next_temp_array; + size_t sz = type_size(storage->type); + store->next_temp_array += sz; + for (unsigned i = 0; i < sz; i++) { + store->reindex_table[old_index + i] = storage->index + i; + } + } + } +} + +void variable_store::reindex_non_reladdress(const void *key, void *data, void *closure) +{ + ir_variable *var = (ir_variable *) key; + variable_storage *storage = (variable_storage *) data; + variable_store *store = (variable_store *) closure; + + if (storage->file == PROGRAM_TEMPORARY && !storage->is_reladdressed) { + unsigned old_index = storage->index; + size_t sz = type_size(storage->type); + storage->index = store->next_temp; + store->next_temp += sz; + for (unsigned i = 0; i < sz; i++) { + store->reindex_table[old_index + i] = storage->index + i; + } + } +} + +void +variable_store::reindex_rvalue_reladdressed() +{ + for (unsigned i = 0; i < rvalue_regs_count; i++) { + variable_storage &storage = rvalue_regs[i]; + if (storage.is_reladdressed) { + unsigned old_index = storage.index; + size_t sz = type_size(storage.type); + storage.index = 
next_temp_array; + next_temp_array += sz; + for (unsigned i = 0; i < sz; i++) { + reindex_table[old_index + i] = storage.index + i; + } + } + } +} + +void +variable_store::reindex_rvalue() +{ + for (unsigned i = 0; i < rvalue_regs_count; i++) { + variable_storage &storage = rvalue_regs[i]; + if (!storage.is_reladdressed) { + unsigned old_index = storage.index; + size_t sz = type_size(storage.type); + storage.index = next_temp; + next_temp += sz; + for (unsigned i = 0; i < sz; i++) { + reindex_table[old_index + i] = storage.index + i; + } + } + } +} + +void +variable_store::optimise_access(void) +{ + reindex_table = rzalloc_array(mem_ctx,unsigned,next_temp); + next_temp_array = 1; + hash_table_call_foreach(variables,variable_store::reindex_reladdress,this); + reindex_rvalue_reladdressed(); + next_temp = next_temp_array + 1; + hash_table_call_foreach(variables,variable_store::reindex_non_reladdress,this); + reindex_rvalue(); +} + class glsl_to_tgsi_visitor : public ir_visitor { public: glsl_to_tgsi_visitor(); @@ -298,8 +514,6 @@ public: struct gl_shader_program *shader_program; struct gl_shader_compiler_options *options; - int next_temp; - int num_address_regs; int samplers_used; bool indirect_addr_temps; @@ -308,14 +522,11 @@ public: int glsl_version; bool native_integers; - variable_storage *find_variable_storage(ir_variable *var); - int add_constant(gl_register_file file, gl_constant_value values[4], int size, int datatype, GLuint *swizzle_out); function_entry *get_function_signature(ir_function_signature *sig); - st_src_reg get_temp(const glsl_type *type); void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); st_src_reg st_src_reg_for_float(float val); @@ -352,7 +563,7 @@ public: st_src_reg result; /** List of variable_storage */ - exec_list variables; + variable_store store; /** List of immediate_storage */ exec_list immediates; @@ -427,6 +638,7 @@ public: int eliminate_dead_code_advanced(void); void merge_registers(void); void 
renumber_registers(void); + void renumber_temp_regs(unsigned*); void *mem_ctx; }; @@ -797,7 +1009,7 @@ glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op, * that will be written by the SCS instrution, we'll need a temporary. */ if (scs_mask != unsigned(dst.writemask)) { - tmp = get_temp(glsl_type::vec4_type); + tmp = store.get_temp(glsl_type::vec4_type); } for (unsigned i = 0; i < 4; i++) { @@ -967,48 +1179,6 @@ type_size(const struct glsl_type *type) } } -/** - * In the initial pass of codegen, we assign temporary numbers to - * intermediate results. (not SSA -- variable assignments will reuse - * storage). - */ -st_src_reg -glsl_to_tgsi_visitor::get_temp(const glsl_type *type) -{ - st_src_reg src; - - src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; - src.file = PROGRAM_TEMPORARY; - src.index = next_temp; - src.reladdr = NULL; - next_temp += type_size(type); - - if (type->is_array() || type->is_record()) { - src.swizzle = SWIZZLE_NOOP; - } else { - src.swizzle = swizzle_for_size(type->vector_elements); - } - src.negate = 0; - - return src; -} - -variable_storage * -glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) -{ - - variable_storage *entry; - - foreach_iter(exec_list_iterator, iter, this->variables) { - entry = (variable_storage *)iter.get(); - - if (entry->var == var) - return entry; - } - - return NULL; -} - void glsl_to_tgsi_visitor::visit(ir_variable *ir) { @@ -1040,8 +1210,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) st_dst_reg dst; if (i == ir->num_state_slots) { /* We'll set the index later. 
*/ - storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); - this->variables.push_tail(storage); + storage = store.push(ir, PROGRAM_STATE_VAR, -1); dst = undef_dst; } else { @@ -1051,10 +1220,7 @@ glsl_to_tgsi_visitor::visit(ir_variable *ir) */ assert((int) ir->num_state_slots == type_size(ir->type)); - storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - this->next_temp += type_size(ir->type); + storage = store.push(ir); dst = st_dst_reg(st_src_reg(PROGRAM_TEMPORARY, storage->index, native_integers ? ir->type->base_type : GLSL_TYPE_FLOAT)); @@ -1205,7 +1371,7 @@ glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) ir->operands[nonmul_operand]->accept(this); c = this->result; - this->result = get_temp(ir->type); + this->result = store.get_temp(ir->type); result_dst = st_dst_reg(this->result); result_dst.writemask = (1 << ir->type->vector_elements) - 1; emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); @@ -1247,7 +1413,7 @@ glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operan b.negate = ~b.negate; - this->result = get_temp(ir->type); + this->result = store.get_temp(ir->type); emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); return true; @@ -1288,7 +1454,7 @@ glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir) new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); new_inst->saturate = true; } else { - this->result = get_temp(ir->type); + this->result = store.get_temp(ir->type); st_dst_reg result_dst = st_dst_reg(this->result); result_dst.writemask = (1 << ir->type->vector_elements) - 1; glsl_to_tgsi_instruction *inst; @@ -1309,7 +1475,7 @@ glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, emit_arl(ir, address_reg, *reg->reladdr); if (*num_reladdr != 1) { - st_src_reg temp = get_temp(glsl_type::vec4_type); + st_src_reg temp = store.get_temp(glsl_type::vec4_type); emit(ir, TGSI_OPCODE_MOV, 
st_dst_reg(temp), *reg); *reg = temp; @@ -1378,7 +1544,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* Storage for our result. Ideally for an assignment we'd be using * the actual storage for the result here, instead. */ - result_src = get_temp(ir->type); + result_src = store.get_temp(ir->type); /* convenience for the emit functions below. */ result_dst = st_dst_reg(result_src); /* Limit writes to the channels that will be used by result_src later. @@ -1508,7 +1674,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "==" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(native_integers ? + st_src_reg temp = store.get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); @@ -1566,7 +1732,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir) /* "!=" operator producing a scalar boolean. */ if (ir->operands[0]->type->is_vector() || ir->operands[1]->type->is_vector()) { - st_src_reg temp = get_temp(native_integers ? + st_src_reg temp = store.get_temp(native_integers ? glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) : glsl_type::vec4_type); emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); @@ -2220,7 +2386,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir) * If TGSI had a UCMP instruction or similar, this extra * instruction would not be necessary. */ - condition_temp = get_temp(glsl_type::vec4_type); + condition_temp = store.get_temp(glsl_type::vec4_type); condition.negate = 0; emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition); condition_temp.swizzle = condition.swizzle; @@ -2277,7 +2443,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) * get lucky, copy propagation will eliminate the extra moves. 
*/ if (ir->type->base_type == GLSL_TYPE_STRUCT) { - st_src_reg temp_base = get_temp(ir->type); + st_src_reg temp_base = store.get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); foreach_iter(exec_list_iterator, iter, ir->components) { @@ -2301,7 +2467,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } if (ir->type->is_array()) { - st_src_reg temp_base = get_temp(ir->type); + st_src_reg temp_base = store.get_temp(ir->type); st_dst_reg temp = st_dst_reg(temp_base); int size = type_size(ir->type->fields.array); @@ -2324,7 +2490,7 @@ glsl_to_tgsi_visitor::visit(ir_constant *ir) } if (ir->type->is_matrix()) { - st_src_reg mat = get_temp(ir->type); + st_src_reg mat = store.get_temp(ir->type); st_dst_reg mat_column = st_dst_reg(mat); for (i = 0; i < ir->type->matrix_columns; i++) { @@ -2414,18 +2580,14 @@ glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig) ir_variable *param = (ir_variable *)iter.get(); variable_storage *storage; - storage = find_variable_storage(param); + storage = store.find_variable_storage(param); assert(!storage); - storage = new(mem_ctx) variable_storage(param, PROGRAM_TEMPORARY, - this->next_temp); - this->variables.push_tail(storage); - - this->next_temp += type_size(param->type); + storage =store.push(param); } if (!sig->return_type->is_void()) { - entry->return_reg = get_temp(sig->return_type); + entry->return_reg = store.get_temp(sig->return_type); } else { entry->return_reg = undef_src; } @@ -2450,7 +2612,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) if (param->mode == ir_var_in || param->mode == ir_var_inout) { - variable_storage *storage = find_variable_storage(param); + variable_storage *storage = store.find_variable_storage(param); assert(storage); param_rval->accept(this); @@ -2486,7 +2648,7 @@ glsl_to_tgsi_visitor::visit(ir_call *ir) if (param->mode == ir_var_out || param->mode == ir_var_inout) { - variable_storage *storage = find_variable_storage(param); + variable_storage *storage = 
store.find_variable_storage(param); assert(storage); st_src_reg r; @@ -2530,7 +2692,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) * we're doing plain old texturing. The optimization passes on * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. */ - coord = get_temp(glsl_type::vec4_type); + coord = store.get_temp(glsl_type::vec4_type); coord_dst = st_dst_reg(coord); emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result); } @@ -2543,7 +2705,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) /* Storage for our result. Ideally for an assignment we'd be using * the actual storage for the result here, instead. */ - result_src = get_temp(glsl_type::vec4_type); + result_src = store.get_temp(glsl_type::vec4_type); result_dst = st_dst_reg(result_src); switch (ir->op) { @@ -2613,7 +2775,7 @@ glsl_to_tgsi_visitor::visit(ir_texture *ir) */ ir->shadow_comparitor->accept(this); - tmp_src = get_temp(glsl_type::vec4_type); + tmp_src = store.get_temp(glsl_type::vec4_type); st_dst_reg tmp_dst = st_dst_reg(tmp_src); /* Projective division not allowed for array samplers. */ @@ -2779,7 +2941,7 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) * have something to set cond_update on. 
*/ if (cond_inst == prev_inst) { - st_src_reg temp = get_temp(glsl_type::bool_type); + st_src_reg temp = store.get_temp(glsl_type::bool_type); cond_inst = emit(ir->condition, TGSI_OPCODE_MOV, st_dst_reg(temp), result); } cond_inst->cond_update = GL_TRUE; @@ -2805,7 +2967,6 @@ glsl_to_tgsi_visitor::visit(ir_if *ir) glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() { result.file = PROGRAM_UNDEFINED; - next_temp = 1; next_signature_id = 1; num_immediates = 0; current_function = NULL; @@ -3266,17 +3427,18 @@ glsl_to_tgsi_visitor::get_last_temp_write(int index) void glsl_to_tgsi_visitor::copy_propagate(void) { + int next_temp = store.temp_amount(); glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, glsl_to_tgsi_instruction *, - this->next_temp * 4); - int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + next_temp * 4); + int *acp_level = rzalloc_array(mem_ctx, int, next_temp * 4); int level = 0; foreach_iter(exec_list_iterator, iter, this->instructions) { glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); assert(inst->dst.file != PROGRAM_TEMPORARY - || inst->dst.index < this->next_temp); + || inst->dst.index < next_temp); /* First, do any copy propagation possible into the src regs. */ for (int r = 0; r < 3; r++) { @@ -3336,7 +3498,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) case TGSI_OPCODE_BGNLOOP: case TGSI_OPCODE_ENDLOOP: /* End of a basic block, clear the ACP entirely. */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + memset(acp, 0, sizeof(*acp) * next_temp * 4); break; case TGSI_OPCODE_IF: @@ -3348,7 +3510,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) /* Clear all channels written inside the block from the ACP, but * leaving those that were not touched. 
*/ - for (int r = 0; r < this->next_temp; r++) { + for (int r = 0; r < next_temp; r++) { for (int c = 0; c < 4; c++) { if (!acp[4 * r + c]) continue; @@ -3369,13 +3531,13 @@ glsl_to_tgsi_visitor::copy_propagate(void) /* Any temporary might be written, so no copy propagation * across this instruction. */ - memset(acp, 0, sizeof(*acp) * this->next_temp * 4); + memset(acp, 0, sizeof(*acp) * next_temp * 4); } else if (inst->dst.file == PROGRAM_OUTPUT && inst->dst.reladdr) { /* Any output might be written, so no copy propagation * from outputs across this instruction. */ - for (int r = 0; r < this->next_temp; r++) { + for (int r = 0; r < next_temp; r++) { for (int c = 0; c < 4; c++) { if (!acp[4 * r + c]) continue; @@ -3396,7 +3558,7 @@ glsl_to_tgsi_visitor::copy_propagate(void) } /* Clear where it's used as src. */ - for (int r = 0; r < this->next_temp; r++) { + for (int r = 0; r < next_temp; r++) { for (int c = 0; c < 4; c++) { if (!acp[4 * r + c]) continue; @@ -3457,8 +3619,8 @@ void glsl_to_tgsi_visitor::eliminate_dead_code(void) { int i; - - for (i=0; i < this->next_temp; i++) { + int next_temp = store.temp_amount(); + for (i=0; i < next_temp; i++) { int last_read = get_last_temp_read(i); int j = 0; @@ -3492,10 +3654,11 @@ glsl_to_tgsi_visitor::eliminate_dead_code(void) int glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) { + int next_temp = store.temp_amount(); glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, glsl_to_tgsi_instruction *, - this->next_temp * 4); - int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); + next_temp * 4); + int *write_level = rzalloc_array(mem_ctx, int, next_temp * 4); int level = 0; int removed = 0; @@ -3503,7 +3666,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); assert(inst->dst.file != PROGRAM_TEMPORARY - || inst->dst.index < this->next_temp); + || inst->dst.index < next_temp); switch (inst->op) { case 
TGSI_OPCODE_BGNLOOP: @@ -3518,7 +3681,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) * dead code of this type, so it shouldn't make a difference as long as * the dead code elimination pass in the GLSL compiler does its job. */ - memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + memset(writes, 0, sizeof(*writes) * next_temp * 4); break; case TGSI_OPCODE_ENDIF: @@ -3526,7 +3689,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) /* Promote the recorded level of all channels written inside the * preceding if or else block to the level above the if/else block. */ - for (int r = 0; r < this->next_temp; r++) { + for (int r = 0; r < next_temp; r++) { for (int c = 0; c < 4; c++) { if (!writes[4 * r + c]) continue; @@ -3554,7 +3717,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) /* Any temporary might be read, so no dead code elimination * across this instruction. */ - memset(writes, 0, sizeof(*writes) * this->next_temp * 4); + memset(writes, 0, sizeof(*writes) * next_temp * 4); } else if (inst->src[i].file == PROGRAM_TEMPORARY) { /* Clear where it's used as src. */ int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); @@ -3595,7 +3758,7 @@ glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void) } /* Anything still in the write array at this point is dead code. */ - for (int r = 0; r < this->next_temp; r++) { + for (int r = 0; r < next_temp; r++) { for (int c = 0; c < 4; c++) { glsl_to_tgsi_instruction *inst = writes[4 * r + c]; if (inst) @@ -3682,6 +3845,7 @@ glsl_to_tgsi_visitor::merge_registers(void) * by optimization passes. 
*/ void glsl_to_tgsi_visitor::renumber_registers(void) +void glsl_to_tgsi_visitor::renumber_temp_regs(unsigned *reindex_table) { int i = 0; int new_index = 0; @@ -3691,6 +3855,21 @@ glsl_to_tgsi_visitor::renumber_registers(void) if (i != new_index) rename_temp_register(i, new_index); new_index++; + foreach_iter(exec_list_iterator, iter, this->instructions) { + glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get(); + unsigned j; + + for (j=0; j < num_inst_src_regs(inst->op); j++) { + if (inst->src[j].file == PROGRAM_TEMPORARY) { + unsigned index = inst->src[j].index; + inst->src[j].index = reindex_table[index]; + } + } + + if (inst->dst.file == PROGRAM_TEMPORARY) { + unsigned index = inst->dst.index; + inst->dst.index = reindex_table[index]; + } } this->next_temp = new_index; @@ -3720,7 +3899,6 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; - v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; @@ -3732,7 +3910,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, * TEX colorTemp, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); - src0 = v->get_temp(glsl_type::vec4_type); + src0 = v->store.get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler = 0; @@ -3762,7 +3940,7 @@ get_pixel_transfer_visitor(struct st_fragment_program *fp, } if (pixel_maps) { - st_src_reg temp = v->get_temp(glsl_type::vec4_type); + st_src_reg temp = v->store.get_temp(glsl_type::vec4_type); st_dst_reg temp_dst = st_dst_reg(temp); assert(st->pixel_xfer.pixelmap_texture); @@ -3850,7 +4028,6 @@ get_bitmap_visitor(struct st_fragment_program *fp, 
v->glsl_version = original->glsl_version; v->native_integers = original->native_integers; v->options = original->options; - v->next_temp = original->next_temp; v->num_address_regs = original->num_address_regs; v->samplers_used = prog->SamplersUsed = original->samplers_used; v->indirect_addr_temps = original->indirect_addr_temps; @@ -3859,7 +4036,7 @@ get_bitmap_visitor(struct st_fragment_program *fp, /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */ coord = st_src_reg(PROGRAM_INPUT, FRAG_ATTRIB_TEX0, glsl_type::vec2_type); - src0 = v->get_temp(glsl_type::vec4_type); + src0 = v->store.get_temp(glsl_type::vec4_type); dst0 = st_dst_reg(src0); inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord); inst->sampler = samplerIndex; @@ -4542,6 +4719,7 @@ st_translate_program( unsigned i; enum pipe_error ret = PIPE_OK; + int next_temp = program->store.temp_amount(); assert(numInputs <= Elements(t->inputs)); assert(numOutputs <= Elements(t->outputs)); @@ -4695,7 +4873,7 @@ st_translate_program( * in sequential order. Else, we declare them on demand elsewhere. * (Note: the number of temporaries is equal to program->next_temp) */ - for (i = 0; i < (unsigned)program->next_temp; i++) { + for (i = 0; i < (unsigned)next_temp; i++) { /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ t->temps[i] = ureg_DECL_temporary(t->ureg); } @@ -4879,6 +5057,7 @@ get_mesa_program(struct gl_context *ctx, v->options = options; v->glsl_version = ctx->Const.GLSLVersion; v->native_integers = ctx->Const.NativeIntegers; + v->store.native_integers = v->native_integers; _mesa_generate_parameters_list_for_uniforms(shader_program, shader, prog->Parameters); @@ -4929,6 +5108,9 @@ get_mesa_program(struct gl_context *ctx, } #endif + v->store.optimise_access(); + v->renumber_temp_regs(v->store.reindex_table); + if (!screen->get_shader_param(screen, pipe_shader_type, PIPE_SHADER_CAP_OUTPUT_READ)) { /* Remove reads to output registers, and to varyings in vertex shaders. 
*/ -- 1.7.7 _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev