---
 src/mesa/drivers/dri/i965/brw_fs.cpp           |    4 +-
 src/mesa/drivers/dri/i965/brw_vec4.h           |   13 ++++
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp |  112 +++++++++++++++++++++---
 3 files changed, 116 insertions(+), 13 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 6ecaa6c..d095e86 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -462,7 +462,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
              * field of the setup reg.
              */
             for (unsigned int k = 0; k < type->vector_elements; k++) {
-               struct brw_reg interp = interp_reg(location, k);
+               struct brw_reg interp = interp_reg(location, k + ir->horizontal_location);
                interp = suboffset(interp, 3);
                interp.type = reg->type;
                emit(FS_OPCODE_CINTERP, attr, fs_reg(interp));
@@ -482,7 +482,7 @@ fs_visitor::emit_general_interpolation(ir_variable *ir)
                    k == 3 && !(c->key.proj_attrib_mask & (1 << location))) {
                   emit(BRW_OPCODE_MOV, attr, fs_reg(1.0f));
                } else {
-                  struct brw_reg interp = interp_reg(location, k);
+                  struct brw_reg interp = interp_reg(location, k + ir->horizontal_location);
                   brw_wm_barycentric_interp_mode barycoord_mode;
                   if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
                      barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
index 2555fa7..54aa99e 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4.h
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -293,6 +293,17 @@ public:
 
 class vec4_visitor : public ir_visitor
 {
+protected:
+   class output_info : public exec_node {
+   public:
+      const ir_variable *var;
+      const dst_reg *reg;
+      output_info(const ir_variable *v, const dst_reg *r)
+         : var(v), reg(r)
+      {
+      }
+   };
+
 public:
    vec4_visitor(struct brw_vs_compile *c,
                 struct gl_shader_program *prog, struct brw_shader *shader);
@@ -388,6 +399,7 @@ public:
     */
    dst_reg output_reg[BRW_VERT_RESULT_MAX];
    const char *output_reg_annotation[BRW_VERT_RESULT_MAX];
+   exec_list custom_outputs;
    int uniform_size[MAX_UNIFORMS];
    int uniform_vector_size[MAX_UNIFORMS];
    int uniforms;
@@ -504,6 +516,7 @@ public:
    void emit_clip_distances(struct brw_reg reg, int offset);
    void emit_generic_urb_slot(dst_reg reg, int vert_result);
    void emit_urb_slot(int mrf, int vert_result);
+   void emit_custom_output(bool is_first_pass, int &max_mrf, int result_vert_to_slot[]);
    void emit_urb_writes(void);
 
    src_reg get_scratch_offset(vec4_instruction *inst,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index 13ba18b..fd76cff 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -855,16 +855,32 @@ vec4_visitor::visit(ir_variable *ir)
 
    case ir_var_out:
       reg = new(mem_ctx) dst_reg(this, ir->type);
-
-      for (int i = 0; i < type_size(ir->type); i++) {
-         output_reg[ir->location + i] = *reg;
-         output_reg[ir->location + i].reg_offset = i;
-         output_reg[ir->location + i].type =
-            brw_type_for_base_type(ir->type->get_scalar_type());
-         output_reg_annotation[ir->location + i] = ir->name;
+      if (ir->location < VERT_RESULT_VAR0) {
+         for (int i = 0; i < type_size(ir->type); i++) {
+            output_reg[ir->location + i] = *reg;
+            output_reg[ir->location + i].reg_offset = i;
+            output_reg[ir->location + i].type =
+               brw_type_for_base_type(ir->type->get_scalar_type());
+            output_reg_annotation[ir->location + i] = ir->name;
+         }
+      } else {
+         output_info *oi = new (mem_ctx) output_info(ir, reg);
+         custom_outputs.push_tail(oi);
       }
-      break;
+      /* components = (ir->type->is_array())?ir->type->fields.array->vector_elements:ir->type->vector_elements;
+      if (!output_reg_annotation[ir->location]) {
+         // The reg has not been set
+         reg = new(mem_ctx) dst_reg(this, ir->type);
+
+      } else {
+         // The reg has already been set : this output is packed
+         reg = &(output_reg[ir->location]);
+         output_reg[ir->location].writemask |= (((1 << components) - 1) << ir->horizontal_location);
+         char * new_annotation = ralloc_asprintf(mem_ctx, "%s ; %s", output_reg_annotation[ir->location], ir->name);
+         output_reg_annotation[ir->location] = new_annotation;
+      }*/
 
+      break;
    case ir_var_auto:
    case ir_var_temporary:
       reg = new(mem_ctx) dst_reg(this, ir->type);
@@ -2148,6 +2164,8 @@ vec4_visitor::emit_clip_distances(struct brw_reg reg, int offset)
 void
 vec4_visitor::emit_generic_urb_slot(dst_reg reg, int vert_result)
 {
+   if (vert_result >= VERT_RESULT_VAR0)
+      return;
    assert (vert_result < VERT_RESULT_MAX);
    reg.type = output_reg[vert_result].type;
    current_annotation = output_reg_annotation[vert_result];
@@ -2203,6 +2221,73 @@ vec4_visitor::emit_urb_slot(int mrf, int vert_result)
    }
 }
 
+/**
+ * Copy every output queued in custom_outputs (the ir_var_out variables that
+ * visit() found at or above VERT_RESULT_VAR0) into the MRFs of a URB write.
+ *
+ * The data is moved into channels [horizontal_location,
+ * horizontal_location + components) of the MRF of each slot the variable
+ * covers.  The source GRF is presumed to hold it in channels
+ * [0, components), so the source swizzle shifts it up accordingly.
+ *
+ * \param is_first_pass true for the first URB write: a variable is emitted
+ *        until one of its slots maps to an MRF (slot + 2) above max_mrf.
+ *        false for the second write: only the slots above max_mrf are
+ *        emitted, rebased to mrf - max_mrf + 1.
+ * \param max_mrf highest MRF the first write may use.  On the second pass
+ *        it is overwritten with the highest MRF written (0 if none was).
+ * \param result_vert_to_slot maps a vert_result index to its slot.
+ */
+void
+vec4_visitor::emit_custom_output(bool is_first_pass, int &max_mrf, int result_vert_to_slot[])
+{
+   int new_max = 0;
+   foreach_list_const(node, &custom_outputs) {
+      output_info *oi = (output_info *) node;
+      current_annotation = oi->var->name;
+      const glsl_type *const type = oi->var->type;
+      unsigned components = (type->is_array())?type->fields.array->vector_elements:type->vector_elements;
+      /* NOTE(review): assumes horizontal_location is in [0, 3] -- confirm. */
+      const unsigned shift = oi->var->horizontal_location;
+      src_reg associed_gpr = src_reg(*(oi->reg));
+      unsigned swz = swizzle_for_size(components);
+      /* Destination channel c has to read source channel c - shift, so index
+       * the swizzle with (c + 4 - shift) % 4; (c + shift) % 4 would rotate
+       * the wrong way whenever shift is odd.
+       */
+      associed_gpr.swizzle = BRW_SWIZZLE4(
+               BRW_GET_SWZ(swz, (0 + 4 - shift) % 4),
+               BRW_GET_SWZ(swz, (1 + 4 - shift) % 4),
+               BRW_GET_SWZ(swz, (2 + 4 - shift) % 4),
+               BRW_GET_SWZ(swz, (3 + 4 - shift) % 4)
+            );
+
+      unsigned size = type_size(type);
+      for (unsigned i = 0; i < size; i++) {
+         /* Signed, like max_mrf, so the comparisons below stay signed. */
+         int mrf = result_vert_to_slot[oi->var->location + i] + 2;
+         if (is_first_pass && mrf > max_mrf)
+            break;
+         else if (!is_first_pass) {
+            if (mrf > max_mrf)
+               mrf = mrf - max_mrf + 1;
+            else
+               continue;
+         }
+         new_max = MAX2(mrf, new_max);
+         dst_reg reg = dst_reg(MRF, mrf);
+         reg.type = brw_type_for_base_type(oi->var->type);
+         associed_gpr.reg_offset = i;
+
+         reg.writemask = ((1 << components) - 1) << shift;
+         /* Plain copy: no saturation is applied to these outputs. */
+         emit(MOV(reg, associed_gpr));
+      }
+   }
+   if (!is_first_pass)
+      max_mrf = new_max;
+}
+
 static int
 align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
 {
@@ -2277,6 +2362,8 @@ vec4_visitor::emit_urb_writes()
          break;
       }
    }
+
+   emit_custom_output(true, max_usable_mrf, c->vue_map.vert_result_to_slot);
 
    current_annotation = "URB write";
    vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
@@ -2286,18 +2373,20 @@ vec4_visitor::emit_urb_writes()
 
    /* Optional second URB write */
    if (!inst->eot) {
-      mrf = base_mrf + 1;
+      /*mrf = base_mrf + 1;
 
       for (; slot < c->vue_map.num_slots; ++slot) {
          assert(mrf < max_usable_mrf);
 
          emit_urb_slot(mrf++, c->vue_map.slot_to_vert_result[slot]);
-      }
+      }*/
+      int tmp = max_usable_mrf;
+      emit_custom_output(false, tmp, c->vue_map.vert_result_to_slot);
 
       current_annotation = "URB write";
       inst = emit(VS_OPCODE_URB_WRITE);
       inst->base_mrf = base_mrf;
-      inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+      inst->mlen = align_interleaved_urb_mlen(brw, tmp);
       inst->eot = true;
       /* URB destination offset.  In the previous write, we got MRFs
        * 2-13 minus the one header MRF, so 12 regs.  URB offset is in
@@ -2616,6 +2705,7 @@ vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
    this->live_intervals_valid = false;
 
    this->uniforms = 0;
+   memset(output_reg_annotation, 0, sizeof(output_reg_annotation));
 }
 
 vec4_visitor::~vec4_visitor()
-- 
1.7.7

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev