Re: [Mesa-dev] [PATCH] i965/fs: Implement SIMD16 dual source blending.
Hi Jason, On mié, 2014-09-17 at 11:39 -0700, Jason Ekstrand wrote: I haven't tested this yet, just looked it over. I've got a couple of inline comments below. One general comment though: I'm currently working on a bunch of compiler stuff that reworks the way we do FB writes. In particular, it reworks things to use GRF registers instead of the MRF. It probably wouldn't be too bad for me to rebase on top of this or to rebase your patch on top of what I'm doing. I just thought I'd warn you about the conflict. Thanks for the warning, I suppose it should be easy for me to rebase this patch on top of your work if you happen to land yours sooner. On Wed, Sep 17, 2014 at 4:36 AM, Iago Toral Quiroga ito...@igalia.com wrote: From the SNB PRM, volume 4, part 1, page 193: The dual source render target messages only have SIMD8 forms due to maximum message length limitations. SIMD16 pixel shaders must send two of these messages to cover all of the pixels. Each message contains two colors (4 channels each) for each pixel in the message payload. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831 --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c| 3 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 41 +++--- 4 files changed, 45 insertions(+), 14 deletions(-) I tested this on SandyBridge and IvyBridge. No piglit regressions in these platforms, but it would be nice if someone could test this in later platforms too. I only noticed these two tests for dual source blending in piglit though: tests/spec/ext_framebuffer_multisample/alpha-to-one-dual-src-blend.cpp tests/spec/ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend.cpp The first one fails, in both platforms with and without my patch. The second one passes in both platforms, with and without my patch. I also tested this with a separate test program to verify that it worked, at least, in a simple case. 
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index e6c26e3..5908ba5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present); void brw_SAMPLE(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 39f94e9..ffdbe6d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present) { struct brw_context *brw = p-brw; @@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_type, msg_length, header_present, - eot, /* last render target write */ + last_render_target, response_length, eot, 0 /* send_commit_msg */); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1bc10f5..a4b84aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst-opcode == FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; - else if (prog_data-dual_src_blend) - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; - else if (dispatch_width == 16) + else if (prog_data-dual_src_blend) { + if (dispatch_width == 8 || !inst-eot) +
Re: [Mesa-dev] [PATCH 17/20] i965: Make instruction lists local to the bblocks.
On Tue, Sep 02, 2014 at 09:34:28PM -0700, Matt Turner wrote: --- src/mesa/drivers/dri/i965/brw_cfg.cpp | 62 - src/mesa/drivers/dri/i965/brw_cfg.h| 77 +- .../drivers/dri/i965/brw_dead_control_flow.cpp | 6 +- src/mesa/drivers/dri/i965/brw_fs.cpp | 6 +- src/mesa/drivers/dri/i965/brw_fs_cse.cpp | 6 -- .../dri/i965/brw_fs_peephole_predicated_break.cpp | 6 +- src/mesa/drivers/dri/i965/brw_fs_reg_allocate.cpp | 4 +- src/mesa/drivers/dri/i965/brw_fs_sel_peephole.cpp | 4 +- .../drivers/dri/i965/brw_schedule_instructions.cpp | 10 +-- src/mesa/drivers/dri/i965/brw_shader.cpp | 14 src/mesa/drivers/dri/i965/brw_vec4_cse.cpp | 6 -- src/mesa/drivers/dri/i965/intel_asm_annotation.c | 4 +- 12 files changed, 114 insertions(+), 91 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_cfg.cpp b/src/mesa/drivers/dri/i965/brw_cfg.cpp index 8714b68..44e7744 100644 --- a/src/mesa/drivers/dri/i965/brw_cfg.cpp +++ b/src/mesa/drivers/dri/i965/brw_cfg.cpp I really like this. Indentation in this file looks weird, but that is because the current code uses tabs and your changes use spaces. 
Reviewed-by: Topi Pohjolainen topi.pohjolai...@intel.com @@ -54,9 +54,7 @@ bblock_t::bblock_t(cfg_t *cfg) : cfg(cfg), start_ip(0), end_ip(0), num(0), if_block(NULL), else_block(NULL) { - start = NULL; - end = NULL; - + instructions.make_empty(); parents.make_empty(); children.make_empty(); } @@ -119,8 +117,8 @@ bblock_t::can_combine_with(const bblock_t *that) const if ((const bblock_t *)this-link.next != that) return false; - if (ends_block(this-end) || - starts_block(that-start)) + if (ends_block(this-end()) || + starts_block(that-start())) return false; return true; @@ -138,8 +136,8 @@ bblock_t::combine_with(bblock_t *that) } this-end_ip = that-end_ip; - this-end = that-end; this-else_block = that-else_block; + this-instructions.append_list(that-instructions); this-cfg-remove_block(that); } @@ -148,9 +146,7 @@ void bblock_t::dump(backend_visitor *v) { int ip = this-start_ip; - for (backend_instruction *inst = (backend_instruction *)this-start; - inst != this-end-next; - inst = (backend_instruction *) inst-next) { + foreach_inst_in_block(backend_instruction, inst, this) { fprintf(stderr, %5d: , ip); v-dump_instruction(inst); ip++; @@ -178,16 +174,15 @@ cfg_t::cfg_t(exec_list *instructions) set_next_block(cur, entry, ip); - entry-start = (backend_instruction *) instructions-get_head(); - - foreach_in_list(backend_instruction, inst, instructions) { - cur-end = inst; - + foreach_in_list_safe(backend_instruction, inst, instructions) { /* set_next_block wants the post-incremented ip */ ip++; switch (inst-opcode) { case BRW_OPCODE_IF: + inst-remove(); + cur-instructions.push_tail(inst); + /* Push our information onto a stack so we can recover from * nested ifs. */ @@ -202,44 +197,46 @@ cfg_t::cfg_t(exec_list *instructions) * instructions. 
*/ next = new_block(); - next-start = (backend_instruction *)inst-next; cur_if-add_successor(mem_ctx, next); set_next_block(cur, next, ip); break; case BRW_OPCODE_ELSE: + inst-remove(); + cur-instructions.push_tail(inst); + cur_else = cur; next = new_block(); - next-start = (backend_instruction *)inst-next; cur_if-add_successor(mem_ctx, next); set_next_block(cur, next, ip); break; case BRW_OPCODE_ENDIF: { - if (cur-start == inst) { + if (cur-instructions.is_empty()) { /* New block was just created; use it. */ cur_endif = cur; } else { cur_endif = new_block(); -cur_endif-start = inst; -cur-end = (backend_instruction *)inst-prev; cur-add_successor(mem_ctx, cur_endif); set_next_block(cur, cur_endif, ip - 1); } + inst-remove(); + cur-instructions.push_tail(inst); + if (cur_else) { cur_else-add_successor(mem_ctx, cur_endif); } else { cur_if-add_successor(mem_ctx, cur_endif); } - assert(cur_if-end-opcode == BRW_OPCODE_IF); - assert(!cur_else || cur_else-end-opcode == BRW_OPCODE_ELSE); + assert(cur_if-end()-opcode == BRW_OPCODE_IF); + assert(!cur_else || cur_else-end()-opcode == BRW_OPCODE_ELSE); cur_if-if_block = cur_if; cur_if-else_block = cur_else; @@ -269,25 +266,28 @@ cfg_t::cfg_t(exec_list *instructions) */ cur_while = new_block();
Re: [Mesa-dev] [PATCH 20/20] i965: Add and use functions to get next/prev blocks.
On Thu, Sep 04, 2014 at 01:26:45PM -0700, Matt Turner wrote: On Tue, Sep 2, 2014 at 9:34 PM, Matt Turner matts...@gmail.com wrote: diff --git a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp index 557c3ad..8a7f42a 100644 --- a/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp +++ b/src/mesa/drivers/dri/i965/brw_dead_control_flow.cpp @@ -52,20 +52,20 @@ dead_control_flow_eliminate(backend_visitor *v) continue; backend_instruction *if_inst = NULL, *else_inst = NULL; - backend_instruction *prev_inst = ((bblock_t *)endif_block->link.prev)->end(); + backend_instruction *prev_inst = endif_block->next()->end(); This is obviously supposed to be ->prev(), not ->next(). Fixed locally. With that, patches 19 and 20 are: Reviewed-by: Topi Pohjolainen topi.pohjolai...@intel.com ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 21/37] i965/gen6/gs: Implement support for gl_PrimitiveIdIn.
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: For this we will need to move PrimitiveID information, delivered in the thread payload in r0.1, to a separate register (we use GS_OPCODE_SET_PRIMITIVE_ID for this), then map the corresponding varying slot to that register in the setup_payload() method. Notice that we cannot use a virtual register as the destination for the PrimitiveID because we need to map all input attributes to hardware registers in setup_payload(), which happens before virtual registers are mapped to hardware registers. We could work around that issue if we were able to compute the first non-payload register in emit_prolog() and move the PrimitiveID information to that register, but we can't because at that point we still don't know the final number of uniforms that will be included in the payload. So, what we do is to place PrimitiveID information in r1, which is always delivered as part of the payload but it's only populated with data relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE in the 3DSTATE_GS state packet. When we implement transform feedback, we will make sure to move the value of r1 to another register before we overwrite it with the PrimitiveID. 
--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 69 ++- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 2 + 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 4a440eb..b45c381 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -31,6 +31,8 @@ #include gen6_gs_visitor.h +const unsigned MAX_GS_INPUT_VERTICES = 6; + namespace brw { void @@ -38,6 +40,7 @@ gen6_gs_visitor::emit_prolog() { vec4_gs_visitor::emit_prolog(); + this-current_annotation = gen6 prolog; /* Gen6 geometry shaders require to allocate an initial VUE handle via * FF_SYNC message, however the documentation remarks that only one thread * can write to the URB simultaneously and the FF_SYNC message provides the @@ -59,7 +62,6 @@ gen6_gs_visitor::emit_prolog() * flags for the next vertex come right after the data items and flags for * the previous vertex. */ - this-current_annotation = gen6 prolog; Seems like this belongs in i965/gen6/gs: Add initial implementation for a gen6 geometry shader visitor. (Or, perhaps just drop the change...) Patches 21-26 (gs-support-snb-for-submission-02092014) i965/gen6/gs: Implement support for gl_PrimitiveIdIn. i965/gen6/gs: Assign geometry shader VUE map properly. i965/gen6/gs: Enable texture units and upload sampler state. i965/gen6/gs: implement GS_OPCODE_SVB_WRITE opcode i965/gen6/gs: implement GS_OPCODE_SVB_SET_DST_INDEX opcode i965/gen6/gs: implement GS_OPCODE_FF_SYNC_SET_PRIMITIVES opcode Reviewed-by: Jordan Justen jordan.l.jus...@intel.com this-vertex_output = src_reg(this, glsl_type::uint_type, (prog_data-vue_map.num_slots + 1) * @@ -94,6 +96,30 @@ gen6_gs_visitor::emit_prolog() */ this-prim_count = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this-prim_count), 0u)); + + /* PrimitveID is delivered in r0.1 of the thread payload. 
If the program +* needs it we have to move it to a separate register where we can map +* the atttribute. +* +* Notice that we cannot use a virtual register for this, because we need to +* map all input attributes to hardware registers in setup_payload(), +* which happens before virtual registers are mapped to hardware registers. +* We could work around that issue if we were able to compute the first +* non-payload register here and move the PrimitiveID information to that +* register, but we can't because at this point we don't know the final +* number uniforms that will be included in the payload. +* +* So, what we do is to place PrimitiveID information in r1, which is always +* delivered as part of the payload, but its only populated with data +* relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE +* in the 3DSTATE_GS state packet. That information can be obtained by other +* means though, so we can safely use r1 for this purpose. +*/ + if (c-prog_data.include_primitive_id) { + this-primitive_id = + src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); + emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this-primitive_id)); + } } void @@ -410,4 +436,45 @@ gen6_gs_visitor::emit_thread_end() inst-mlen = 1; } +void +gen6_gs_visitor::setup_payload() +{ + int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES]; + + /* Attributes are going to be interleaved, so one register contains two +
Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. 
*/ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. 
Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? By the way, this was for the version of this patch on the gs-support-snb-for-submission-02092014 which has src1 as the destination and dst as the source for the moves. (Hmm, not sure about the src1 naming in this context...) 
If that change seem good, then Reviewed-by: Jordan Justen jordan.l.jus...@intel.com + } + brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); } @@ -1374,7 +1384,7 @@ vec4_generator::generate_vec4_instruction(vec4_instruction *instruction, break; case GS_OPCODE_FF_SYNC: - generate_gs_ff_sync(dst, src[0], src[1]); + generate_gs_ff_sync(dst, src[0], src[1], src[2]); break; case GS_OPCODE_FF_SYNC_SET_PRIMITIVES: diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index
[Mesa-dev] [PATCH v2] i965/fs: Implement SIMD16 dual source blending.
From the SNB PRM, volume 4, part 1, page 193: The dual source render target messages only have SIMD8 forms due to maximum message length limitations. SIMD16 pixel shaders must send two of these messages to cover all of the pixels. Each message contains two colors (4 channels each) for each pixel in the message payload. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831 --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c| 3 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 - src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 79 ++ 4 files changed, 83 insertions(+), 14 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index e6c26e3..5908ba5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present); void brw_SAMPLE(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 39f94e9..ffdbe6d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present) { struct brw_context *brw = p-brw; @@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_type, msg_length, header_present, - eot, /* last render target write */ + last_render_target, response_length, eot, 0 /* send_commit_msg */); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1bc10f5..a4b84aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst-opcode == 
FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; - else if (prog_data-dual_src_blend) - msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; - else if (dispatch_width == 16) + else if (prog_data-dual_src_blend) { + if (dispatch_width == 8 || !inst-eot) + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; + else + msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23; + } else if (dispatch_width == 16) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; else msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; @@ -131,6 +134,9 @@ fs_generator::fire_fb_write(fs_inst *inst, uint32_t surf_index = prog_data-binding_table.render_target_start + inst-target; + bool last_render_target = inst-eot || + (prog_data-dual_src_blend dispatch_width == 16); + brw_fb_WRITE(p, dispatch_width, base_reg, @@ -140,6 +146,7 @@ fs_generator::fire_fb_write(fs_inst *inst, nr, 0, inst-eot, +last_render_target, inst-header_present); brw_mark_surface_used(prog_data-base, surf_index); @@ -254,6 +261,7 @@ fs_generator::generate_blorp_fb_write(fs_inst *inst) inst-mlen, 0, true, +true, inst-header_present); } diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2d5318a..99d04c0 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -3064,12 +3064,6 @@ fs_visitor::emit_fb_writes() int reg_width = dispatch_width / 8; bool src0_alpha_to_render_target = false; - if (do_dual_src) { - no16(GL_ARB_blend_func_extended not yet supported in SIMD16.); - if (dispatch_width == 16) - do_dual_src = false; - } - /* From the Sandy Bridge PRM, volume 4, page 198: * * Dispatched Pixel Enables. One bit per pixel indicating @@ -3109,11 +3103,22 @@ fs_visitor::emit_fb_writes() nr += 1; } - /* Reserve space for color. 
It'll be filled in per MRT below. */ + /* Reserve space for color. It'll be filled in per MRT below. +* +* From the SNB PRM, volume 4, part 1, page
Re: [Mesa-dev] [PATCH] replace file specific compiler optimization with inline attribute
Hello Matt, Am Sonntag, 14. September 2014, 21:12:57 schrieb Matt Turner: On Fri, Sep 12, 2014 at 1:02 AM, Marc Dietrich marvi...@gmx.de wrote: Am Donnerstag, 11. September 2014, 08:52:39 schrieb Matt Turner: On Thu, Sep 11, 2014 at 6:58 AM, Marc Dietrich marvi...@gmx.de wrote: File specific optimization as used for src/mesa/main/streaming-load-memcpy.c currently will cause problems with LTO in the future (see: https://bugs.freedesktop.org/show_bug.cgi?id=83669). Replace it with in-file target specification. This is only available in gcc-4.8 and later. no, it's there since 4.4 (4.8 has some runtime auto selection for c++ AFAIK). See https://gcc.gnu.org/gcc-4.4/changes.html (C family). Oh, interesting. This sounds like a good plan then. Looks to me like gcc-4.3 is the only version that supports -msse4* and doesn't support this attribute, and I think everyone would be okay with requiring >=gcc-4.4 to compile i965_dri.so. Perhaps we could use this with our code using SSSE3 intrinsics as well. I'll investigate. Thanks for bringing this up and correcting me about gcc's support! I guess we need a fallback for compilers supporting -msse4.1 (icc only?) and not __attribute__ (target). Meh. Maybe it can inspire other compiler authors to maintain compatibility with gcc. But I'm predicting dissent.. are you planning to commit this patch? Marc signature.asc Description: This is a digitally signed message part. ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 21/37] i965/gen6/gs: Implement support for gl_PrimitiveIdIn.
On jue, 2014-09-18 at 00:30 -0700, Jordan Justen wrote: On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: For this we will need to move PrimitiveID information, delivered in the thread payload in r0.1, to a separate register (we use GS_OPCODE_SET_PRIMITIVE_ID for this), then map the corresponding varying slot to that register in the setup_payload() method. Notice that we cannot use a virtual register as the destination for the PrimitiveID because we need to map all input attributes to hardware registers in setup_payload(), which happens before virtual registers are mapped to hardware registers. We could work around that issue if we were able to compute the first non-payload register in emit_prolog() and move the PrimitiveID information to that register, but we can't because at that point we still don't know the final number of uniforms that will be included in the payload. So, what we do is to place PrimitiveID information in r1, which is always delivered as part of the payload but it's only populated with data relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE in the 3DSTATE_GS state packet. When we implement transform feedback, we will make sure to move the value of r1 to another register before we overwrite it with the PrimitiveID. 
--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 69 ++- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 2 + 2 files changed, 70 insertions(+), 1 deletion(-) diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 4a440eb..b45c381 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -31,6 +31,8 @@ #include gen6_gs_visitor.h +const unsigned MAX_GS_INPUT_VERTICES = 6; + namespace brw { void @@ -38,6 +40,7 @@ gen6_gs_visitor::emit_prolog() { vec4_gs_visitor::emit_prolog(); + this-current_annotation = gen6 prolog; /* Gen6 geometry shaders require to allocate an initial VUE handle via * FF_SYNC message, however the documentation remarks that only one thread * can write to the URB simultaneously and the FF_SYNC message provides the @@ -59,7 +62,6 @@ gen6_gs_visitor::emit_prolog() * flags for the next vertex come right after the data items and flags for * the previous vertex. */ - this-current_annotation = gen6 prolog; Seems like this belongs in i965/gen6/gs: Add initial implementation for a gen6 geometry shader visitor. (Or, perhaps just drop the change...) You are right, I'll fix it. Iago Patches 21-26 (gs-support-snb-for-submission-02092014) i965/gen6/gs: Implement support for gl_PrimitiveIdIn. i965/gen6/gs: Assign geometry shader VUE map properly. i965/gen6/gs: Enable texture units and upload sampler state. i965/gen6/gs: implement GS_OPCODE_SVB_WRITE opcode i965/gen6/gs: implement GS_OPCODE_SVB_SET_DST_INDEX opcode i965/gen6/gs: implement GS_OPCODE_FF_SYNC_SET_PRIMITIVES opcode Reviewed-by: Jordan Justen jordan.l.jus...@intel.com this-vertex_output = src_reg(this, glsl_type::uint_type, (prog_data-vue_map.num_slots + 1) * @@ -94,6 +96,30 @@ gen6_gs_visitor::emit_prolog() */ this-prim_count = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this-prim_count), 0u)); + + /* PrimitveID is delivered in r0.1 of the thread payload. 
If the program +* needs it we have to move it to a separate register where we can map +* the atttribute. +* +* Notice that we cannot use a virtual register for this, because we need to +* map all input attributes to hardware registers in setup_payload(), +* which happens before virtual registers are mapped to hardware registers. +* We could work around that issue if we were able to compute the first +* non-payload register here and move the PrimitiveID information to that +* register, but we can't because at this point we don't know the final +* number uniforms that will be included in the payload. +* +* So, what we do is to place PrimitiveID information in r1, which is always +* delivered as part of the payload, but its only populated with data +* relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE +* in the 3DSTATE_GS state packet. That information can be obtained by other +* means though, so we can safely use r1 for this purpose. +*/ + if (c-prog_data.include_primitive_id) { + this-primitive_id = + src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD)); + emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this-primitive_id)); + } } void @@ -410,4 +436,45 @@ gen6_gs_visitor::emit_thread_end()
Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote: On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. 
*/ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. 
Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? Sure, if we can do this in just one MOV that is better. I'll give it a try. By the way, this was for the version of this patch on the gs-support-snb-for-submission-02092014 which has src1 as the destination and dst as the source for the moves. (Hmm, not sure about the src1 naming in this context...) Yes, this is used as both a src and a dst... and I suppose Samuel decided to follow naming conventions for other opcodes that have a dst and multiple src parameters. I suppose the best way to do this would have been to create a separate generator opcode for the part where this is used as a destination register only... Iago If that change seems good, then Reviewed-by: Jordan Justen
Re: [Mesa-dev] [PATCH 27/37] i965/gen6/gs: Add an additional parameter to the FF_SYNC opcode.
On Thu, 2014-09-18 at 10:39 +0200, Iago Toral Quiroga wrote: On jue, 2014-09-18 at 00:48 -0700, Jordan Justen wrote: On Thu, Aug 14, 2014 at 4:11 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com We will use this parameter in later patches to provide information relevant to transform feedback that needs to be set as part of the FF_SYNC message. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_defines.h | 4 src/mesa/drivers/dri/i965/brw_vec4.h | 3 ++- src/mesa/drivers/dri/i965/brw_vec4_generator.cpp | 16 +--- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp| 3 ++- 4 files changed, 21 insertions(+), 5 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h index 6e8b998..b0d6d9f 100644 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@ -1030,6 +1030,10 @@ enum opcode { * FF_SYNC operation. * * - src1 is the number of primitives written. +* +* - src2 is the value to hold in M0.0: number of SO vertices to write +* and number of SO primitives needed. Its value will be overwritten +* with the SVBI values if transform feedback is enabled. 
*/ GS_OPCODE_FF_SYNC, diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index 763cb23..58a5aac 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -679,7 +679,8 @@ private: struct brw_reg src2); void generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1); +struct brw_reg src1, +struct brw_reg src2); void generate_gs_set_primitive_id(struct brw_reg dst); void generate_oword_dual_block_offsets(struct brw_reg m1, struct brw_reg index); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp index d4554f5..c69b305 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_generator.cpp @@ -734,7 +734,8 @@ vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst, void vec4_generator::generate_gs_ff_sync(struct brw_reg dst, struct brw_reg src0, -struct brw_reg src1) +struct brw_reg src1, +struct brw_reg src2) { /* We use dst to setup the ff_sync header, so we expect it to be * initialized to R0 by the caller. 
Here we overwrite dword 0 (cleared @@ -744,7 +745,7 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_push_insn_state(p); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_set_default_access_mode(p, BRW_ALIGN_1); - brw_MOV(p, get_element_ud(dst, 0), brw_imm_ud(0)); + brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src2, 0)); brw_MOV(p, get_element_ud(dst, 1), get_element_ud(src1, 0)); brw_set_default_access_mode(p, BRW_ALIGN_16); brw_pop_insn_state(p); @@ -763,6 +764,15 @@ vec4_generator::generate_gs_ff_sync(struct brw_reg dst, brw_set_default_access_mode(p, BRW_ALIGN_1); brw_set_default_mask_control(p, BRW_MASK_DISABLE); brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src0, 0)); + + /* src2 is not an immediate when we use transform feedback */ + if (src2.file != BRW_IMMEDIATE_VALUE) { + brw_MOV(p, suboffset(vec1(src2), 0), suboffset(vec1(src0), 1)); + brw_MOV(p, suboffset(vec1(src2), 1), suboffset(vec1(src0), 2)); + brw_MOV(p, suboffset(vec1(src2), 2), suboffset(vec1(src0), 3)); + brw_MOV(p, suboffset(vec1(src2), 3), suboffset(vec1(src0), 4)); Ken and I discussed this a bit. Ken suggested that this: brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1)); Should be able to copy all 4 dwords in one instruction. What do you think? Sure, if we can do this in just one MOV that is better. I'll give it a try. Piglit shows no regressions with this change, it works like a charm! We will add it to the commit. Thanks, Sam signature.asc Description: This is a digitally signed message part ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/11] glsl: Check realloc return value in ir_function::matching_signature()
On 10.09.2014 00:59, Anuj Phogat wrote: On Mon, Sep 8, 2014 at 11:53 PM, Juha-Pekka Heikkila juhapekka.heikk...@gmail.com wrote: Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com --- src/glsl/ir_function.cpp | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 98bec45..2b2643c 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -24,6 +24,7 @@ #include glsl_types.h #include ir.h #include glsl_parser_extras.h +#include main/errors.h typedef enum { PARAMETER_LIST_NO_MATCH, @@ -296,6 +297,7 @@ ir_function::matching_signature(_mesa_glsl_parse_state *state, bool *is_exact) { ir_function_signature **inexact_matches = NULL; + ir_function_signature **inexact_matches_temp; ir_function_signature *match = NULL; int num_inexact_matches = 0; @@ -321,11 +323,16 @@ ir_function::matching_signature(_mesa_glsl_parse_state *state, free(inexact_matches); return sig; case PARAMETER_LIST_INEXACT_MATCH: - inexact_matches = (ir_function_signature **) + inexact_matches_temp = (ir_function_signature **) realloc(inexact_matches, sizeof(*inexact_matches) * (num_inexact_matches + 1)); - assert(inexact_matches); + if (inexact_matches_temp == NULL) { +_mesa_error_no_memory(__func__); +free(inexact_matches); This free is not required. inexact_matches is null. Why is inexact matches null? This reallocation is inside foreach_in_list{..} and the amount of inexact matches is counted with num_inexact_matches variable. If we're not getting the null from realloc on the first run inexact_matches would have valid pointer. +return NULL; + } + inexact_matches = inexact_matches_temp; inexact_matches[num_inexact_matches++] = sig; continue; case PARAMETER_LIST_NO_MATCH: -- 1.8.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH] i965/fs: Implement SIMD16 dual source blending.
On jue, 2014-09-18 at 08:08 +0200, Iago Toral Quiroga wrote: Hi Jason, On mié, 2014-09-17 at 11:39 -0700, Jason Ekstrand wrote: I haven't tested this yet, just looked it over. I've got a couple of inline comments below. One general comment though: I'm currently working on a bunch of compiler stuff that reworks the way we do FB writes. In particular, it reworks things to use GRF registers instead of the MRF. It probably wouldn't be too bad for me to rebase on top of this or to rebase your patch on top of what I'm doing. I just thought I'd warn you about the conflict. Thanks for the warning, I suppose it should be easy for me to rebase this patch on top of your work if you happen to land yours sooner. On Wed, Sep 17, 2014 at 4:36 AM, Iago Toral Quiroga ito...@igalia.com wrote: From the SNB PRM, volume 4, part 1, page 193: The dual source render target messages only have SIMD8 forms due to maximum message length limitations. SIMD16 pixel shaders must send two of these messages to cover all of the pixels. Each message contains two colors (4 channels each) for each pixel in the message payload. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=82831 --- src/mesa/drivers/dri/i965/brw_eu.h | 1 + src/mesa/drivers/dri/i965/brw_eu_emit.c| 3 +- src/mesa/drivers/dri/i965/brw_fs_generator.cpp | 14 +++-- src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 41 +++--- 4 files changed, 45 insertions(+), 14 deletions(-) I tested this on SandyBridge and IvyBridge. No piglit regressions in these platforms, but would be nice if someone could test this in later platforms too. I only noticed these two tests for dual source blending in piglit though: tests/spec/ext_framebuffer_multisample/alpha-to-one-dual-src-blend.cpp tests/spec/ext_framebuffer_multisample/alpha-to-coverage-dual-src-blend.cpp The first one fails, in both platforms with and without my patch. The second one passes in both platforms, with and without my patch. 
I also tested this with a separate test program to verify that it worked, at least, in a simple case. diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h index e6c26e3..5908ba5 100644 --- a/src/mesa/drivers/dri/i965/brw_eu.h +++ b/src/mesa/drivers/dri/i965/brw_eu.h @@ -266,6 +266,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present); void brw_SAMPLE(struct brw_compile *p, diff --git a/src/mesa/drivers/dri/i965/brw_eu_emit.c b/src/mesa/drivers/dri/i965/brw_eu_emit.c index 39f94e9..ffdbe6d 100644 --- a/src/mesa/drivers/dri/i965/brw_eu_emit.c +++ b/src/mesa/drivers/dri/i965/brw_eu_emit.c @@ -2251,6 +2251,7 @@ void brw_fb_WRITE(struct brw_compile *p, unsigned msg_length, unsigned response_length, bool eot, + bool last_render_target, bool header_present) { struct brw_context *brw = p-brw; @@ -2290,7 +2291,7 @@ void brw_fb_WRITE(struct brw_compile *p, msg_type, msg_length, header_present, - eot, /* last render target write */ + last_render_target, response_length, eot, 0 /* send_commit_msg */); diff --git a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp index 1bc10f5..a4b84aa 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_generator.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_generator.cpp @@ -121,9 +121,12 @@ fs_generator::fire_fb_write(fs_inst *inst, if (inst-opcode == FS_OPCODE_REP_FB_WRITE) msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED; - else if (prog_data-dual_src_blend) - msg_control =
[Mesa-dev] [PATCH 2/3] radeon/winsys: keep track of the last CS a BO was used in
From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 3 +++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +-- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index 1c00a13..393c53c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -65,6 +65,9 @@ struct radeon_bo { /* how many command streams, which are being emitted in a separate * thread, is this bo referenced in? */ int num_active_ioctls; + +/* the ID of the last command submission this buffer was used with */ +uint64_t last_cs_id; }; struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0aa54c2..e821b6f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -107,7 +107,7 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, csc-chunks[1].length_dw = 0; csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs; csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; -csc-chunks[2].length_dw = 2; +csc-chunks[2].length_dw = 5; csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags; csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0]; @@ -382,6 +382,7 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc) { +uint64_t id; unsigned i; if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS, @@ -403,8 +404,11 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs radeon_dump_cs_on_lockup(cs, csc); } -for (i = 0; i csc-crelocs; i++) +id = *((uint64_t 
*)csc-flags[3]); +for (i = 0; i csc-crelocs; i++) { p_atomic_dec(csc-relocs_bo[i]-num_active_ioctls); +csc-relocs_bo[i]-last_cs_id = id; +} radeon_cs_context_cleanup(csc); } @@ -533,6 +537,9 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, } break; } +cs-cst-flags[2] = 0; +cs-cst-flags[3] = 0; +cs-cst-flags[4] = 0; if (cs-ws-thread) { pipe_semaphore_wait(cs-flush_completed); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 089494e..1d0bc64 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -37,7 +37,7 @@ struct radeon_cs_context { struct drm_radeon_cscs; struct drm_radeon_cs_chunk chunks[3]; uint64_tchunk_array[3]; -uint32_tflags[2]; +uint32_tflags[5]; uint32_tcs_trace_id; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/3] radeon/winsys: always send the INFO chunk
From: Christian König christian.koe...@amd.com Old kernels that don't know the chunk should simply ignore it. Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 14 ++ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index ecf8957..0aa54c2 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -115,6 +115,7 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, csc-chunk_array[2] = (uint64_t)(uintptr_t)csc-chunks[2]; csc-cs.chunks = (uint64_t)(uintptr_t)csc-chunk_array; +csc-cs.num_chunks = 3; for (i = 0; i Elements(csc-reloc_indices_hashlist); i++) { csc-reloc_indices_hashlist[i] = -1; @@ -498,48 +499,37 @@ static void radeon_drm_cs_flush(struct radeon_winsys_cs *rcs, p_atomic_inc(cs-cst-relocs_bo[i]-num_active_ioctls); } +cs-cst-flags[0] = 0; switch (cs-base.ring_type) { case RING_DMA: -cs-cst-flags[0] = 0; cs-cst-flags[1] = RADEON_CS_RING_DMA; -cs-cst-cs.num_chunks = 3; if (cs-ws-info.r600_virtual_address) { cs-cst-flags[0] |= RADEON_CS_USE_VM; } break; case RING_UVD: -cs-cst-flags[0] = 0; cs-cst-flags[1] = RADEON_CS_RING_UVD; -cs-cst-cs.num_chunks = 3; break; case RING_VCE: -cs-cst-flags[0] = 0; cs-cst-flags[1] = RADEON_CS_RING_VCE; -cs-cst-cs.num_chunks = 3; break; default: case RING_GFX: -cs-cst-flags[0] = 0; cs-cst-flags[1] = RADEON_CS_RING_GFX; -cs-cst-cs.num_chunks = 2; if (flags RADEON_FLUSH_KEEP_TILING_FLAGS) { cs-cst-flags[0] |= RADEON_CS_KEEP_TILING_FLAGS; -cs-cst-cs.num_chunks = 3; } if (cs-ws-info.r600_virtual_address) { cs-cst-flags[0] |= RADEON_CS_USE_VM; -cs-cst-cs.num_chunks = 3; } if (flags RADEON_FLUSH_END_OF_FRAME) { cs-cst-flags[0] |= RADEON_CS_END_OF_FRAME; -cs-cst-cs.num_chunks = 3; } if (flags RADEON_FLUSH_COMPUTE) { cs-cst-flags[1] = RADEON_CS_RING_COMPUTE; -cs-cst-cs.num_chunks = 3; } break; } -- 1.9.1 
___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 3/3] radeon/winsys: explicitly sync BOs
From: Christian König christian.koe...@amd.com For now syncs all engines accessing a BO using the new kernel interface, older kernels should ignore the new chunk and maintain the old behavior. Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 30 --- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 5 +++-- 2 files changed, 30 insertions(+), 5 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index e821b6f..587719b 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -72,6 +72,9 @@ #include stdint.h #include xf86drm.h +#ifndef RADEON_CHUNK_ID_WAIT_FOR +#define RADEON_CHUNK_ID_WAIT_FOR 0x05 +#endif #define RELOC_DWORDS (sizeof(struct drm_radeon_cs_reloc) / sizeof(uint32_t)) @@ -100,6 +103,13 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, return FALSE; } +csc-cs_ids = CALLOC(csc-nrelocs, sizeof(uint64_t)); +if (!csc-cs_ids) { +FREE(csc-relocs_bo); +FREE(csc-relocs); +return FALSE; +} + csc-chunks[0].chunk_id = RADEON_CHUNK_ID_IB; csc-chunks[0].length_dw = 0; csc-chunks[0].chunk_data = (uint64_t)(uintptr_t)csc-buf; @@ -109,13 +119,17 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; csc-chunks[2].length_dw = 5; csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags; +csc-chunks[3].chunk_id = RADEON_CHUNK_ID_WAIT_FOR; +csc-chunks[3].length_dw = 0; +csc-chunks[3].chunk_data = (uint64_t)(uintptr_t)csc-cs_ids; csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0]; csc-chunk_array[1] = (uint64_t)(uintptr_t)csc-chunks[1]; csc-chunk_array[2] = (uint64_t)(uintptr_t)csc-chunks[2]; +csc-chunk_array[3] = (uint64_t)(uintptr_t)csc-chunks[3]; csc-cs.chunks = (uint64_t)(uintptr_t)csc-chunk_array; -csc-cs.num_chunks = 3; +csc-cs.num_chunks = 4; for (i = 0; i Elements(csc-reloc_indices_hashlist); i++) { 
csc-reloc_indices_hashlist[i] = -1; @@ -285,8 +299,11 @@ static unsigned radeon_add_reloc(struct radeon_drm_cs *cs, size = csc-nrelocs * sizeof(struct drm_radeon_cs_reloc); csc-relocs = realloc(csc-relocs, size); - csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs; + +size = csc-nrelocs * sizeof(uint64_t); +csc-cs_ids = realloc(csc-cs_ids, size); +csc-chunks[3].chunk_data = (uint64_t)(uintptr_t)csc-cs_ids; } /* Initialize the new relocation. */ @@ -383,7 +400,14 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc) { uint64_t id; -unsigned i; +unsigned i, c; + +for (i = 0, c = 0; i csc-crelocs; i++) { +id = csc-relocs_bo[i]-last_cs_id; +if (id) +csc-cs_ids[c++] = id; +} +csc-chunks[3].length_dw = c * 2; if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS, csc-cs, sizeof(struct drm_radeon_cs))) { diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h index 1d0bc64..f903b5d 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.h @@ -35,8 +35,8 @@ struct radeon_cs_context { int fd; struct drm_radeon_cscs; -struct drm_radeon_cs_chunk chunks[3]; -uint64_tchunk_array[3]; +struct drm_radeon_cs_chunk chunks[4]; +uint64_tchunk_array[4]; uint32_tflags[5]; uint32_tcs_trace_id; @@ -47,6 +47,7 @@ struct radeon_cs_context { unsignedvalidated_crelocs; struct radeon_bo**relocs_bo; struct drm_radeon_cs_reloc *relocs; +uint64_t*cs_ids; int reloc_indices_hashlist[512]; -- 1.9.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] target-helpers: add inline qualifier on configuration_query()
To silence unused function warnings. --- src/gallium/auxiliary/target-helpers/inline_drm_helper.h |2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h index dd55a71..9ca7a4a 100644 --- a/src/gallium/auxiliary/target-helpers/inline_drm_helper.h +++ b/src/gallium/auxiliary/target-helpers/inline_drm_helper.h @@ -408,7 +408,7 @@ static const struct drm_conf_ret share_fd_ret = { {true}, }; -static const struct drm_conf_ret * +static inline const struct drm_conf_ret * configuration_query(enum drm_conf conf) { switch (conf) { -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] st/xa: silence unused variable warning
--- src/gallium/state_trackers/xa/xa_tracker.c |1 + 1 file changed, 1 insertion(+) diff --git a/src/gallium/state_trackers/xa/xa_tracker.c b/src/gallium/state_trackers/xa/xa_tracker.c index 268d56b..f69ac8e 100644 --- a/src/gallium/state_trackers/xa/xa_tracker.c +++ b/src/gallium/state_trackers/xa/xa_tracker.c @@ -148,6 +148,7 @@ xa_tracker_create(int drm_fd) #if GALLIUM_STATIC_TARGETS xa-screen = dd_create_screen(drm_fd); +(void) loader_fd; /* silence unused var warning */ #else loader_fd = dup(drm_fd); if (loader_fd == -1) -- 1.7.10.4 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 06/11] glsl: Check realloc return value in ir_function::matching_signature()
On Thu, Sep 18, 2014 at 3:26 AM, Juha-Pekka Heikkila juhapekka.heikk...@gmail.com wrote: On 10.09.2014 00:59, Anuj Phogat wrote: On Mon, Sep 8, 2014 at 11:53 PM, Juha-Pekka Heikkila juhapekka.heikk...@gmail.com wrote: Signed-off-by: Juha-Pekka Heikkila juhapekka.heikk...@gmail.com --- src/glsl/ir_function.cpp | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/glsl/ir_function.cpp b/src/glsl/ir_function.cpp index 98bec45..2b2643c 100644 --- a/src/glsl/ir_function.cpp +++ b/src/glsl/ir_function.cpp @@ -24,6 +24,7 @@ #include glsl_types.h #include ir.h #include glsl_parser_extras.h +#include main/errors.h typedef enum { PARAMETER_LIST_NO_MATCH, @@ -296,6 +297,7 @@ ir_function::matching_signature(_mesa_glsl_parse_state *state, bool *is_exact) { ir_function_signature **inexact_matches = NULL; + ir_function_signature **inexact_matches_temp; ir_function_signature *match = NULL; int num_inexact_matches = 0; @@ -321,11 +323,16 @@ ir_function::matching_signature(_mesa_glsl_parse_state *state, free(inexact_matches); return sig; case PARAMETER_LIST_INEXACT_MATCH: - inexact_matches = (ir_function_signature **) + inexact_matches_temp = (ir_function_signature **) realloc(inexact_matches, sizeof(*inexact_matches) * (num_inexact_matches + 1)); - assert(inexact_matches); + if (inexact_matches_temp == NULL) { +_mesa_error_no_memory(__func__); +free(inexact_matches); This free is not required. inexact_matches is null. Why is inexact matches null? This reallocation is inside foreach_in_list{..} and the amount of inexact matches is counted with num_inexact_matches variable. If we're not getting the null from realloc on the first run inexact_matches would have valid pointer. Right. Ignore my comment. I'm fine with this patch. 
+return NULL; + } + inexact_matches = inexact_matches_temp; inexact_matches[num_inexact_matches++] = sig; continue; case PARAMETER_LIST_NO_MATCH: -- 1.8.5.1 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 2/3] radeon/winsys: keep track of the last CS a BO was used in
On Thu, Sep 18, 2014 at 5:34 PM, Christian König deathsim...@vodafone.de wrote: From: Christian König christian.koe...@amd.com Signed-off-by: Christian König christian.koe...@amd.com --- src/gallium/winsys/radeon/drm/radeon_drm_bo.h | 3 +++ src/gallium/winsys/radeon/drm/radeon_drm_cs.c | 11 +-- src/gallium/winsys/radeon/drm/radeon_drm_cs.h | 2 +- 3 files changed, 13 insertions(+), 3 deletions(-) diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h index 1c00a13..393c53c 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_bo.h +++ b/src/gallium/winsys/radeon/drm/radeon_drm_bo.h @@ -65,6 +65,9 @@ struct radeon_bo { /* how many command streams, which are being emitted in a separate * thread, is this bo referenced in? */ int num_active_ioctls; + +/* the ID of the last command submission this buffer was used with */ +uint64_t last_cs_id; }; struct pb_manager *radeon_bomgr_create(struct radeon_drm_winsys *rws); diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c index 0aa54c2..e821b6f 100644 --- a/src/gallium/winsys/radeon/drm/radeon_drm_cs.c +++ b/src/gallium/winsys/radeon/drm/radeon_drm_cs.c @@ -107,7 +107,7 @@ static boolean radeon_init_cs_context(struct radeon_cs_context *csc, csc-chunks[1].length_dw = 0; csc-chunks[1].chunk_data = (uint64_t)(uintptr_t)csc-relocs; csc-chunks[2].chunk_id = RADEON_CHUNK_ID_FLAGS; -csc-chunks[2].length_dw = 2; +csc-chunks[2].length_dw = 5; csc-chunks[2].chunk_data = (uint64_t)(uintptr_t)csc-flags; csc-chunk_array[0] = (uint64_t)(uintptr_t)csc-chunks[0]; @@ -382,6 +382,7 @@ static boolean radeon_drm_cs_memory_below_limit(struct radeon_winsys_cs *rcs, ui void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs_context *csc) { +uint64_t id; unsigned i; if (drmCommandWriteRead(csc-fd, DRM_RADEON_CS, @@ -403,8 +404,11 @@ void radeon_drm_cs_emit_ioctl_oneshot(struct radeon_drm_cs *cs, struct radeon_cs 
radeon_dump_cs_on_lockup(cs, csc); } -for (i = 0; i csc-crelocs; i++) +id = *((uint64_t *)csc-flags[3]); Please add a comment here that the ID is returned by the CS ioctl. Other than that, the series is: Reviewed-by: Marek Olšák marek.ol...@amd.com Marek ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 28/37] i965/gen6/gs: implement transform feedback support in gen6_gs_visitor
On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com This takes care of generating code required to handle transform feedback. Notice that transform feedback isn't enabled yet, since that requires additional setups in other parts of the code that will come in later patches. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_context.h | 113 ++ src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 309 +- src/mesa/drivers/dri/i965/gen6_gs_visitor.h | 14 ++ 3 files changed, 391 insertions(+), 45 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 7439da1..3418b76 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -553,48 +553,6 @@ struct brw_vs_prog_data { bool uses_vertexid; }; - -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to - * this struct! - */ -struct brw_gs_prog_data -{ - struct brw_vec4_prog_data base; - - /** -* Size of an output vertex, measured in HWORDS (32 bytes). -*/ - unsigned output_vertex_size_hwords; - - unsigned output_topology; - - /** -* Size of the control data (cut bits or StreamID bits), in hwords (32 -* bytes). 0 if there is no control data. -*/ - unsigned control_data_header_size_hwords; - - /** -* Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID -* if the control data is StreamID bits, or -* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). -* Ignored if control_data_header_size is 0. 
-*/ - unsigned control_data_format; - - bool include_primitive_id; - - int invocations; - - /** -* Dispatch mode, can be any of: -* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT -* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE -* GEN7_GS_DISPATCH_MODE_SINGLE -*/ - int dispatch_mode; -}; - /** Number of texture sampler units */ #define BRW_MAX_TEX_UNIT 32 @@ -641,6 +599,77 @@ struct brw_gs_prog_data #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) #define BRW_MAX_GEN6_GS_SURFACES SURF_INDEX_GEN6_SOL_BINDING(BRW_MAX_SOL_BINDINGS) +/* Note: brw_gs_prog_data_compare() must be updated when adding fields to + * this struct! + */ +struct brw_gs_prog_data +{ + struct brw_vec4_prog_data base; + + /** +* Size of an output vertex, measured in HWORDS (32 bytes). +*/ + unsigned output_vertex_size_hwords; + + unsigned output_topology; + + /** +* Size of the control data (cut bits or StreamID bits), in hwords (32 +* bytes). 0 if there is no control data. +*/ + unsigned control_data_header_size_hwords; + + /** +* Format of the control data (either GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_SID +* if the control data is StreamID bits, or +* GEN7_GS_CONTROL_DATA_FORMAT_GSCTL_CUT if the control data is cut bits). +* Ignored if control_data_header_size is 0. +*/ + unsigned control_data_format; + + bool include_primitive_id; + + int invocations; + + /** +* Dispatch mode, can be any of: +* GEN7_GS_DISPATCH_MODE_DUAL_OBJECT +* GEN7_GS_DISPATCH_MODE_DUAL_INSTANCE +* GEN7_GS_DISPATCH_MODE_SINGLE +*/ + int dispatch_mode; + + /** +* Gen6 transform feedback enabled flag. +*/ + bool gen6_xfb_enabled; + + /** +* Gen6: Provoking vertex convention for odd-numbered triangles +* in tristrips. +*/ + GLuint pv_first:1; + + /** +* Gen6: Number of varyings that are output to transform feedback. +*/ + GLuint num_transform_feedback_bindings:7; /* 0-BRW_MAX_SOL_BINDINGS */ + + /** +* Gen6: Map from the index of a transform feedback binding table entry to the +* gl_varying_slot that should be streamed out through that binding table +* entry. 
+*/ + unsigned char transform_feedback_bindings[BRW_MAX_SOL_BINDINGS]; + + /** +* Gen6: Map from the index of a transform feedback binding table entry to the +* swizzles that should be used when streaming out data through that +* binding table entry. +*/ + unsigned char transform_feedback_swizzles[BRW_MAX_SOL_BINDINGS]; +}; + /** * Stride in bytes between shader_time entries. * diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index c1cfe75..b8eaa58 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -97,6 +97,45 @@ gen6_gs_visitor::emit_prolog() this-prim_count = src_reg(this, glsl_type::uint_type); emit(MOV(dst_reg(this-prim_count), 0u)); + if (c-prog_data.gen6_xfb_enabled) { + const struct gl_transform_feedback_info *linked_xfb_info =
Re: [Mesa-dev] [PATCH 29/37] i965/gen6/gs: Setup SOL surfaces for user-provided geometry shaders
On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com Update gen6_gs_binding_table and gen6_sol_surface to use user-provided geometry program information when present. This is necessary to implement transform feedback support. Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/brw_context.h | 2 +- src/mesa/drivers/dri/i965/gen6_sol.c| 119 ++-- 2 files changed, 82 insertions(+), 39 deletions(-) diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 3418b76..82f32af 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -914,7 +914,7 @@ struct brw_stage_state uint32_t push_const_offset; /* Offset in the batchbuffer */ int push_const_size; /* in 256-bit register increments */ - /* Binding table: pointers to SURFACE_STATE entries. */ + /** Binding table: pointers to SURFACE_STATE entries. */ Shouldn't be part of this patch. 
With it removed: Reviewed-by: Jordan Justen jordan.l.jus...@intel.com uint32_t bind_bo_offset; uint32_t surf_offset[BRW_MAX_SURFACES]; diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c index e1c1b3c..d21a010 100644 --- a/src/mesa/drivers/dri/i965/gen6_sol.c +++ b/src/mesa/drivers/dri/i965/gen6_sol.c @@ -41,13 +41,21 @@ gen6_update_sol_surfaces(struct brw_context *brw) /* BRW_NEW_TRANSFORM_FEEDBACK */ struct gl_transform_feedback_object *xfb_obj = ctx-TransformFeedback.CurrentObject; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *shaderprog = - ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX]; - const struct gl_transform_feedback_info *linked_xfb_info = - shaderprog-LinkedTransformFeedback; + const struct gl_shader_program *shaderprog; + const struct gl_transform_feedback_info *linked_xfb_info; int i; + if (brw-geometry_program) { + /* BRW_NEW_GEOMETRY_PROGRAM */ + shaderprog = + ctx-_Shader-CurrentProgram[MESA_SHADER_GEOMETRY]; + } else { + /* BRW_NEW_VERTEX_PROGRAM */ + shaderprog = + ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX]; + } + linked_xfb_info = shaderprog-LinkedTransformFeedback; + for (i = 0; i BRW_MAX_SOL_BINDINGS; ++i) { const int surf_index = SURF_INDEX_GEN6_SOL_BINDING(i); if (_mesa_is_xfb_active_and_unpaused(ctx) @@ -56,12 +64,24 @@ gen6_update_sol_surfaces(struct brw_context *brw) unsigned buffer_offset = xfb_obj-Offset[buffer] / 4 + linked_xfb_info-Outputs[i].DstOffset; - brw_update_sol_surface( -brw, xfb_obj-Buffers[buffer], brw-ff_gs.surf_offset[surf_index], -linked_xfb_info-Outputs[i].NumComponents, -linked_xfb_info-BufferStride[buffer], buffer_offset); + if (brw-geometry_program) { +brw_update_sol_surface( + brw, xfb_obj-Buffers[buffer], + brw-gs.base.surf_offset[surf_index], + linked_xfb_info-Outputs[i].NumComponents, + linked_xfb_info-BufferStride[buffer], buffer_offset); + } else { +brw_update_sol_surface( + brw, xfb_obj-Buffers[buffer], + brw-ff_gs.surf_offset[surf_index], + 
linked_xfb_info-Outputs[i].NumComponents, + linked_xfb_info-BufferStride[buffer], buffer_offset); + } } else { - brw-ff_gs.surf_offset[surf_index] = 0; + if (!brw-geometry_program) +brw-ff_gs.surf_offset[surf_index] = 0; + else +brw-gs.base.surf_offset[surf_index] = 0; } } @@ -73,6 +93,7 @@ const struct brw_tracked_state gen6_sol_surface = { .mesa = 0, .brw = (BRW_NEW_BATCH | BRW_NEW_VERTEX_PROGRAM | + BRW_NEW_GEOMETRY_PROGRAM | BRW_NEW_TRANSFORM_FEEDBACK), .cache = 0 }, @@ -86,38 +107,50 @@ const struct brw_tracked_state gen6_sol_surface = { static void brw_gs_upload_binding_table(struct brw_context *brw) { - struct gl_context *ctx = brw-ctx; - /* BRW_NEW_VERTEX_PROGRAM */ - const struct gl_shader_program *shaderprog = - ctx-_Shader-CurrentProgram[MESA_SHADER_VERTEX]; - bool has_surfaces = false; uint32_t *bind; - if (shaderprog) { - const struct gl_transform_feedback_info *linked_xfb_info = -shaderprog-LinkedTransformFeedback; - /* Currently we only ever upload surfaces for SOL. */ - has_surfaces = linked_xfb_info-NumOutputs != 0; - } + if (!brw-geometry_program) { + struct gl_context *ctx = brw-ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + const struct gl_shader_program *shaderprog = +
[Mesa-dev] [PATCH 3/5] mesa: Set correct array element in vbo_exec_vtx_init.
I'm not familiar with this code, but this sure appears to be a typo. It looks like the intent is to set each array element, not arrays[0] each time. Notably, the loop just below uses array, not arrays. Signed-off-by: Kenneth Graunke kenn...@whitecape.org Cc: mesa-sta...@lists.freedesktop.org --- src/mesa/vbo/vbo_exec_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mesa/vbo/vbo_exec_api.c b/src/mesa/vbo/vbo_exec_api.c index 74aec12..2871100 100644 --- a/src/mesa/vbo/vbo_exec_api.c +++ b/src/mesa/vbo/vbo_exec_api.c @@ -1067,7 +1067,7 @@ void vbo_exec_vtx_init( struct vbo_exec_context *exec ) struct gl_client_array *array; array = arrays[VERT_ATTRIB_FF(i)]; array-BufferObj = NULL; - _mesa_reference_buffer_object(ctx, arrays-BufferObj, + _mesa_reference_buffer_object(ctx, array-BufferObj, vbo-currval[VBO_ATTRIB_POS+i].BufferObj); } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 4/5] mesa: Use VertexArray, not _VertexArray, in array size expressions.
Both sizes are VERT_ATTRIB_MAX, so this has no effect. But it drops a few trivial uses of the derived state. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/main/arrayobj.c | 2 +- src/mesa/main/attrib.c | 2 +- src/mesa/main/varray.c | 8 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/mesa/main/arrayobj.c b/src/mesa/main/arrayobj.c index 0d77b11..6440ea6 100644 --- a/src/mesa/main/arrayobj.c +++ b/src/mesa/main/arrayobj.c @@ -230,7 +230,7 @@ _mesa_initialize_vao(struct gl_context *ctx, obj-RefCount = 1; /* Init the individual arrays */ - for (i = 0; i Elements(obj-_VertexAttrib); i++) { + for (i = 0; i Elements(obj-VertexAttrib); i++) { switch (i) { case VERT_ATTRIB_WEIGHT: init_array(ctx, obj, VERT_ATTRIB_WEIGHT, 1, GL_FLOAT); diff --git a/src/mesa/main/attrib.c b/src/mesa/main/attrib.c index ef98ba7..d90e662 100644 --- a/src/mesa/main/attrib.c +++ b/src/mesa/main/attrib.c @@ -1449,7 +1449,7 @@ copy_array_object(struct gl_context *ctx, /* In theory must be the same anyway, but on recreate make sure it matches */ dest-ARBsemantics = src-ARBsemantics; - for (i = 0; i Elements(src-_VertexAttrib); i++) { + for (i = 0; i Elements(src-VertexAttrib); i++) { _mesa_copy_client_array(ctx, dest-_VertexAttrib[i], src-_VertexAttrib[i]); _mesa_copy_vertex_attrib_array(ctx, dest-VertexAttrib[i], src-VertexAttrib[i]); _mesa_copy_vertex_buffer_binding(ctx, dest-VertexBinding[i], src-VertexBinding[i]); diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index ead7864..09bf52c 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -711,7 +711,7 @@ _mesa_EnableVertexAttribArray(GLuint index) vao = ctx-Array.VAO; - ASSERT(VERT_ATTRIB_GENERIC(index) Elements(vao-_VertexAttrib)); + ASSERT(VERT_ATTRIB_GENERIC(index) Elements(vao-VertexAttrib)); if (!vao-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Enabled) { /* was disabled, now being enabled */ @@ -737,7 +737,7 @@ _mesa_DisableVertexAttribArray(GLuint index) vao = ctx-Array.VAO; - 
ASSERT(VERT_ATTRIB_GENERIC(index) Elements(vao-_VertexAttrib)); + ASSERT(VERT_ATTRIB_GENERIC(index) Elements(vao-VertexAttrib)); if (vao-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Enabled) { /* was enabled, now being disabled */ @@ -831,7 +831,7 @@ get_current_attrib(struct gl_context *ctx, GLuint index, const char *function) return NULL; } - ASSERT(VERT_ATTRIB_GENERIC(index) Elements(ctx-Array.VAO-_VertexAttrib)); + ASSERT(VERT_ATTRIB_GENERIC(index) Elements(ctx-Array.VAO-VertexAttrib)); FLUSH_CURRENT(ctx, 0); return ctx-Current.Attrib[VERT_ATTRIB_GENERIC(index)]; @@ -953,7 +953,7 @@ _mesa_GetVertexAttribPointerv(GLuint index, GLenum pname, GLvoid **pointer) return; } - ASSERT(VERT_ATTRIB_GENERIC(index) Elements(ctx-Array.VAO-_VertexAttrib)); + ASSERT(VERT_ATTRIB_GENERIC(index) Elements(ctx-Array.VAO-VertexAttrib)); *pointer = (GLvoid *) ctx-Array.VAO-VertexAttrib[VERT_ATTRIB_GENERIC(index)].Ptr; } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 5/5] mesa: Replace gl_client_array usage in _mesa_print_arrays()
For now, this prints out the same information as before - just using the newer/non-derived structures. Printing out each structure's fields separately might be more useful, but I've never used this code, so I'm not sure. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/main/varray.c | 47 +++ 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/src/mesa/main/varray.c b/src/mesa/main/varray.c index 09bf52c..380a32e 100644 --- a/src/mesa/main/varray.c +++ b/src/mesa/main/varray.c @@ -1904,16 +1904,19 @@ _mesa_copy_vertex_buffer_binding(struct gl_context *ctx, * Print vertex array's fields. */ static void -print_array(const char *name, GLint index, const struct gl_client_array *array) +print_array(const char *name, GLint index, +const struct gl_vertex_attrib_array *attrib, +const struct gl_vertex_buffer_binding *binding) { if (index = 0) printf( %s[%d]: , name, index); else printf( %s: , name); printf(Ptr=%p, Type=0x%x, Size=%d, ElemSize=%u, Stride=%d, Buffer=%u(Size %lu)\n, - array-Ptr, array-Type, array-Size, - array-_ElementSize, array-StrideB, - array-BufferObj-Name, (unsigned long) array-BufferObj-Size); + _mesa_vertex_attrib_address(attrib, binding), + attrib-Type, attrib-Size, + attrib-_ElementSize, binding-Stride, + binding-BufferObj-Name, (unsigned long) binding-BufferObj-Size); } @@ -1927,18 +1930,30 @@ _mesa_print_arrays(struct gl_context *ctx) GLuint i; printf(Array Object %u\n, vao-Name); - if (vao-_VertexAttrib[VERT_ATTRIB_POS].Enabled) - print_array(Vertex, -1, vao-_VertexAttrib[VERT_ATTRIB_POS]); - if (vao-_VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) - print_array(Normal, -1, vao-_VertexAttrib[VERT_ATTRIB_NORMAL]); - if (vao-_VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) - print_array(Color, -1, vao-_VertexAttrib[VERT_ATTRIB_COLOR0]); - for (i = 0; i ctx-Const.MaxTextureCoordUnits; i++) - if (vao-_VertexAttrib[VERT_ATTRIB_TEX(i)].Enabled) - print_array(TexCoord, i, vao-_VertexAttrib[VERT_ATTRIB_TEX(i)]); - for (i = 0; i 
VERT_ATTRIB_GENERIC_MAX; i++) - if (vao-_VertexAttrib[VERT_ATTRIB_GENERIC(i)].Enabled) - print_array(Attrib, i, vao-_VertexAttrib[VERT_ATTRIB_GENERIC(i)]); + if (vao-VertexAttrib[VERT_ATTRIB_POS].Enabled) { + print_array(Vertex, -1, vao-VertexAttrib[VERT_ATTRIB_POS], +vao-VertexBinding[VERT_ATTRIB_POS]); + } + if (vao-VertexAttrib[VERT_ATTRIB_NORMAL].Enabled) { + print_array(Normal, -1, vao-VertexAttrib[VERT_ATTRIB_NORMAL], +vao-VertexBinding[VERT_ATTRIB_NORMAL]); + } + if (vao-VertexAttrib[VERT_ATTRIB_COLOR0].Enabled) { + print_array(Color, -1, vao-VertexAttrib[VERT_ATTRIB_COLOR0], + vao-VertexBinding[VERT_ATTRIB_COLOR0]); + } + for (i = 0; i ctx-Const.MaxTextureCoordUnits; i++) { + if (vao-VertexAttrib[VERT_ATTRIB_TEX(i)].Enabled) { + print_array(TexCoord, i, vao-VertexAttrib[VERT_ATTRIB_TEX(i)], +vao-VertexBinding[VERT_ATTRIB_TEX(i)]); + } + } + for (i = 0; i VERT_ATTRIB_GENERIC_MAX; i++) { + if (vao-VertexAttrib[VERT_ATTRIB_GENERIC(i)].Enabled) { + print_array(Attrib, i, vao-VertexAttrib[VERT_ATTRIB_GENERIC(i)], + vao-VertexBinding[VERT_ATTRIB_GENERIC(i)]); + } + } } -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/5] mesa: Use proper structure for glGet*(GL_TEXTURE_COORD_ARRAY*).
The code in get.c that handles this uses ctx-Array.VAO-VertexAttrib, which is a gl_vertex_attrib_array structure, not a gl_client_array. The offsets of all fields happened to be the same in both structures, at least on x86_64. Size, Type, and Stride are obviously the same: both structures start with the same fields, in the same order. Enabled is dicier: there are different fields before it in both structures, including pointer sized values which might need special alignment. Signed-off-by: Kenneth Graunke kenn...@whitecape.org Cc: mesa-sta...@lists.freedesktop.org --- src/mesa/main/get_hash_params.py | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/mesa/main/get_hash_params.py b/src/mesa/main/get_hash_params.py index aace8a5..da35684 100644 --- a/src/mesa/main/get_hash_params.py +++ b/src/mesa/main/get_hash_params.py @@ -203,10 +203,10 @@ descriptor=[ [ COLOR_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, 0, NO_EXTRA ], [ COLOR_ARRAY_TYPE, ARRAY_ENUM(VertexAttrib[VERT_ATTRIB_COLOR0].Type), NO_EXTRA ], [ COLOR_ARRAY_STRIDE, ARRAY_INT(VertexAttrib[VERT_ATTRIB_COLOR0].Stride), NO_EXTRA ], - [ TEXTURE_COORD_ARRAY, LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct gl_client_array, Enabled), NO_EXTRA ], - [ TEXTURE_COORD_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, offsetof(struct gl_client_array, Size), NO_EXTRA ], - [ TEXTURE_COORD_ARRAY_TYPE, LOC_CUSTOM, TYPE_ENUM, offsetof(struct gl_client_array, Type), NO_EXTRA ], - [ TEXTURE_COORD_ARRAY_STRIDE, LOC_CUSTOM, TYPE_INT, offsetof(struct gl_client_array, Stride), NO_EXTRA ], + [ TEXTURE_COORD_ARRAY, LOC_CUSTOM, TYPE_BOOLEAN, offsetof(struct gl_vertex_attrib_array, Enabled), NO_EXTRA ], + [ TEXTURE_COORD_ARRAY_SIZE, LOC_CUSTOM, TYPE_INT, offsetof(struct gl_vertex_attrib_array, Size), NO_EXTRA ], + [ TEXTURE_COORD_ARRAY_TYPE, LOC_CUSTOM, TYPE_ENUM, offsetof(struct gl_vertex_attrib_array, Type), NO_EXTRA ], + [ TEXTURE_COORD_ARRAY_STRIDE, LOC_CUSTOM, TYPE_INT, offsetof(struct gl_vertex_attrib_array, Stride), NO_EXTRA ], # 
GL_ARB_multitexture [ MAX_TEXTURE_UNITS, CONTEXT_INT(Const.MaxTextureUnits), NO_EXTRA ], -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/5] mesa: Remove some dead helper functions.
Dead since the _MaxElement removal, but these functions seemed generally applicable, so I decided to remove them in a separate patch. Signed-off-by: Kenneth Graunke kenn...@whitecape.org --- src/mesa/main/arrayobj.h | 26 -- 1 file changed, 26 deletions(-) diff --git a/src/mesa/main/arrayobj.h b/src/mesa/main/arrayobj.h index 1819cd1..3c1f918 100644 --- a/src/mesa/main/arrayobj.h +++ b/src/mesa/main/arrayobj.h @@ -78,32 +78,6 @@ extern void _mesa_update_vao_client_arrays(struct gl_context *ctx, struct gl_vertex_array_object *vao); - -/** Returns the bitmask of all enabled arrays in fixed function mode. - * - * In fixed function mode only the traditional fixed function arrays - * are available. - */ -static inline GLbitfield64 -_mesa_array_object_get_enabled_ff(const struct gl_vertex_array_object *vao) -{ - return vao-_Enabled VERT_BIT_FF_ALL; -} - -/** Returns the bitmask of all enabled arrays in arb/glsl shader mode. - * - * In arb/glsl shader mode all the fixed function and the arb/glsl generic - * arrays are available. Only the first generic array takes - * precedence over the legacy position array. - */ -static inline GLbitfield64 -_mesa_array_object_get_enabled_arb(const struct gl_vertex_array_object *vao) -{ - GLbitfield64 enabled = vao-_Enabled; - return enabled ~(VERT_BIT_POS (enabled VERT_ATTRIB_GENERIC0)); -} - - /* * API functions */ -- 2.1.0 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
Re: [Mesa-dev] [PATCH 30/37] i965/gen6/gs: Buffer PSIZ/flags vertex data in gen6_gs_visitor
Reviewed-by: Jordan Justen jordan.l.jus...@intel.com On Thu, Aug 14, 2014 at 4:12 AM, Iago Toral Quiroga ito...@igalia.com wrote: From: Samuel Iglesias Gonsalvez sigles...@igalia.com Since geometry shaders can alter the value of varyings packed in the first output VUE slot (PSIZ), we need to buffer it together with all the other vertex data so we can emit the right value for each vertex when we do the URB writes. This fixes the following piglit test in gen6: tests/spec/glsl-1.50/execution/redeclare-pervertex-out-subset-gs.shader_test Signed-off-by: Samuel Iglesias Gonsalvez sigles...@igalia.com --- src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp | 79 ++- 1 file changed, 41 insertions(+), 38 deletions(-) diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index b8eaa58..fca7536 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -178,16 +178,33 @@ gen6_gs_visitor::visit(ir_emit_vertex *) /* Buffer all output slots for this vertex in vertex_output */ for (int slot = 0; slot prog_data-vue_map.num_slots; ++slot) { - /* We will handle PSIZ for each vertex at thread end time since it - * is not computed by the GS algorithm and requires specific handling. - */ int varying = prog_data-vue_map.slot_to_varying[slot]; if (varying != VARYING_SLOT_PSIZ) { dst_reg dst(this-vertex_output); dst.reladdr = ralloc(mem_ctx, src_reg); memcpy(dst.reladdr, this-vertex_output_offset, sizeof(src_reg)); emit_urb_slot(dst, varying); + } else { +/* The PSIZ slot can pack multiple varyings in different channels + * and emit_urb_slot() will produce a MOV instruction for each of + * them. Since we are writing to an array, that will translate to + * possibly multiple MOV instructions with an array destination and + * each will generate a scratch write with the same offset into + * scratch space (thus, each one overwriting the previous). This is + * not what we want. 
What we will do instead is emit PSIZ to a + * regular temporary register, then move that register into the + * array. This way we only have one instruction with an array + * destination and we only produce a single scratch write. + */ +dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type)); +emit_urb_slot(tmp, varying); +dst_reg dst(this-vertex_output); +dst.reladdr = ralloc(mem_ctx, src_reg); +memcpy(dst.reladdr, this-vertex_output_offset, sizeof(src_reg)); +vec4_instruction *inst = emit(MOV(dst, src_reg(tmp))); +inst-force_writemask_all = true; } + emit(ADD(dst_reg(this-vertex_output_offset), this-vertex_output_offset, 1u)); } @@ -427,17 +444,12 @@ gen6_gs_visitor::emit_thread_end() memcpy(data.reladdr, this-vertex_output_offset, sizeof(src_reg)); - if (varying == VARYING_SLOT_PSIZ) { - /* We did not buffer PSIZ, emit it directly here */ - emit_urb_slot(dst_reg(MRF, mrf), varying); - } else { - /* Copy this slot to the appropriate message register */ - dst_reg reg = dst_reg(MRF, mrf); - reg.type = output_reg[varying].type; - data.type = reg.type; - vec4_instruction *inst = emit(MOV(reg, data)); - inst-force_writemask_all = true; - } + /* Copy this slot to the appropriate message register */ + dst_reg reg = dst_reg(MRF, mrf); + reg.type = output_reg[varying].type; + data.type = reg.type; + vec4_instruction *inst = emit(MOV(reg, data)); + inst-force_writemask_all = true; mrf++; emit(ADD(dst_reg(this-vertex_output_offset), @@ -585,22 +597,19 @@ gen6_gs_visitor::xfb_buffer_output() /* Buffer all TF outputs for this vertex in xfb_output */ for (int binding = 0; binding prog_data-num_transform_feedback_bindings; binding++) { - /* We will handle PSIZ for each vertex at thread end time since it - * is not computed by the GS algorithm and requires specific handling.
- */ unsigned varying = prog_data-transform_feedback_bindings[binding]; - if (varying != VARYING_SLOT_PSIZ) { - dst_reg dst(this-xfb_output); - dst.reladdr = ralloc(mem_ctx, src_reg); - memcpy(dst.reladdr, this-xfb_output_offset, sizeof(src_reg)); - dst.type =
[Mesa-dev] [PATCH 0/2] nv50, nvc0: fix weirdo zs formats and their blits
There were reports of issues with gallium-nine. It's unclear whether mesa/st uses these; the patches did not produce any piglit changes. However, they seem right... Ilia Mirkin (2): nv50,nvc0: add missing depth/stencil formats to tile flag selection nv50,nvc0: fix 3d blit logic for odd depth/stencil formats src/gallium/drivers/nouveau/nv50/nv50_blit.h| 21 ++--- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 4 src/gallium/drivers/nouveau/nv50/nv50_surface.c | 4 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 4 4 files changed, 26 insertions(+), 7 deletions(-) -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 1/2] nv50, nvc0: add missing depth/stencil formats to tile flag selection
Reported-by: David Heidelberger david.heidelber...@ixit.cz Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- src/gallium/drivers/nouveau/nv50/nv50_miptree.c | 4 src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 4 2 files changed, 8 insertions(+) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c index 14e5a0d..1aacaec 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c @@ -78,9 +78,12 @@ nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) case PIPE_FORMAT_Z16_UNORM: tile_flags = 0x6c + ms; break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: case PIPE_FORMAT_S8_UINT_Z24_UNORM: tile_flags = 0x18 + ms; break; + case PIPE_FORMAT_X24S8_UINT: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: tile_flags = 0x128 + ms; @@ -88,6 +91,7 @@ nv50_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) case PIPE_FORMAT_Z32_FLOAT: tile_flags = 0x40 + ms; break; + case PIPE_FORMAT_X32_S8X24_UINT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: tile_flags = 0x60 + ms; break; diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c index 3baa752..1beda7d 100644 --- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c +++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c @@ -53,12 +53,15 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) else tile_flags = 0x01; break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: case PIPE_FORMAT_S8_UINT_Z24_UNORM: if (compressed) tile_flags = 0x51 + ms; else tile_flags = 0x46; break; + case PIPE_FORMAT_X24S8_UINT: case PIPE_FORMAT_Z24X8_UNORM: case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (compressed) @@ -72,6 +75,7 @@ nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed) else tile_flags = 0x7b; break; + case PIPE_FORMAT_X32_S8X24_UINT: case 
PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: if (compressed) tile_flags = 0xce + ms; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev
[Mesa-dev] [PATCH 2/2] nv50, nvc0: fix 3d blit logic for odd depth/stencil formats
Reported-by: David Heidelberger david.heidelber...@ixit.cz Signed-off-by: Ilia Mirkin imir...@alum.mit.edu --- src/gallium/drivers/nouveau/nv50/nv50_blit.h| 21 ++--- src/gallium/drivers/nouveau/nv50/nv50_surface.c | 4 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_blit.h b/src/gallium/drivers/nouveau/nv50/nv50_blit.h index bdd6a63..756c4c1 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_blit.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_blit.h @@ -111,10 +111,14 @@ nv50_blit_zeta_to_colour_format(enum pipe_format format) case PIPE_FORMAT_Z24_UNORM_S8_UINT: case PIPE_FORMAT_S8_UINT_Z24_UNORM: case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_X24S8_UINT: + case PIPE_FORMAT_S8X24_UINT: return PIPE_FORMAT_R8G8B8A8_UNORM; case PIPE_FORMAT_Z32_FLOAT: return PIPE_FORMAT_R32_FLOAT; case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: return PIPE_FORMAT_R32G32_FLOAT; default: assert(0); @@ -131,19 +135,21 @@ nv50_blit_derive_color_mask(const struct pipe_blit_info *info) uint16_t color_mask = 0; switch (info-dst.format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X24S8_UINT: case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (mask PIPE_MASK_S) color_mask |= 0x1000; - /* fall through */ - case PIPE_FORMAT_Z24X8_UNORM: if (mask PIPE_MASK_Z) color_mask |= 0x0111; break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: case PIPE_FORMAT_S8_UINT_Z24_UNORM: - if (mask PIPE_MASK_Z) - color_mask |= 0x1110; if (mask PIPE_MASK_S) color_mask |= 0x0001; + if (mask PIPE_MASK_Z) + color_mask |= 0x1110; break; default: if (mask (PIPE_MASK_R | PIPE_MASK_Z)) color_mask |= 0x0001; @@ -162,17 +168,18 @@ nv50_blit_eng2d_get_mask(const struct pipe_blit_info *info) uint32_t mask = 0; switch (info-dst.format) { + case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X24S8_UINT: case PIPE_FORMAT_Z24_UNORM_S8_UINT: if (info-mask PIPE_MASK_Z) mask |= 0x00ff; if (info-mask PIPE_MASK_S) 
mask |= 0xff00; break; + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: case PIPE_FORMAT_S8_UINT_Z24_UNORM: if (info-mask PIPE_MASK_Z) mask |= 0xff00; if (info-mask PIPE_MASK_S) mask |= 0x00ff; break; - case PIPE_FORMAT_X8Z24_UNORM: - if (info-mask PIPE_MASK_Z) mask = 0x00ff; - break; default: mask = 0x; break; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_surface.c b/src/gallium/drivers/nouveau/nv50/nv50_surface.c index 8ec4a5f..e1dd6e0 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_surface.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_surface.c @@ -827,6 +827,7 @@ nv50_blit_select_mode(const struct pipe_blit_info *info) switch (info-dst.resource-format) { case PIPE_FORMAT_Z24_UNORM_S8_UINT: case PIPE_FORMAT_Z24X8_UNORM: + case PIPE_FORMAT_X24S8_UINT: switch (mask PIPE_MASK_ZS) { case PIPE_MASK_ZS: return NV50_BLIT_MODE_Z24S8; case PIPE_MASK_Z: return NV50_BLIT_MODE_Z24X8; @@ -834,6 +835,8 @@ nv50_blit_select_mode(const struct pipe_blit_info *info) return NV50_BLIT_MODE_X24S8; } case PIPE_FORMAT_S8_UINT_Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_S8X24_UINT: switch (mask PIPE_MASK_ZS) { case PIPE_MASK_ZS: return NV50_BLIT_MODE_S8Z24; case PIPE_MASK_Z: return NV50_BLIT_MODE_X8Z24; @@ -842,6 +845,7 @@ nv50_blit_select_mode(const struct pipe_blit_info *info) } case PIPE_FORMAT_Z32_FLOAT: case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: + case PIPE_FORMAT_X32_S8X24_UINT: switch (mask PIPE_MASK_ZS) { case PIPE_MASK_ZS: return NV50_BLIT_MODE_ZS; case PIPE_MASK_Z: return NV50_BLIT_MODE_PASS; -- 1.8.5.5 ___ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev