On Tue, 13 Dec 2011 15:35:18 -0800, Paul Berry <stereotype...@gmail.com> wrote: > diff --git a/src/mesa/drivers/dri/i965/brw_gs.c > b/src/mesa/drivers/dri/i965/brw_gs.c > index f5d5898..c323a73 100644 > --- a/src/mesa/drivers/dri/i965/brw_gs.c > +++ b/src/mesa/drivers/dri/i965/brw_gs.c > @@ -183,7 +183,31 @@ static void populate_key( struct brw_context *brw, > } else if (intel->gen == 6) { > /* On Gen6, GS is used for transform feedback. */ > /* _NEW_TRANSFORM_FEEDBACK */ > - key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active; > + if (ctx->TransformFeedback.CurrentObject->Active) { > + const struct gl_shader_program *shaderprog = > + ctx->Shader.CurrentVertexProgram; > + const struct gl_transform_feedback_info *linked_xfb_info = > + &shaderprog->LinkedTransformFeedback; > + int i; > + > + /* Make sure that the VUE slots won't overflow the unsigned chars in > + * key->transform_feedback_bindings[]. > + */ > + assert (BRW_VERT_RESULT_MAX <= 256);
We generally don't put a space between assert and the opening paren. Also, this assert is really weird to me. Neither of the two fields in the key are related to 256 -- there's a 7-bit field, and a BRW_MAX_SOL_BINDINGS field. (Also, for compile-time checks like this, we now have STATIC_ASSERT. Hooray!) > @@ -107,12 +119,27 @@ static void brw_gs_overwrite_header_dw2(struct > brw_gs_compile *c, > * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of > * DWORD 2. So this function extracts the primitive type field, bitshifts it > * appropriately, and stores it in c->reg.header. > + * > + * Also, if num_verts is 3, it converts primitive type > + * _3DPRIM_TRISTRIP_REVERSE to _3DPRIM_TRISTRIP, so that odd numbered > + * triangles in a triangle strip will render correctly. > */ > -static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c) > +static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c, > + unsigned num_verts) > { > struct brw_compile *p = &c->func; > brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), > brw_imm_ud(0x1f)); > + if (num_verts == 3) { > + brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ, > + get_element_ud(c->reg.header, 2), > + brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); > + { > + brw_MOV(p, get_element_ud(c->reg.header, 2), > + brw_imm_ud(_3DPRIM_TRISTRIP)); > + brw_set_predicate_control(p, BRW_PREDICATE_NONE); > + } > + } This doesn't mess up normal rendering of tristrips in any way? It looks like it could be a separate commit to me. > diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > index f9b0b71..4ec9ef5 100644 > --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c > +/** > + * Set up a binding table entry for use by stream output logic (transform > + * feedback). > + * > + * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES. > + */ > +static void > +brw_update_sol_surface(struct brw_context *brw, drm_intel_bo *bo, > + uint32_t *out_offset, unsigned num_vector_components, > + unsigned stride_dwords, unsigned offset_dwords, > + uint32_t buffer_size_minus_1) > +{ > + uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, > + out_offset); > + uint32_t width = buffer_size_minus_1 & 0x7f; > + uint32_t height = (buffer_size_minus_1 & 0xfff80) >> 7; > + uint32_t depth = (buffer_size_minus_1 & 0x7f00000) >> 20; > + uint32_t pitch_minus_1 = 4*stride_dwords - 1; > + uint32_t surface_format; > + uint32_t offset_bytes = 4 * offset_dwords; Could I get just a little more whitespace in your code? Between the declaration and the switch, after the switch, etc.? > + surf[1] = bo->offset + offset_bytes; /* reloc */ > + surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT | > + height << BRW_SURFACE_HEIGHT_SHIFT); > + surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT | > + pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT); This looks to me like the hardware width/heigth/depth fields are ending up programmed as 1 larger than you intend. Compare to brw_create_constant_surface(). > + /* Emit relocation to surface contents. Section 5.1.1 of the gen4 > + * bspec ("Data Cache") says that the data cache does not exist as > + * a separate cache and is just the sampler cache. > + */ > + drm_intel_bo_emit_reloc(brw->intel.batch.bo, > + *out_offset + 4, > + bo, offset_bytes, > + I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER); > +} If it's the sampler cache, that would be (I915_GEM_DOMAIN_SAMPLER, 0). But it obviously isn't, since we're reading and writing. > diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c > b/src/mesa/drivers/dri/i965/gen6_sol.c > new file mode 100644 > index 0000000..af372c1 > --- /dev/null > +++ b/src/mesa/drivers/dri/i965/gen6_sol.c > @@ -0,0 +1,102 @@ > +/* > + * Copyright © 2011 Intel Corporation > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice (including the next > + * paragraph) shall be included in all copies or substantial portions of the > + * Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER > DEALINGS > + * IN THE SOFTWARE. > + */ > + > +/** \file gen6_sol.c > + * > + * Code to initialize the binding table entries used by transform feedback. > + */ > + > +#include "brw_context.h" > +#include "intel_buffer_objects.h" > +#include "brw_defines.h" > + > +static void > +brw_update_sol_surfaces(struct brw_context *brw) If it's gen6-only, let's call it gen6_update_sol_surfaces. > +{ > + unsigned buffer_offsets[BRW_MAX_SOL_BUFFERS]; > + struct gl_context *ctx = &brw->intel.ctx; > + struct intel_context *intel = &brw->intel; > + /* _NEW_TRANSFORM_FEEDBACK */ > + struct gl_transform_feedback_object *xfb_obj = > + ctx->TransformFeedback.CurrentObject; > + /* BRW_NEW_VERTEX_PROGRAM */ > + const struct gl_shader_program *shaderprog = > + ctx->Shader.CurrentVertexProgram; > + const struct gl_transform_feedback_info *linked_xfb_info = > + &shaderprog->LinkedTransformFeedback; > + int i; > + for (i = 0; i < BRW_MAX_SOL_BUFFERS; ++i) > + buffer_offsets[i] = xfb_obj->Offset[i] / 4; > + for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) { > + const int surf_index = SURF_INDEX_SOL_BINDING(i); If you're going to mess with indentation to get your indented code to fit, that's often a good indication that a separate function is in order. The shadowed loop index made me twitch, but it's actually valid. > + if (xfb_obj->Active && i < linked_xfb_info->NumOutputs) { > + unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer; > + struct gl_buffer_object *buffer_obj = xfb_obj->Buffers[buffer]; > + drm_intel_bo *bo = intel_buffer_object(buffer_obj)->buffer; > + size_t size_dwords = buffer_obj->Size / 4; > + unsigned num_components = linked_xfb_info->Outputs[i].NumComponents; > + unsigned stride = linked_xfb_info->BufferStride[buffer]; > + uint32_t buffer_size_minus_1; > + /* FIXME: can we rely on core Mesa to ensure that the buffer isn't > + * too big to map using a single binding table entry? > + */ > + assert ((size_dwords - buffer_offsets[buffer]) / stride > + <= BRW_MAX_NUM_BUFFER_ENTRIES); > + if (size_dwords > buffer_offsets[buffer] + num_components) { > + /* There is room for at least 1 transform feedback output in the > + * buffer. Compute the number of additional transform feedback > + * outputs the buffer has room for. > + */ > + buffer_size_minus_1 = > + (size_dwords - buffer_offsets[buffer] - num_components) > + / stride; > + } else { > + /* There isn't even room for a single transform feedback output > in > + * the buffer. We can't configure the binding table entry to > + * prevent output entirely; we'll have to rely on the geometry > + * shader to detect overflow. But to minimize the damage in case > + * of a bug, set up the binding table entry to just allow a > single > + * output. > + */ > + buffer_size_minus_1 = 0; > + } > + intel->vtbl.update_sol_surface( > + brw, bo, &brw->bind.surf_offset[surf_index], > + num_components, stride, buffer_offsets[buffer], > + buffer_size_minus_1); > + buffer_offsets[buffer] += num_components; > + } else { > + brw->bind.surf_offset[surf_index] = 0; > + } > + } > +} brw_update_sol_surface() looks like it would rather live here than in brw_wm_surface_state.c, and it shouldn't be in a vtable. I think moving most of the logic in this loop into the brw_update_sol_surface() function would make me happier.
pgpcLvxm9n3HM.pgp
Description: PGP signature
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev