Re: [Mesa-dev] [PATCH 5/8] i965 gen6: Initial implementation of transform feedback.

Eric Anholt Sat, 17 Dec 2011 11:50:34 -0800

On Tue, 13 Dec 2011 15:35:18 -0800, Paul Berry <stereotype...@gmail.com> wrote:
> diff --git a/src/mesa/drivers/dri/i965/brw_gs.c 
> b/src/mesa/drivers/dri/i965/brw_gs.c
> index f5d5898..c323a73 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs.c
> @@ -183,7 +183,31 @@ static void populate_key( struct brw_context *brw,
>     } else if (intel->gen == 6) {
>        /* On Gen6, GS is used for transform feedback. */
>        /* _NEW_TRANSFORM_FEEDBACK */
> -      key->need_gs_prog = ctx->TransformFeedback.CurrentObject->Active;
> +      if (ctx->TransformFeedback.CurrentObject->Active) {
> +         const struct gl_shader_program *shaderprog =
> +            ctx->Shader.CurrentVertexProgram;
> +         const struct gl_transform_feedback_info *linked_xfb_info =
> +            &shaderprog->LinkedTransformFeedback;
> +         int i;
> +
> +         /* Make sure that the VUE slots won't overflow the unsigned chars in
> +          * key->transform_feedback_bindings[].
> +          */
> +         assert (BRW_VERT_RESULT_MAX <= 256);


We generally don't put a space between assert and the opening paren.

Also, this assert is really weird to me.  Neither of the two fields in
the key are related to 256 -- there's a 7-bit field, and a
BRW_MAX_SOL_BINDINGS field.  (Also, for compile-time checks like this,
we now have STATIC_ASSERT.  Hooray!)

> @@ -107,12 +119,27 @@ static void brw_gs_overwrite_header_dw2(struct 
> brw_gs_compile *c,
>   * of DWORD 2.  URB_WRITE messages need the primitive type in bits 6:2 of
>   * DWORD 2.  So this function extracts the primitive type field, bitshifts it
>   * appropriately, and stores it in c->reg.header.
> + *
> + * Also, if num_verts is 3, it converts primitive type
> + * _3DPRIM_TRISTRIP_REVERSE to _3DPRIM_TRISTRIP, so that odd numbered
> + * triangles in a triangle strip will render correctly.
>   */
> -static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c)
> +static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c,
> +                                                unsigned num_verts)
>  {
>     struct brw_compile *p = &c->func;
>     brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2),
>             brw_imm_ud(0x1f));
> +   if (num_verts == 3) {
> +      brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_EQ,
> +              get_element_ud(c->reg.header, 2),
> +              brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE));
> +      {
> +         brw_MOV(p, get_element_ud(c->reg.header, 2),
> +                 brw_imm_ud(_3DPRIM_TRISTRIP));
> +         brw_set_predicate_control(p, BRW_PREDICATE_NONE);
> +      }
> +   }

This doesn't mess up normal rendering of tristrips in any way?  It looks
like it could be a separate commit to me.

> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
> b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index f9b0b71..4ec9ef5 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c

> +/**
> + * Set up a binding table entry for use by stream output logic (transform
> + * feedback).
> + *
> + * buffer_size_minus_1 must me less than BRW_MAX_NUM_BUFFER_ENTRIES.
> + */
> +static void
> +brw_update_sol_surface(struct brw_context *brw, drm_intel_bo *bo,
> +                       uint32_t *out_offset, unsigned num_vector_components,
> +                       unsigned stride_dwords, unsigned offset_dwords,
> +                       uint32_t buffer_size_minus_1)
> +{
> +   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
> +                                    out_offset);
> +   uint32_t width = buffer_size_minus_1 & 0x7f;
> +   uint32_t height = (buffer_size_minus_1 & 0xfff80) >> 7;
> +   uint32_t depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
> +   uint32_t pitch_minus_1 = 4*stride_dwords - 1;
> +   uint32_t surface_format;
> +   uint32_t offset_bytes = 4 * offset_dwords;

Could I get just a little more whitespace in your code?  Between the
declaration and the switch, after the switch, etc.?

> +   surf[1] = bo->offset + offset_bytes; /* reloc */
> +   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
> +           height << BRW_SURFACE_HEIGHT_SHIFT);
> +   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
> +              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);

This looks to me like the hardware width/heigth/depth fields are ending
up programmed as 1 larger than you intend.  Compare to
brw_create_constant_surface().

> +   /* Emit relocation to surface contents.  Section 5.1.1 of the gen4
> +    * bspec ("Data Cache") says that the data cache does not exist as
> +    * a separate cache and is just the sampler cache.
> +    */
> +   drm_intel_bo_emit_reloc(brw->intel.batch.bo,
> +                        *out_offset + 4,
> +                        bo, offset_bytes,
> +                        I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
> +}

If it's the sampler cache, that would be (I915_GEM_DOMAIN_SAMPLER, 0).
But it obviously isn't, since we're reading and writing.

> diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c 
> b/src/mesa/drivers/dri/i965/gen6_sol.c
> new file mode 100644
> index 0000000..af372c1
> --- /dev/null
> +++ b/src/mesa/drivers/dri/i965/gen6_sol.c
> @@ -0,0 +1,102 @@
> +/*
> + * Copyright © 2011 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +/** \file gen6_sol.c
> + *
> + * Code to initialize the binding table entries used by transform feedback.
> + */
> +
> +#include "brw_context.h"
> +#include "intel_buffer_objects.h"
> +#include "brw_defines.h"
> +
> +static void
> +brw_update_sol_surfaces(struct brw_context *brw)

If it's gen6-only, let's call it gen6_update_sol_surfaces.

> +{
> +   unsigned buffer_offsets[BRW_MAX_SOL_BUFFERS];
> +   struct gl_context *ctx = &brw->intel.ctx;
> +   struct intel_context *intel = &brw->intel;
> +   /* _NEW_TRANSFORM_FEEDBACK */
> +   struct gl_transform_feedback_object *xfb_obj =
> +      ctx->TransformFeedback.CurrentObject;
> +   /* BRW_NEW_VERTEX_PROGRAM */
> +   const struct gl_shader_program *shaderprog =
> +      ctx->Shader.CurrentVertexProgram;
> +   const struct gl_transform_feedback_info *linked_xfb_info =
> +      &shaderprog->LinkedTransformFeedback;
> +   int i;
> +   for (i = 0; i < BRW_MAX_SOL_BUFFERS; ++i)
> +      buffer_offsets[i] = xfb_obj->Offset[i] / 4;
> +   for (i = 0; i < BRW_MAX_SOL_BINDINGS; ++i) {
> +      const int surf_index = SURF_INDEX_SOL_BINDING(i);

If you're going to mess with indentation to get your indented code to
fit, that's often a good indication that a separate function is in
order.

The shadowed loop index made me twitch, but it's actually valid.

> +      if (xfb_obj->Active && i < linked_xfb_info->NumOutputs) {
> +         unsigned buffer = linked_xfb_info->Outputs[i].OutputBuffer;
> +         struct gl_buffer_object *buffer_obj = xfb_obj->Buffers[buffer];
> +         drm_intel_bo *bo = intel_buffer_object(buffer_obj)->buffer;
> +         size_t size_dwords = buffer_obj->Size / 4;
> +         unsigned num_components = linked_xfb_info->Outputs[i].NumComponents;
> +         unsigned stride = linked_xfb_info->BufferStride[buffer];
> +         uint32_t buffer_size_minus_1;
> +         /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
> +          * too big to map using a single binding table entry?
> +          */
> +         assert ((size_dwords - buffer_offsets[buffer]) / stride
> +                 <= BRW_MAX_NUM_BUFFER_ENTRIES);
> +         if (size_dwords > buffer_offsets[buffer] + num_components) {
> +            /* There is room for at least 1 transform feedback output in the
> +             * buffer.  Compute the number of additional transform feedback
> +             * outputs the buffer has room for.
> +             */
> +            buffer_size_minus_1 =
> +               (size_dwords - buffer_offsets[buffer] - num_components)
> +               / stride;
> +         } else {
> +            /* There isn't even room for a single transform feedback output 
> in
> +             * the buffer.  We can't configure the binding table entry to
> +             * prevent output entirely; we'll have to rely on the geometry
> +             * shader to detect overflow.  But to minimize the damage in case
> +             * of a bug, set up the binding table entry to just allow a 
> single
> +             * output.
> +             */
> +            buffer_size_minus_1 = 0;
> +         }
> +         intel->vtbl.update_sol_surface(
> +            brw, bo, &brw->bind.surf_offset[surf_index],
> +            num_components, stride, buffer_offsets[buffer],
> +            buffer_size_minus_1);
> +         buffer_offsets[buffer] += num_components;
> +      } else {
> +         brw->bind.surf_offset[surf_index] = 0;
> +      }
> +   }
> +}

brw_update_sol_surface() looks like it would rather live here than in
brw_wm_surface_state.c, and it shouldn't be in a vtable.

I think moving most of the logic in this loop into the
brw_update_sol_surface() function would make me happier.

pgpcLvxm9n3HM.pgp
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 5/8] i965 gen6: Initial implementation of transform feedback.

Reply via email to