On 09/03/2013 06:18 PM, Paul Berry wrote:
> ---
> src/mesa/drivers/dri/i965/brw_state.h | 9 +-
> src/mesa/drivers/dri/i965/gen6_sf_state.c | 153
> +-
> src/mesa/drivers/dri/i965/gen7_sf_state.c | 64 +
> 3 files changed, 97 insertions(+), 129 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_state.h
> b/src/mesa/drivers/dri/i965/brw_state.h
> index 22e4a61..dd3e216 100644
> --- a/src/mesa/drivers/dri/i965/brw_state.h
> +++ b/src/mesa/drivers/dri/i965/brw_state.h
> @@ -223,9 +223,12 @@ void gen4_init_vtable_sampler_functions(struct
> brw_context *brw);
> void gen7_init_vtable_sampler_functions(struct brw_context *brw);
>
> /* gen6_sf_state.c */
> -uint32_t
> -get_attr_override(const struct brw_vue_map *vue_map, int
> urb_entry_read_offset,
> - int fs_attr, bool two_side_color, uint32_t
> *max_source_attr);
> +void
> +calculate_attr_overrides(const struct brw_context *brw,
> + uint16_t *attr_overrides,
> + uint32_t *point_sprite_enables,
> + uint32_t *flat_enables,
> + uint32_t *urb_entry_read_length);
>
> /* brw_vs_surface_state.c */
> void
> diff --git a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> index dfe9a31..7094994 100644
> --- a/src/mesa/drivers/dri/i965/gen6_sf_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_sf_state.c
> @@ -52,7 +52,7 @@
> * the VUE that are not needed by the fragment shader. It is measured in
> * 256-bit increments.
> */
> -uint32_t
> +static uint32_t
> get_attr_override(const struct brw_vue_map *vue_map, int
> urb_entry_read_offset,
> int fs_attr, bool two_side_color, uint32_t
> *max_source_attr)
> {
> @@ -123,21 +123,98 @@ get_attr_override(const struct brw_vue_map *vue_map,
> int urb_entry_read_offset,
> return source_attr;
> }
>
> +
> +/**
> + * Create the mapping from the FS inputs we produce to the VS outputs they
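Only VS outputs? (The code below sources from brw->vue_map_geom_out, so this may not be limited to the vertex shader.)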
> + * source from.
> + */
> +void
> +calculate_attr_overrides(const struct brw_context *brw,
> + uint16_t *attr_overrides,
> + uint32_t *point_sprite_enables,
> + uint32_t *flat_enables,
> + uint32_t *urb_entry_read_length)
> +{
> + const int urb_entry_read_offset = BRW_SF_URB_ENTRY_READ_OFFSET;
> + uint32_t max_source_attr = 0;
> + int input_index = 0;
> +
> + /* _NEW_LIGHT */
> + bool shade_model_flat = brw->ctx.Light.ShadeModel == GL_FLAT;
> +
> + for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
> + enum glsl_interp_qualifier interp_qualifier =
> + brw->fragment_program->InterpQualifier[attr];
> + bool is_gl_Color = attr == VARYING_SLOT_COL0 || attr ==
> VARYING_SLOT_COL1;
> +
> + if (!(brw->fragment_program->Base.InputsRead & BITFIELD64_BIT(attr)))
> + continue;
> +
> + /* _NEW_POINT */
> + if (brw->ctx.Point.PointSprite &&
> + (attr >= VARYING_SLOT_TEX0 && attr <= VARYING_SLOT_TEX7) &&
> + brw->ctx.Point.CoordReplace[attr - VARYING_SLOT_TEX0]) {
> + *point_sprite_enables |= (1 << input_index);
> + }
> +
> + if (attr == VARYING_SLOT_PNTC)
> + *point_sprite_enables |= (1 << input_index);
> +
> + /* flat shading */
> + if (interp_qualifier == INTERP_QUALIFIER_FLAT ||
> + (shade_model_flat && is_gl_Color &&
> + interp_qualifier == INTERP_QUALIFIER_NONE))
> + *flat_enables |= (1 << input_index);
> +
> + /* The hardware can only do the overrides on 16 overrides at a
> + * time, and the other up to 16 have to be lined up so that the
> + * input index = the output index. We'll need to do some
> + * tweaking to make sure that's the case.
> + */
> + assert(input_index < 16 || attr == input_index);
> +
> + /* BRW_NEW_VUE_MAP_GEOM_OUT | _NEW_LIGHT | _NEW_PROGRAM */
> + attr_overrides[input_index++] =
> + get_attr_override(&brw->vue_map_geom_out,
> + urb_entry_read_offset, attr,
> + brw->ctx.VertexProgram._TwoSideEnabled,
> + &max_source_attr);
> + }
> +
> + for (; input_index < VARYING_SLOT_MAX; input_index++)
> + attr_overrides[input_index] = 0;
> +
> + /* From the Sandy Bridge PRM, Volume 2, Part 1, documentation for
> + * 3DSTATE_SF DWord 1 bits 15:11, "Vertex URB Entry Read Length":
> + *
> + * "This field should be set to the minimum length required to read the
> + * maximum source attribute. The maximum source attribute is indicated
> + * by the maximum value of the enabled Attribute # Source Attribute if
> + * Attribute Swizzle Enable is set, Number of Output Attributes-1 if
> + * enable is not set.
> + * read_length = ceiling((max_source_attr + 1) / 2)
> + *
> +* [errata] Corruption/Hang p