Kenneth Graunke <kenn...@whitecape.org> writes:

> On Tuesday, December 01, 2015 12:19:31 AM Jordan Justen wrote:
>> From: Francisco Jerez <curroje...@riseup.net>
>> 
>> The L3 state atom calculates the target L3 partition weights when the
>> program bound to some shader stage is modified, and in case they are
>> far enough from the current partitioning it makes sure that the L3
>> state is re-emitted.
>> 
>> v3: Fix for inconsistent units the context URB size is expressed in.
>>     Clamp URB size to 1008 KB on SKL due to FF hardware limitation.
>> ---
>>  src/mesa/drivers/dri/i965/brw_context.h   |  6 +++
>>  src/mesa/drivers/dri/i965/brw_state.h     |  1 +
>>  src/mesa/drivers/dri/i965/gen7_l3_state.c | 81 
>> +++++++++++++++++++++++++++++++
>>  3 files changed, 88 insertions(+)
>> 
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index 0b91147..48024c6 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -672,6 +672,8 @@ enum brw_predicate_state {
>>  
>>  struct shader_times;
>>  
>> +struct brw_l3_config;
>> +
>>  /**
>>   * brw_context is derived from gl_context.
>>   */
>> @@ -1214,6 +1216,10 @@ struct brw_context
>>     int basevertex;
>>  
>>     struct {
>> +      const struct brw_l3_config *config;
>> +   } l3;
>> +
>> +   struct {
>>        drm_intel_bo *bo;
>>        const char **names;
>>        int *ids;
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
>> b/src/mesa/drivers/dri/i965/brw_state.h
>> index 94734ba..49f301a 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -129,6 +129,7 @@ extern const struct brw_tracked_state gen7_depthbuffer;
>>  extern const struct brw_tracked_state gen7_clip_state;
>>  extern const struct brw_tracked_state gen7_disable_stages;
>>  extern const struct brw_tracked_state gen7_gs_state;
>> +extern const struct brw_tracked_state gen7_l3_state;
>>  extern const struct brw_tracked_state gen7_ps_state;
>>  extern const struct brw_tracked_state gen7_push_constant_space;
>>  extern const struct brw_tracked_state gen7_sbe_state;
>> diff --git a/src/mesa/drivers/dri/i965/gen7_l3_state.c 
>> b/src/mesa/drivers/dri/i965/gen7_l3_state.c
>> index 0b5e231..a895723 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_l3_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_l3_state.c
>> @@ -418,3 +418,84 @@ setup_l3_config(struct brw_context *brw, const struct 
>> brw_l3_config *cfg)
>>        }
>>     }
>>  }
>> +
>> +/**
>> + * Return the unit brw_context::urb::size is expressed in, in KB.  \sa
>> + * brw_device_info::urb::size.
>> + */
>> +static unsigned
>> +get_urb_size_scale(const struct brw_device_info *devinfo)
>> +{
>> +   return (devinfo->gen >= 8 ? devinfo->num_slices : 1);
>> +}
>> +
>> +/**
>> + * Update the URB size in the context state for the specified L3
>> + * configuration.
>> + */
>> +static void
>> +update_urb_size(struct brw_context *brw, const struct brw_l3_config *cfg)
>> +{
>> +   const struct brw_device_info *devinfo = brw->intelScreen->devinfo;
>> +   /* From the SKL "L3 Allocation and Programming" documentation:
>> +    *
>> +    * "URB is limited to 1008KB due to programming restrictions.  This is 
>> not
>> +    * a restriction of the L3 implementation, but of the FF and other 
>> clients.
>> +    * Therefore, in a GT4 implementation it is possible for the programmed
>> +    * allocation of the L3 data array to provide 3*384KB=1152KB for URB, but
>> +    * only 1008KB of this will be used."
>> +    */
>> +   const unsigned max = (devinfo->gen == 9 ? 1008 : ~0);
>
> I think this would be clearer as devinfo->gt == 4, since it applies to
> all currently known GT4 parts, and doesn't apply to GT1-3.

The reason is that only Gen9 has this limitation.  Gen10 will have a
different limit on the URB space addressable by fixed function clients.

> Presumably GT1-3 just don't have enough URB to hit that limit, so it's
> probably moot...*shrug*.
>
Yeah, in theory this limitation applies to all Gen9 hardware but only
GT4 devices have a large enough L3 to hit it regularly.  In principle
you could hit the limit on GT3 too if you tried out some non-validated
configuration so it seems sensible to apply it to Gen9 in general even
if only GT4 parts are going to hit it in practice.  It doesn't hurt
anyway.

> Acked-by: Kenneth Graunke <kenn...@whitecape.org>
>
>> +   const unsigned sz =
>> +      MIN2(max, cfg->n[L3P_URB] * get_l3_way_size(devinfo)) /
>> +      get_urb_size_scale(devinfo);
>> +
>> +   if (brw->urb.size != sz) {
>> +      brw->urb.size = sz;
>> +      brw->ctx.NewDriverState |= BRW_NEW_URB_SIZE;
>> +   }
>> +}
>> +
>> +static void
>> +emit_l3_state(struct brw_context *brw)
>> +{
>> +   const struct brw_l3_weights w = get_pipeline_state_l3_weights(brw);
>> +   const float dw = diff_l3_weights(w, 
>> get_config_l3_weights(brw->l3.config));
>> +   /* The distance between any two compatible weight vectors cannot exceed 
>> two
>> +    * due to the triangle inequality.
>> +    */
>> +   const float large_dw_threshold = 2.0;
>> +   /* Somewhat arbitrary, simply makes sure that there will be no repeated
>> +    * transitions to the same L3 configuration, could probably do better 
>> here.
>> +    */
>> +   const float small_dw_threshold = 0.5;
>> +   /* If we're emitting a new batch the caches should already be clean and 
>> the
>> +    * transition should be relatively cheap, so it shouldn't hurt much to 
>> use
>> +    * the smaller threshold.  Otherwise use the larger threshold so that we
>> +    * only reprogram the L3 mid-batch if the most recently programmed
>> +    * configuration is incompatible with the current pipeline state.
>> +    */
>
> This seems like a good idea.
>
>> +   const float dw_threshold = (brw->ctx.NewDriverState & BRW_NEW_BATCH ?
>> +                               small_dw_threshold : large_dw_threshold);
>> +
>> +   if (dw > dw_threshold && brw->can_do_pipelined_register_writes) {
>> +      const struct brw_l3_config *const cfg =
>> +         get_l3_config(brw->intelScreen->devinfo, w);
>> +
>> +      setup_l3_config(brw, cfg);
>> +      update_urb_size(brw, cfg);
>> +      brw->l3.config = cfg;
>> +   }
>> +}
>> +
>> +const struct brw_tracked_state gen7_l3_state = {
>> +   .dirty = {
>> +      .mesa = 0,
>> +      .brw = BRW_NEW_BATCH |
>> +             BRW_NEW_CS_PROG_DATA |
>> +             BRW_NEW_FS_PROG_DATA |
>> +             BRW_NEW_GS_PROG_DATA |
>> +             BRW_NEW_VS_PROG_DATA,
>> +   },
>> +   .emit = emit_l3_state
>> +};
>> 
> _______________________________________________
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Attachment: signature.asc
Description: PGP signature

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reply via email to