Re: [Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-05-08 Thread Abdiel Janulgue


On 05/07/2015 04:43 PM, Pohjolainen, Topi wrote:
> On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
>> This patch implements the binding table enable command which is also
>> used to allocate a binding table pool where hardware-generated
>> binding table entries are flushed into. Each binding table offset in
>> the binding table pool is unique per each shader stage that are
>> enabled within a batch.
>>
>> Also insert the required brw_tracked_state objects to enable
>> hw-generated binding tables in normal render path.
>>
>> Signed-off-by: Abdiel Janulgue 
>> ---
>>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 
>> ++
>>  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
>>  src/mesa/drivers/dri/i965/brw_context.h|  5 ++
>>  src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
>>  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
>>  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
>>  6 files changed, 92 insertions(+)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
>> b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> index 459165a..a58e32e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>> @@ -44,6 +44,11 @@
>>  #include "brw_state.h"
>>  #include "intel_batchbuffer.h"
>>  
>> +/* Somehow the hw-binding table pool offset must start here, otherwise
>> + * the GPU will hang
>> + */
>> +#define HW_BT_START_OFFSET 256;
> 
> I think we want to understand this a little better before enabling...
> 
>> +
>>  /**
>>   * Upload a shader stage's binding table as indirect state.
>>   *
>> @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
>> .emit = brw_gs_upload_binding_table,
>>  };
>>  
>> +/**
>> + * Hardware-generated binding tables for the resource streamer
>> + */
>> +void
>> +gen7_disable_hw_binding_tables(struct brw_context *brw)
>> +{
>> +   BEGIN_BATCH(3);
>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>> +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, 
>> BRW_HW_BINDING_TABLE_ENABLE) |
>> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
>> +   OUT_BATCH(0);
>> +   ADVANCE_BATCH();
>> +
>> +   /* Pipe control workaround */
>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>> +}
>> +
>> +void
>> +gen7_enable_hw_binding_tables(struct brw_context *brw)
>> +{
>> +   if (!brw->has_resource_streamer) {
>> +  gen7_disable_hw_binding_tables(brw);
> 
> I started wondering why we really need this - RS is disabled by default and
> we haven't needed to do anything to disable it before.
> 
>> +  return;
>> +   }
>> +
>> +   if (!brw->hw_bt_pool.bo) {
>> +  /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding 
>> Tables:
>> +   *
>> +   *  "A maximum of 16,383 Binding tables are allowed in any batch 
>> buffer."
>> +   */
>> +  int max_size = 16383 * 4;
> 
> But does it really need this much all the time? I guess I need to go and
> read the spec.

This is actually just one reusable buffer object that sticks around for
the lifetime of the context. Compare this with creating lots of BOs
every time we enable the resource streamer. I think it helps reduce
the number of relocations we have.

> 
>> +  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
>> +  max_size, 64);
>> +  brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>> +   }
>> +
>> +   uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, 
>> BRW_HW_BINDING_TABLE_ENABLE);
>> +   if (brw->is_haswell)
>> +  dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | 
>> HSW_HW_BINDING_TABLE_RESERVED;
> 
> These are overflowing 80 columns.
> 
>> +
>> +   BEGIN_BATCH(3);
>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
>> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
>> + brw->hw_bt_pool.bo->size);
>> +   ADVANCE_BATCH();
>> +
>> +   /* Pipe control workaround */
>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> 
> Would you have a spec reference for this?

3D-Media-GPGPU Engine > Resource Streamer [HSW+] > Hardware Binding
Tables [HSW+] > Programming note

"When switching between HW and SW binding table generation, SW must
issue a state cache invalidate."

> 
>> +}
>> +
>> +void
>> +gen7_reset_rs_pool_offsets(struct brw_context *brw)
>> +{
>> +   brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
>> +}
>> +
>> +const struct brw_tracked_state gen7_hw_binding_tables = {
>> +   .dirty = {
>> +  .mesa = 0,
>> +  .brw = BRW_NEW_BATCH,
>> +   },
>> +   .emit = gen7_enable_hw_binding_tables
>> +};
>> +
>>  /** @} */
>>  
>>  /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
>> b/src/mesa/drivers/dri/i965/brw_context.c
>> index c7e1e81..9c7ccae 100

Re: [Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-05-08 Thread Abdiel Janulgue


On 05/07/2015 05:46 PM, Pohjolainen, Topi wrote:
> On Thu, May 07, 2015 at 04:43:21PM +0300, Pohjolainen, Topi wrote:
>> On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
>>> This patch implements the binding table enable command which is also
>>> used to allocate a binding table pool where hardware-generated
>>> binding table entries are flushed into. Each binding table offset in
>>> the binding table pool is unique per each shader stage that are
>>> enabled within a batch.
>>>
>>> Also insert the required brw_tracked_state objects to enable
>>> hw-generated binding tables in normal render path.
>>>
>>> Signed-off-by: Abdiel Janulgue 
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 
>>> ++
>>>  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
>>>  src/mesa/drivers/dri/i965/brw_context.h|  5 ++
>>>  src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
>>>  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
>>>  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
>>>  6 files changed, 92 insertions(+)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
>>> b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>> index 459165a..a58e32e 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
>>> @@ -44,6 +44,11 @@
>>>  #include "brw_state.h"
>>>  #include "intel_batchbuffer.h"
>>>  
>>> +/* Somehow the hw-binding table pool offset must start here, otherwise
>>> + * the GPU will hang
>>> + */
>>> +#define HW_BT_START_OFFSET 256;
>>
>> I think we want to understand this a little better before enabling...

Actually, now that I remember, I had my notes somewhere from when I first
enabled this hw-binding table over a year ago. I dug them up, and 256 is
actually the size of a single stage's "hw-binding table state"
expressed in hw-binding table format. Details:

The Bspec section 3DSTATE_BINDING_TABLE_POINTERS_x > Pointer to PS Binding
Table lists the format as:

"SurfaceStateOffset[16:6]BINDING_TABLE_STATE*256 When
HW-generated binding table is enabled"

So this is 16-bits[1] x 256 = 512 bytes.

Now this offset must be expressed in "Pointer to PS Binding Table" using
the hw-generated binding table format, which must be aligned to 16:6.
However, the bit entry field in dw1 of 3DSTATE_BINDING_TABLE_POINTERS_x
must be set within 15:5, so this value should be shifted right by one
(>> 1). Hence the 256 (a similar case is evident in the function
gen7_update_binding_table() in patch 4 of this series).

It seems the RS hardware is extremely intolerant of even slight
variations, hence the hangs when this is not followed closely.

In the next version, I can make the magic numbers a bit clearer.

[1] 3D-Media-GPGPU Engine > Shared Functions > 3D Sampler > State > HW
Generated BINDING_TABLE_STATE

>>
>>> +
>>>  /**
>>>   * Upload a shader stage's binding table as indirect state.
>>>   *
>>> @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
>>> .emit = brw_gs_upload_binding_table,
>>>  };
>>>  
>>> +/**
>>> + * Hardware-generated binding tables for the resource streamer
>>> + */
>>> +void
>>> +gen7_disable_hw_binding_tables(struct brw_context *brw)
>>> +{
>>> +   BEGIN_BATCH(3);
>>> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
>>> +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, 
>>> BRW_HW_BINDING_TABLE_ENABLE) |
>>> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
>>> +   OUT_BATCH(0);
>>> +   ADVANCE_BATCH();
>>> +
>>> +   /* Pipe control workaround */
>>> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
>>> +}
>>> +
>>> +void
>>> +gen7_enable_hw_binding_tables(struct brw_context *brw)
>>> +{
>>> +   if (!brw->has_resource_streamer) {
>>> +  gen7_disable_hw_binding_tables(brw);
>>
>> I started wondering why we really need this - RS is disabled by default and
>> we haven't needed to do anything to disable it before.
> 
> Right, patch number eight gave me the answer, we want to disable it for blorp.
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-05-07 Thread Pohjolainen, Topi
On Thu, May 07, 2015 at 04:43:21PM +0300, Pohjolainen, Topi wrote:
> On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
> > This patch implements the binding table enable command which is also
> > used to allocate a binding table pool where hardware-generated
> > binding table entries are flushed into. Each binding table offset in
> > the binding table pool is unique per each shader stage that are
> > enabled within a batch.
> > 
> > Also insert the required brw_tracked_state objects to enable
> > hw-generated binding tables in normal render path.
> > 
> > Signed-off-by: Abdiel Janulgue 
> > ---
> >  src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 
> > ++
> >  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
> >  src/mesa/drivers/dri/i965/brw_context.h|  5 ++
> >  src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
> >  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
> >  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
> >  6 files changed, 92 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
> > b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > index 459165a..a58e32e 100644
> > --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > @@ -44,6 +44,11 @@
> >  #include "brw_state.h"
> >  #include "intel_batchbuffer.h"
> >  
> > +/* Somehow the hw-binding table pool offset must start here, otherwise
> > + * the GPU will hang
> > + */
> > +#define HW_BT_START_OFFSET 256;
> 
> I think we want to understand this a little better before enabling...
> 
> > +
> >  /**
> >   * Upload a shader stage's binding table as indirect state.
> >   *
> > @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
> > .emit = brw_gs_upload_binding_table,
> >  };
> >  
> > +/**
> > + * Hardware-generated binding tables for the resource streamer
> > + */
> > +void
> > +gen7_disable_hw_binding_tables(struct brw_context *brw)
> > +{
> > +   BEGIN_BATCH(3);
> > +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
> > +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, 
> > BRW_HW_BINDING_TABLE_ENABLE) |
> > + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
> > +   OUT_BATCH(0);
> > +   ADVANCE_BATCH();
> > +
> > +   /* Pipe control workaround */
> > +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +}
> > +
> > +void
> > +gen7_enable_hw_binding_tables(struct brw_context *brw)
> > +{
> > +   if (!brw->has_resource_streamer) {
> > +  gen7_disable_hw_binding_tables(brw);
> 
> I started wondering why we really need this - RS is disabled by default and
> we haven't needed to do anything to disable it before.
> 
> > +  return;
> > +   }
> > +
> > +   if (!brw->hw_bt_pool.bo) {
> > +  /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware 
> > Binding Tables:
> > +   *
> > +   *  "A maximum of 16,383 Binding tables are allowed in any batch 
> > buffer."
> > +   */
> > +  int max_size = 16383 * 4;
> 
> But does it really need this much all the time? I guess I need to go and
> read the spec.

I haven't read through the entire series, but it seems that we can calculate
(at least for gather constants) pretty accurately how much space we need.
Could we also do that here, based on the program data of all stages? I may be
missing something and just throwing questions up in the air, so bear with me...
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-05-07 Thread Pohjolainen, Topi
On Thu, May 07, 2015 at 04:43:21PM +0300, Pohjolainen, Topi wrote:
> On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
> > This patch implements the binding table enable command which is also
> > used to allocate a binding table pool where hardware-generated
> > binding table entries are flushed into. Each binding table offset in
> > the binding table pool is unique per each shader stage that are
> > enabled within a batch.
> > 
> > Also insert the required brw_tracked_state objects to enable
> > hw-generated binding tables in normal render path.
> > 
> > Signed-off-by: Abdiel Janulgue 
> > ---
> >  src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 
> > ++
> >  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
> >  src/mesa/drivers/dri/i965/brw_context.h|  5 ++
> >  src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
> >  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
> >  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
> >  6 files changed, 92 insertions(+)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
> > b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > index 459165a..a58e32e 100644
> > --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> > @@ -44,6 +44,11 @@
> >  #include "brw_state.h"
> >  #include "intel_batchbuffer.h"
> >  
> > +/* Somehow the hw-binding table pool offset must start here, otherwise
> > + * the GPU will hang
> > + */
> > +#define HW_BT_START_OFFSET 256;
> 
> I think we want to understand this a little better before enabling...
> 
> > +
> >  /**
> >   * Upload a shader stage's binding table as indirect state.
> >   *
> > @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
> > .emit = brw_gs_upload_binding_table,
> >  };
> >  
> > +/**
> > + * Hardware-generated binding tables for the resource streamer
> > + */
> > +void
> > +gen7_disable_hw_binding_tables(struct brw_context *brw)
> > +{
> > +   BEGIN_BATCH(3);
> > +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
> > +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, 
> > BRW_HW_BINDING_TABLE_ENABLE) |
> > + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
> > +   OUT_BATCH(0);
> > +   ADVANCE_BATCH();
> > +
> > +   /* Pipe control workaround */
> > +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > +}
> > +
> > +void
> > +gen7_enable_hw_binding_tables(struct brw_context *brw)
> > +{
> > +   if (!brw->has_resource_streamer) {
> > +  gen7_disable_hw_binding_tables(brw);
> 
> I started wondering why we really need this - RS is disabled by default and
> we haven't needed to do anything to disable it before.

Right, patch number eight gave me the answer, we want to disable it for blorp.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-05-07 Thread Pohjolainen, Topi
On Tue, Apr 28, 2015 at 11:08:00PM +0300, Abdiel Janulgue wrote:
> This patch implements the binding table enable command which is also
> used to allocate a binding table pool where hardware-generated
> binding table entries are flushed into. Each binding table offset in
> the binding table pool is unique per each shader stage that are
> enabled within a batch.
> 
> Also insert the required brw_tracked_state objects to enable
> hw-generated binding tables in normal render path.
> 
> Signed-off-by: Abdiel Janulgue 
> ---
>  src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 
> ++
>  src/mesa/drivers/dri/i965/brw_context.c|  4 ++
>  src/mesa/drivers/dri/i965/brw_context.h|  5 ++
>  src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
>  src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
>  6 files changed, 92 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
> b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> index 459165a..a58e32e 100644
> --- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
> +++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
> @@ -44,6 +44,11 @@
>  #include "brw_state.h"
>  #include "intel_batchbuffer.h"
>  
> +/* Somehow the hw-binding table pool offset must start here, otherwise
> + * the GPU will hang
> + */
> +#define HW_BT_START_OFFSET 256;

I think we want to understand this a little better before enabling...

> +
>  /**
>   * Upload a shader stage's binding table as indirect state.
>   *
> @@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
> .emit = brw_gs_upload_binding_table,
>  };
>  
> +/**
> + * Hardware-generated binding tables for the resource streamer
> + */
> +void
> +gen7_disable_hw_binding_tables(struct brw_context *brw)
> +{
> +   BEGIN_BATCH(3);
> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
> +   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, 
> BRW_HW_BINDING_TABLE_ENABLE) |
> + brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
> +   OUT_BATCH(0);
> +   ADVANCE_BATCH();
> +
> +   /* Pipe control workaround */
> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> +}
> +
> +void
> +gen7_enable_hw_binding_tables(struct brw_context *brw)
> +{
> +   if (!brw->has_resource_streamer) {
> +  gen7_disable_hw_binding_tables(brw);

I started wondering why we really need this - RS is disabled by default and
we haven't needed to do anything to disable it before.

> +  return;
> +   }
> +
> +   if (!brw->hw_bt_pool.bo) {
> +  /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding 
> Tables:
> +   *
> +   *  "A maximum of 16,383 Binding tables are allowed in any batch 
> buffer."
> +   */
> +  int max_size = 16383 * 4;

But does it really need this much all the time? I guess I need to go and
read the spec.

> +  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
> +  max_size, 64);
> +  brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
> +   }
> +
> +   uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, 
> BRW_HW_BINDING_TABLE_ENABLE);
> +   if (brw->is_haswell)
> +  dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | 
> HSW_HW_BINDING_TABLE_RESERVED;

These are overflowing 80 columns.

> +
> +   BEGIN_BATCH(3);
> +   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
> +   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
> + brw->hw_bt_pool.bo->size);
> +   ADVANCE_BATCH();
> +
> +   /* Pipe control workaround */
> +   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);

Would you have a spec reference for this?

> +}
> +
> +void
> +gen7_reset_rs_pool_offsets(struct brw_context *brw)
> +{
> +   brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
> +}
> +
> +const struct brw_tracked_state gen7_hw_binding_tables = {
> +   .dirty = {
> +  .mesa = 0,
> +  .brw = BRW_NEW_BATCH,
> +   },
> +   .emit = gen7_enable_hw_binding_tables
> +};
> +
>  /** @} */
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
> b/src/mesa/drivers/dri/i965/brw_context.c
> index c7e1e81..9c7ccae 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.c
> +++ b/src/mesa/drivers/dri/i965/brw_context.c
> @@ -953,6 +953,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
> if (brw->wm.base.scratch_bo)
>drm_intel_bo_unreference(brw->wm.base.scratch_bo);
>  
> +   gen7_reset_rs_pool_offsets(brw);
> +   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
> +   brw->hw_bt_pool.bo = NULL;
> +
> drm_intel_gem_context_destroy(brw->hw_ctx);
>  
> if (ctx->swrast_context) {
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index 07626af..1c72b74 100644
> --- a/src/mesa/drivers/dri/i965/b

[Mesa-dev] [PATCH 03/27] i965: Enable hardware-generated binding tables on render path.

2015-04-28 Thread Abdiel Janulgue
This patch implements the binding table enable command which is also
used to allocate a binding table pool where hardware-generated
binding table entries are flushed into. Each binding table offset in
the binding table pool is unique per each shader stage that are
enabled within a batch.

Also insert the required brw_tracked_state objects to enable
hw-generated binding tables in normal render path.

Signed-off-by: Abdiel Janulgue 
---
 src/mesa/drivers/dri/i965/brw_binding_tables.c | 70 ++
 src/mesa/drivers/dri/i965/brw_context.c|  4 ++
 src/mesa/drivers/dri/i965/brw_context.h|  5 ++
 src/mesa/drivers/dri/i965/brw_state.h  |  7 +++
 src/mesa/drivers/dri/i965/brw_state_upload.c   |  2 +
 src/mesa/drivers/dri/i965/intel_batchbuffer.c  |  4 ++
 6 files changed, 92 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_binding_tables.c 
b/src/mesa/drivers/dri/i965/brw_binding_tables.c
index 459165a..a58e32e 100644
--- a/src/mesa/drivers/dri/i965/brw_binding_tables.c
+++ b/src/mesa/drivers/dri/i965/brw_binding_tables.c
@@ -44,6 +44,11 @@
 #include "brw_state.h"
 #include "intel_batchbuffer.h"
 
+/* Somehow the hw-binding table pool offset must start here, otherwise
+ * the GPU will hang
+ */
+#define HW_BT_START_OFFSET 256;
+
 /**
  * Upload a shader stage's binding table as indirect state.
  *
@@ -163,6 +168,71 @@ const struct brw_tracked_state brw_gs_binding_table = {
.emit = brw_gs_upload_binding_table,
 };
 
+/**
+ * Hardware-generated binding tables for the resource streamer
+ */
+void
+gen7_disable_hw_binding_tables(struct brw_context *brw)
+{
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_BATCH(SET_FIELD(BRW_HW_BINDING_TABLE_OFF, BRW_HW_BINDING_TABLE_ENABLE) |
+ brw->is_haswell ? HSW_HW_BINDING_TABLE_RESERVED : 0);
+   OUT_BATCH(0);
+   ADVANCE_BATCH();
+
+   /* Pipe control workaround */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+}
+
+void
+gen7_enable_hw_binding_tables(struct brw_context *brw)
+{
+   if (!brw->has_resource_streamer) {
+  gen7_disable_hw_binding_tables(brw);
+  return;
+   }
+
+   if (!brw->hw_bt_pool.bo) {
+  /* From the BSpec, 3D Pipeline > Resource Streamer > Hardware Binding 
Tables:
+   *
+   *  "A maximum of 16,383 Binding tables are allowed in any batch buffer."
+   */
+  int max_size = 16383 * 4;
+  brw->hw_bt_pool.bo = drm_intel_bo_alloc(brw->bufmgr, "hw_bt",
+  max_size, 64);
+  brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
+   }
+
+   uint32_t dw1 = SET_FIELD(BRW_HW_BINDING_TABLE_ON, 
BRW_HW_BINDING_TABLE_ENABLE);
+   if (brw->is_haswell)
+  dw1 |= SET_FIELD(GEN7_MOCS_L3, GEN7_HW_BT_MOCS) | 
HSW_HW_BINDING_TABLE_RESERVED;
+
+   BEGIN_BATCH(3);
+   OUT_BATCH(_3DSTATE_BINDING_TABLE_POOL_ALLOC << 16 | (3 - 2));
+   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0, dw1);
+   OUT_RELOC(brw->hw_bt_pool.bo, I915_GEM_DOMAIN_SAMPLER, 0,
+ brw->hw_bt_pool.bo->size);
+   ADVANCE_BATCH();
+
+   /* Pipe control workaround */
+   brw_emit_pipe_control_flush(brw, PIPE_CONTROL_STATE_CACHE_INVALIDATE);
+}
+
+void
+gen7_reset_rs_pool_offsets(struct brw_context *brw)
+{
+   brw->hw_bt_pool.next_offset = HW_BT_START_OFFSET;
+}
+
+const struct brw_tracked_state gen7_hw_binding_tables = {
+   .dirty = {
+  .mesa = 0,
+  .brw = BRW_NEW_BATCH,
+   },
+   .emit = gen7_enable_hw_binding_tables
+};
+
 /** @} */
 
 /**
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index c7e1e81..9c7ccae 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -953,6 +953,10 @@ intelDestroyContext(__DRIcontext * driContextPriv)
if (brw->wm.base.scratch_bo)
   drm_intel_bo_unreference(brw->wm.base.scratch_bo);
 
+   gen7_reset_rs_pool_offsets(brw);
+   drm_intel_bo_unreference(brw->hw_bt_pool.bo);
+   brw->hw_bt_pool.bo = NULL;
+
drm_intel_gem_context_destroy(brw->hw_ctx);
 
if (ctx->swrast_context) {
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 07626af..1c72b74 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1360,6 +1360,11 @@ struct brw_context
   uint32_t fast_clear_op;
} wm;
 
+   /* RS hardware binding table */
+   struct {
+  drm_intel_bo *bo;
+  uint32_t next_offset;
+   } hw_bt_pool;
 
struct {
   uint32_t state_offset;
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index cfa67b6..d882bdd 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -130,6 +130,7 @@ extern const struct brw_tracked_state gen7_sol_state;
 extern const struct brw_tracked_state gen7_urb;
 extern const struct brw_tracked_state gen7_vs_state;
 extern const