Re: [Mesa-dev] [PATCH 08/22] i965/gs: Allocate URB space for use by GS.

2013-08-31 Thread Paul Berry
On 29 August 2013 19:37, Chad Versace  wrote:

> On 08/26/2013 03:12 PM, Paul Berry wrote:
>
>  +   /* VS always requires at least 32 URB entries */
>>
>
> Haswell GT2 and GT3 require at least 64 URB entries.


Good catch.  Thank you for double-checking me on that.

I'll make a follow-up patch that adds brw->urb.min_vs_entries, populates it
correctly at context creation time, and uses it when programming the URB.


>
>
>  +   unsigned vs_chunks =
>> +  ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) /
>> chunk_size_bytes;
>> +   unsigned vs_wants =
>> +  ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
>> +chunk_size_bytes) / chunk_size_bytes - vs_chunks;
>>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/22] i965/gs: Allocate URB space for use by GS.

2013-08-29 Thread Chad Versace

On 08/26/2013 03:12 PM, Paul Berry wrote:


+   /* VS always requires at least 32 URB entries */


Haswell GT2 and GT3 require at least 64 URB entries.


+   unsigned vs_chunks =
+  ALIGN(32 * vs_entry_size_bytes, chunk_size_bytes) / chunk_size_bytes;
+   unsigned vs_wants =
+  ALIGN(brw->urb.max_vs_entries * vs_entry_size_bytes,
+chunk_size_bytes) / chunk_size_bytes - vs_chunks;

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/22] i965/gs: Allocate URB space for use by GS.

2013-08-29 Thread Paul Berry
On 28 August 2013 21:00, Kenneth Graunke  wrote:

> On 08/26/2013 03:12 PM, Paul Berry wrote:
>
>> Previously, we gave all of the URB space (other than the small amount
>> that is used for push constants) to the vertex shader.  However, when
>> a geometry shader is active, we need to divide it up between the
>> vertex and geometry shaders.
>>
>> The size of the URB entries for the vertex and geometry shaders can
>> vary dramatically from one shader to the next.  So it doesn't make
>> sense to simply split the available space in two.  In particular:
>>
>> - On Ivy Bridge GT1, this would not leave enough space for the worst
>>case geometry shader, which requires 64k of URB space.
>>
>> - Due to hardware-imposed limits on the maximum number of URB entries,
>>sometimes a given shader stage will only be capable of using a small
>>amount of URB space.  When this happens, it may make sense to
>>allocate substantially less than half of the available space to that
>>stage.
>>
>> Our algorithm for dividing space between the two stages is to first
>> compute (a) the minimum amount of URB space that each stage needs in
>> order to function properly, and (b) the amount of additional URB space
>> that each stage "wants" (i.e. that it would be capable of making use
>> of).  If the total amount of space available is not enough to satisfy
>> needs + wants, then each stage's "wants" amount is scaled back by the
>> same factor in order to fit.
>>
>> When only a vertex shader is active, this algorithm produces
>> equivalent results to the old algorithm (if the vertex shader stage
>> can make use of all the available URB space, we assign all the space
>> to it; if it can't, we let it use as much as it can).
>>
>> In the future, when we need to support tessellation control and
>> tessellation evaluation pipeline stages, it should be straightforward
>> to expand this algorithm to cover them.
>>
>> v2: Use "unsigned" rather than "GLuint".
>> ---
>>   src/mesa/drivers/dri/i965/brw_context.h  |   6 +-
>>   src/mesa/drivers/dri/i965/gen7_blorp.cpp |  16 ++--
>>   src/mesa/drivers/dri/i965/gen7_urb.c | 155 +--
>>   3 files changed, 142 insertions(+), 35 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h
>> b/src/mesa/drivers/dri/i965/brw_context.h
>> index be5175f..77f2a6b 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1511,8 +1511,10 @@ void
>>   gen7_allocate_push_constants(struct brw_context *brw);
>>
>>   void
>> -gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
>> -GLuint vs_size, GLuint vs_start);
>> +gen7_emit_urb_state(struct brw_context *brw,
>> +unsigned nr_vs_entries, unsigned vs_size,
>> +unsigned vs_start, unsigned nr_gs_entries,
>> +unsigned gs_size, unsigned gs_start);
>>
>>
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> index a387836..6c798b1 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
>> @@ -51,14 +51,16 @@ static void
>>   gen7_blorp_emit_urb_config(struct brw_context *brw,
>>  const brw_blorp_params *params)
>>   {
>> -   /* The minimum valid value is 32. See 3DSTATE_URB_VS,
>> -* Dword 1.15:0 "VS Number of URB Entries".
>> +   /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS,
>> Dword
>> +* 1.15:0 "VS Number of URB Entries".
>>   */
>> -   int num_vs_entries = 32;
>> -   int vs_size = 2;
>> -   int vs_start = 2; /* skip over push constants */
>> -
>> -   gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start);
>> +   gen7_emit_urb_state(brw,
>> +   32 /* num_vs_entries */,
>> +   2 /* vs_size */,
>> +   2 /* vs_start */,
>> +   0 /* num_gs_entries */,
>> +   1 /* gs_size */,
>> +   2 /* gs_start */);
>>   }
>>
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c
>> b/src/mesa/drivers/dri/i965/gen7_urb.c
>> index 927af37..2d10cc12 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_urb.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_urb.c
>> @@ -74,34 +74,136 @@ gen7_upload_urb(struct brw_context *brw)
>>   {
>>  const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;
>>
>> -   /* Total space for entries is URB size - 16kB for push constants */
>> -   int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /*
>> bytes */
>> -
>>  /* CACHE_NEW_VS_PROG */
>>  unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size,
>> 1);
>> -
>> -   int nr_vs_entries = handle_region_size / (vs_size * 64);
>> -   if (nr_vs_entries > brw->urb.max_vs_entries)
>> -  nr_vs_entries = brw->urb.max_vs_entries;
>> -
>> -   /* Accordi

Re: [Mesa-dev] [PATCH 08/22] i965/gs: Allocate URB space for use by GS.

2013-08-28 Thread Kenneth Graunke

On 08/26/2013 03:12 PM, Paul Berry wrote:

Previously, we gave all of the URB space (other than the small amount
that is used for push constants) to the vertex shader.  However, when
a geometry shader is active, we need to divide it up between the
vertex and geometry shaders.

The size of the URB entries for the vertex and geometry shaders can
vary dramatically from one shader to the next.  So it doesn't make
sense to simply split the available space in two.  In particular:

- On Ivy Bridge GT1, this would not leave enough space for the worst
   case geometry shader, which requires 64k of URB space.

- Due to hardware-imposed limits on the maximum number of URB entries,
   sometimes a given shader stage will only be capable of using a small
   amount of URB space.  When this happens, it may make sense to
   allocate substantially less than half of the available space to that
   stage.

Our algorithm for dividing space between the two stages is to first
compute (a) the minimum amount of URB space that each stage needs in
order to function properly, and (b) the amount of additional URB space
that each stage "wants" (i.e. that it would be capable of making use
of).  If the total amount of space available is not enough to satisfy
needs + wants, then each stage's "wants" amount is scaled back by the
same factor in order to fit.

When only a vertex shader is active, this algorithm produces
equivalent results to the old algorithm (if the vertex shader stage
can make use of all the available URB space, we assign all the space
to it; if it can't, we let it use as much as it can).

In the future, when we need to support tessellation control and
tessellation evaluation pipeline stages, it should be straightforward
to expand this algorithm to cover them.

v2: Use "unsigned" rather than "GLuint".
---
  src/mesa/drivers/dri/i965/brw_context.h  |   6 +-
  src/mesa/drivers/dri/i965/gen7_blorp.cpp |  16 ++--
  src/mesa/drivers/dri/i965/gen7_urb.c | 155 +--
  3 files changed, 142 insertions(+), 35 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index be5175f..77f2a6b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1511,8 +1511,10 @@ void
  gen7_allocate_push_constants(struct brw_context *brw);

  void
-gen7_emit_urb_state(struct brw_context *brw, GLuint nr_vs_entries,
-GLuint vs_size, GLuint vs_start);
+gen7_emit_urb_state(struct brw_context *brw,
+unsigned nr_vs_entries, unsigned vs_size,
+unsigned vs_start, unsigned nr_gs_entries,
+unsigned gs_size, unsigned gs_start);



diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index a387836..6c798b1 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -51,14 +51,16 @@ static void
  gen7_blorp_emit_urb_config(struct brw_context *brw,
 const brw_blorp_params *params)
  {
-   /* The minimum valid value is 32. See 3DSTATE_URB_VS,
-* Dword 1.15:0 "VS Number of URB Entries".
+   /* The minimum valid number of VS entries is 32. See 3DSTATE_URB_VS, Dword
+* 1.15:0 "VS Number of URB Entries".
  */
-   int num_vs_entries = 32;
-   int vs_size = 2;
-   int vs_start = 2; /* skip over push constants */
-
-   gen7_emit_urb_state(brw, num_vs_entries, vs_size, vs_start);
+   gen7_emit_urb_state(brw,
+   32 /* num_vs_entries */,
+   2 /* vs_size */,
+   2 /* vs_start */,
+   0 /* num_gs_entries */,
+   1 /* gs_size */,
+   2 /* gs_start */);
  }


diff --git a/src/mesa/drivers/dri/i965/gen7_urb.c 
b/src/mesa/drivers/dri/i965/gen7_urb.c
index 927af37..2d10cc12 100644
--- a/src/mesa/drivers/dri/i965/gen7_urb.c
+++ b/src/mesa/drivers/dri/i965/gen7_urb.c
@@ -74,34 +74,136 @@ gen7_upload_urb(struct brw_context *brw)
  {
 const int push_size_kB = brw->is_haswell && brw->gt == 3 ? 32 : 16;

-   /* Total space for entries is URB size - 16kB for push constants */
-   int handle_region_size = (brw->urb.size - push_size_kB) * 1024; /* bytes */
-
 /* CACHE_NEW_VS_PROG */
 unsigned vs_size = MAX2(brw->vs.prog_data->base.urb_entry_size, 1);
-
-   int nr_vs_entries = handle_region_size / (vs_size * 64);
-   if (nr_vs_entries > brw->urb.max_vs_entries)
-  nr_vs_entries = brw->urb.max_vs_entries;
-
-   /* According to volume 2a, nr_vs_entries must be a multiple of 8. */
-   brw->urb.nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, 8);
-
-   /* URB Starting Addresses are specified in multiples of 8kB. */
-   brw->urb.vs_start = push_size_kB / 8; /* skip over push constants */
-
-   assert(brw->urb.nr_vs_entries % 8 == 0);
-   assert(brw->urb.nr_gs_entries % 8 == 0);
-   /* GS requirement */
-   assert(!brw->ff_gs.prog_ac