[Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

2016-06-11 Thread Christian Gmeiner
From: "Wladimir J. van der Laan" 

Make it possible to limit the number of vertex buffers, as there exist
GPUs with fewer than 32 supported vertex buffers.

Signed-off-by: Wladimir J. van der Laan 
---
 src/gallium/auxiliary/util/u_vbuf.c | 45 +++--
 src/gallium/auxiliary/util/u_vbuf.h |  3 +++
 2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 5b4e527..464c279 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -184,6 +184,8 @@ struct u_vbuf {
uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
/* Which buffer has a non-zero stride. */
uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
+   /* Which buffers are allowed (supported by hardware). */
+   uint32_t allowed_vb_mask;
 };
 
 static void *
@@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen, 
struct u_vbuf_caps *caps)
caps->user_vertex_buffers =
   screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
 
+   caps->max_vertex_buffers =
+  screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
+
if (!caps->buffer_offset_unaligned ||
!caps->buffer_stride_unaligned ||
!caps->velem_src_offset_unaligned ||
-   !caps->user_vertex_buffers) {
+   !caps->user_vertex_buffers ||
+   !caps->max_vertex_buffers) {
   fallback = TRUE;
}
 
@@ -313,6 +319,7 @@ u_vbuf_create(struct pipe_context *pipe,
mgr->cso_cache = cso_cache_create();
mgr->translate_cache = translate_cache_create();
memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
+   mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1;
 
mgr->uploader = u_upload_create(pipe, 1024 * 1024,
PIPE_BIND_VERTEX_BUFFER,
@@ -523,14 +530,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct 
translate_key *key,
 
 static boolean
 u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
-unsigned mask[VB_NUM])
+unsigned mask[VB_NUM],
+unsigned extra_free_vb_mask)
 {
unsigned type;
unsigned fallback_vbs[VB_NUM];
/* Set the bit for each buffer which is incompatible, or isn't set. */
uint32_t unused_vb_mask =
-  mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
-  ~mgr->enabled_vb_mask;
+  (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
+  ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask;
 
memset(fallback_vbs, ~0, sizeof(fallback_vbs));
 
@@ -573,6 +581,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
unsigned i, type;
unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
mgr->ve->used_vb_mask;
+   unsigned extra_free_vb_mask = 0;
 
int start[VB_NUM] = {
   start_vertex, /* VERTEX */
@@ -618,8 +627,15 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
 
assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
 
+   /* In the case of unroll_indices, we can regard all non-constant
+* vertex buffers with only non-instance vertex elements as incompatible
+* and thus free.
+*/
+   if (unroll_indices)
+   extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE];
+
/* Find free vertex buffer slots. */
-   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
+   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask, extra_free_vb_mask)) {
   return FALSE;
}
 
@@ -778,6 +794,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned 
count,
   }
}
 
+   if (used_buffers & ~mgr->allowed_vb_mask) {
+  /* More vertex buffers are used than the hardware supports.  In
+   * principle, we only need to make sure that less vertex buffers are
+   * used, and mark some of the latter vertex buffers as incompatible.
+   * For now, mark all vertex buffers as incompatible.
+   */
+  ve->incompatible_vb_mask_any = used_buffers;
+  ve->compatible_vb_mask_any = 0;
+  ve->incompatible_elem_mask = (1 << count) - 1;
+   }
+
ve->used_vb_mask = used_buffers;
ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
@@ -790,8 +817,12 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned 
count,
   }
}
 
-   ve->driver_cso =
-  pipe->create_vertex_elements_state(pipe, count, driver_attribs);
+   /* Only create driver CSO if no incompatible elements */
+   if (!ve->incompatible_elem_mask) {
+  ve->driver_cso =
+ pipe->create_vertex_elements_state(pipe, count, driver_attribs);
+   }
+
return ve;
 }
 
diff --git a/src/gallium/auxiliary/util/u_vbuf.h 
b/src/gallium/auxiliary/util/u_vbuf.h
index 9e8b135..9ff9938 100644
--- a/src/g

Re: [Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

2016-06-13 Thread Nicolai Hähnle

On 11.06.2016 21:21, Christian Gmeiner wrote:

From: "Wladimir J. van der Laan" 

Make it possible to limit the number of vertex buffers as there exist
GPUs with less then 32 supported vertex buffers.

Signed-off-by: Wladimir J. van der Laan 
---
  src/gallium/auxiliary/util/u_vbuf.c | 45 +++--
  src/gallium/auxiliary/util/u_vbuf.h |  3 +++
  2 files changed, 41 insertions(+), 7 deletions(-)

diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
b/src/gallium/auxiliary/util/u_vbuf.c
index 5b4e527..464c279 100644
--- a/src/gallium/auxiliary/util/u_vbuf.c
+++ b/src/gallium/auxiliary/util/u_vbuf.c
@@ -184,6 +184,8 @@ struct u_vbuf {
 uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
 /* Which buffer has a non-zero stride. */
 uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
+   /* Which buffers are allowed (supported by hardware). */
+   uint32_t allowed_vb_mask;
  };

  static void *
@@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen, 
struct u_vbuf_caps *caps)
 caps->user_vertex_buffers =
screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);

+   caps->max_vertex_buffers =
+  screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
+
 if (!caps->buffer_offset_unaligned ||
 !caps->buffer_stride_unaligned ||
 !caps->velem_src_offset_unaligned ||
-   !caps->user_vertex_buffers) {
+   !caps->user_vertex_buffers ||
+   !caps->max_vertex_buffers) {
fallback = TRUE;
 }

@@ -313,6 +319,7 @@ u_vbuf_create(struct pipe_context *pipe,
 mgr->cso_cache = cso_cache_create();
 mgr->translate_cache = translate_cache_create();
 memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
+   mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1;


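This is undefined when max_vertex_buffers is 31 or 32: "1" is a signed
int, so "1 << 31" overflows and "1 << 32" shifts by the full width of
the type. You can use u_bit_consecutive instead.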




 mgr->uploader = u_upload_create(pipe, 1024 * 1024,
 PIPE_BIND_VERTEX_BUFFER,
@@ -523,14 +530,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct 
translate_key *key,

  static boolean
  u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
-unsigned mask[VB_NUM])
+unsigned mask[VB_NUM],
+unsigned extra_free_vb_mask)
  {
 unsigned type;
 unsigned fallback_vbs[VB_NUM];
 /* Set the bit for each buffer which is incompatible, or isn't set. */
 uint32_t unused_vb_mask =
-  mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
-  ~mgr->enabled_vb_mask;
+  (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
+  ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask;

 memset(fallback_vbs, ~0, sizeof(fallback_vbs));

@@ -573,6 +581,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
 unsigned i, type;
 unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
 mgr->ve->used_vb_mask;
+   unsigned extra_free_vb_mask = 0;

 int start[VB_NUM] = {
start_vertex, /* VERTEX */
@@ -618,8 +627,15 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,

 assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

+   /* In the case of unroll_indices, we can regard all non-constant
+* vertex buffers with only non-instance vertex elements as incompatible
+* and thus free.
+*/
+   if (unroll_indices)
+   extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE];
+
 /* Find free vertex buffer slots. */
-   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
+   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask, extra_free_vb_mask)) {
return FALSE;
 }


This logic of using extra space in case of unroll_indices looks 
unrelated to the caps->max_vertex_buffers stuff, can you put it into a 
separate patch?




@@ -778,6 +794,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned 
count,
}
 }

+   if (used_buffers & ~mgr->allowed_vb_mask) {
+  /* More vertex buffers are used than the hardware supports.  In
+   * principle, we only need to make sure that less vertex buffers are
+   * used, and mark some of the latter vertex buffers as incompatible.
+   * For now, mark all vertex buffers as incompatible.
+   */
+  ve->incompatible_vb_mask_any = used_buffers;
+  ve->compatible_vb_mask_any = 0;
+  ve->incompatible_elem_mask = (1 << count) - 1;
+   }
+
 ve->used_vb_mask = used_buffers;
 ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
 ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;
@@ -790,8 +817,12 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned 
count,
}
 }

-   ve->driver_cso =
-  pipe->create_vertex_elements_state(pipe, count, driver_attribs);
+   /* Only create driver C

Re: [Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

2016-06-13 Thread Marek Olšák
On Sat, Jun 11, 2016 at 9:21 PM, Christian Gmeiner
 wrote:
> From: "Wladimir J. van der Laan" 
>
> Make it possible to limit the number of vertex buffers as there exist
> GPUs with less then 32 supported vertex buffers.
>
> Signed-off-by: Wladimir J. van der Laan 
> ---
>  src/gallium/auxiliary/util/u_vbuf.c | 45 
> +++--
>  src/gallium/auxiliary/util/u_vbuf.h |  3 +++
>  2 files changed, 41 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
> b/src/gallium/auxiliary/util/u_vbuf.c
> index 5b4e527..464c279 100644
> --- a/src/gallium/auxiliary/util/u_vbuf.c
> +++ b/src/gallium/auxiliary/util/u_vbuf.c
> @@ -184,6 +184,8 @@ struct u_vbuf {
> uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
> /* Which buffer has a non-zero stride. */
> uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer 
> */
> +   /* Which buffers are allowed (supported by hardware). */
> +   uint32_t allowed_vb_mask;
>  };
>
>  static void *
> @@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen, 
> struct u_vbuf_caps *caps)
> caps->user_vertex_buffers =
>screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
>
> +   caps->max_vertex_buffers =
> +  screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
> +
> if (!caps->buffer_offset_unaligned ||
> !caps->buffer_stride_unaligned ||
> !caps->velem_src_offset_unaligned ||
> -   !caps->user_vertex_buffers) {
> +   !caps->user_vertex_buffers ||
> +   !caps->max_vertex_buffers) {

When is max_vertex_buffers zero, really?

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

2016-06-14 Thread Christian Gmeiner
Hi Nicolai,

2016-06-13 11:48 GMT+02:00 Nicolai Hähnle :
> On 11.06.2016 21:21, Christian Gmeiner wrote:
>>
>> From: "Wladimir J. van der Laan" 
>>
>> Make it possible to limit the number of vertex buffers as there exist
>> GPUs with less then 32 supported vertex buffers.
>>
>> Signed-off-by: Wladimir J. van der Laan 
>> ---
>>   src/gallium/auxiliary/util/u_vbuf.c | 45
>> +++--
>>   src/gallium/auxiliary/util/u_vbuf.h |  3 +++
>>   2 files changed, 41 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/util/u_vbuf.c
>> b/src/gallium/auxiliary/util/u_vbuf.c
>> index 5b4e527..464c279 100644
>> --- a/src/gallium/auxiliary/util/u_vbuf.c
>> +++ b/src/gallium/auxiliary/util/u_vbuf.c
>> @@ -184,6 +184,8 @@ struct u_vbuf {
>>  uint32_t incompatible_vb_mask; /* each bit describes a corresp.
>> buffer */
>>  /* Which buffer has a non-zero stride. */
>>  uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp.
>> buffer */
>> +   /* Which buffers are allowed (supported by hardware). */
>> +   uint32_t allowed_vb_mask;
>>   };
>>
>>   static void *
>> @@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen,
>> struct u_vbuf_caps *caps)
>>  caps->user_vertex_buffers =
>> screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
>>
>> +   caps->max_vertex_buffers =
>> +  screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
>> +
>>  if (!caps->buffer_offset_unaligned ||
>>  !caps->buffer_stride_unaligned ||
>>  !caps->velem_src_offset_unaligned ||
>> -   !caps->user_vertex_buffers) {
>> +   !caps->user_vertex_buffers ||
>> +   !caps->max_vertex_buffers) {
>> fallback = TRUE;
>>  }
>>
>> @@ -313,6 +319,7 @@ u_vbuf_create(struct pipe_context *pipe,
>>  mgr->cso_cache = cso_cache_create();
>>  mgr->translate_cache = translate_cache_create();
>>  memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
>> +   mgr->allowed_vb_mask = (1 << mgr->caps.max_vertex_buffers) - 1;
>
>
> This is undefined when max_vertex_buffers is 31 or 32. You can use
> u_bit_consecutive.

Ok.

>
>
>>
>>  mgr->uploader = u_upload_create(pipe, 1024 * 1024,
>>  PIPE_BIND_VERTEX_BUFFER,
>> @@ -523,14 +530,15 @@ u_vbuf_translate_buffers(struct u_vbuf *mgr, struct
>> translate_key *key,
>>
>>   static boolean
>>   u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
>> -unsigned mask[VB_NUM])
>> +unsigned mask[VB_NUM],
>> +unsigned extra_free_vb_mask)
>>   {
>>  unsigned type;
>>  unsigned fallback_vbs[VB_NUM];
>>  /* Set the bit for each buffer which is incompatible, or isn't set.
>> */
>>  uint32_t unused_vb_mask =
>> -  mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
>> -  ~mgr->enabled_vb_mask;
>> +  (mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
>> +  ~mgr->enabled_vb_mask | extra_free_vb_mask) & mgr->allowed_vb_mask;
>>
>>  memset(fallback_vbs, ~0, sizeof(fallback_vbs));
>>
>> @@ -573,6 +581,7 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>>  unsigned i, type;
>>  unsigned incompatible_vb_mask = mgr->incompatible_vb_mask &
>>  mgr->ve->used_vb_mask;
>> +   unsigned extra_free_vb_mask = 0;
>>
>>  int start[VB_NUM] = {
>> start_vertex, /* VERTEX */
>> @@ -618,8 +627,15 @@ u_vbuf_translate_begin(struct u_vbuf *mgr,
>>
>>  assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);
>>
>> +   /* In the case of unroll_indices, we can regard all non-constant
>> +* vertex buffers with only non-instance vertex elements as
>> incompatible
>> +* and thus free.
>> +*/
>> +   if (unroll_indices)
>> +   extra_free_vb_mask = mask[VB_VERTEX] & ~mask[VB_INSTANCE];
>> +
>>  /* Find free vertex buffer slots. */
>> -   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
>> +   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask,
>> extra_free_vb_mask)) {
>> return FALSE;
>>  }
>
>
> This logic of using extra space in case of unroll_indices looks unrelated to
> the caps->max_vertex_buffers stuff, can you put it into a separate patch?
>

Yes that makes sense.

>>
>> @@ -778,6 +794,17 @@ u_vbuf_create_vertex_elements(struct u_vbuf *mgr,
>> unsigned count,
>> }
>>  }
>>
>> +   if (used_buffers & ~mgr->allowed_vb_mask) {
>> +  /* More vertex buffers are used than the hardware supports.  In
>> +   * principle, we only need to make sure that less vertex buffers
>> are
>> +   * used, and mark some of the latter vertex buffers as
>> incompatible.
>> +   * For now, mark all vertex buffers as incompatible.
>> +   */
>> +  ve->incompatible_vb_mask_any = used_buffers;
>> +  ve->compatible_vb_mask_any = 0;
>> +  ve->incompatible_elem_mask = (1 << count) - 1;
>

Re: [Mesa-dev] [RFC 2/3] u_vbuf: add logic to use a limited number of vbufs

2016-06-14 Thread Christian Gmeiner
Hi Marek,

2016-06-13 14:48 GMT+02:00 Marek Olšák :
> On Sat, Jun 11, 2016 at 9:21 PM, Christian Gmeiner
>  wrote:
>> From: "Wladimir J. van der Laan" 
>>
>> Make it possible to limit the number of vertex buffers as there exist
>> GPUs with less then 32 supported vertex buffers.
>>
>> Signed-off-by: Wladimir J. van der Laan 
>> ---
>>  src/gallium/auxiliary/util/u_vbuf.c | 45 
>> +++--
>>  src/gallium/auxiliary/util/u_vbuf.h |  3 +++
>>  2 files changed, 41 insertions(+), 7 deletions(-)
>>
>> diff --git a/src/gallium/auxiliary/util/u_vbuf.c 
>> b/src/gallium/auxiliary/util/u_vbuf.c
>> index 5b4e527..464c279 100644
>> --- a/src/gallium/auxiliary/util/u_vbuf.c
>> +++ b/src/gallium/auxiliary/util/u_vbuf.c
>> @@ -184,6 +184,8 @@ struct u_vbuf {
>> uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
>> /* Which buffer has a non-zero stride. */
>> uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer 
>> */
>> +   /* Which buffers are allowed (supported by hardware). */
>> +   uint32_t allowed_vb_mask;
>>  };
>>
>>  static void *
>> @@ -291,10 +293,14 @@ boolean u_vbuf_get_caps(struct pipe_screen *screen, 
>> struct u_vbuf_caps *caps)
>> caps->user_vertex_buffers =
>>screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
>>
>> +   caps->max_vertex_buffers =
>> +  screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);
>> +
>> if (!caps->buffer_offset_unaligned ||
>> !caps->buffer_stride_unaligned ||
>> !caps->velem_src_offset_unaligned ||
>> -   !caps->user_vertex_buffers) {
>> +   !caps->user_vertex_buffers ||
>> +   !caps->max_vertex_buffers) {
>
> When is max_vertex_buffers zero, really?
>

It should never be zero. If it is zero, the driver is doing something
terribly wrong. I will remove that change from the next patch series.

Thanks for review.

greets
--
Christian Gmeiner, MSc

https://soundcloud.com/christian-gmeiner
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev