Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

2017-05-04 Thread Nicolai Hähnle

On 04.05.2017 00:02, Marek Olšák wrote:

On Wed, May 3, 2017 at 6:04 PM, Nicolai Hähnle  wrote:

On 01.05.2017 14:53, Marek Olšák wrote:


From: Marek Olšák 

This is the best place to do it. Now drivers without u_vbuf don't have to
do it.
---
 src/mesa/state_tracker/st_atom_array.c | 56
--
 src/mesa/state_tracker/st_context.c|  2 ++
 src/mesa/state_tracker/st_context.h|  1 +
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c
b/src/mesa/state_tracker/st_atom_array.c
index cc9cac1..813468b 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -37,20 +37,21 @@
  */

 #include "st_context.h"
 #include "st_atom.h"
 #include "st_cb_bufferobjects.h"
 #include "st_draw.h"
 #include "st_program.h"

 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
+#include "util/u_upload_mgr.h"
 #include "main/bufferobj.h"
 #include "main/glformats.h"

 /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
 static const uint16_t vertex_formats[][4][4] = {
{ /* GL_BYTE */
   {
  PIPE_FORMAT_R8_SSCALED,
  PIPE_FORMAT_R8G8_SSCALED,
  PIPE_FORMAT_R8G8B8_SSCALED,
@@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program
*vp,
for (attr = 0; attr < num_inputs; attr++) {
   const struct gl_vertex_array *array;
   const struct gl_buffer_object *bufObj;
   GLsizei stride;

   array = get_client_array(arrays, vp->index_to_input[attr]);
   if (!array)
 continue;

   stride = array->StrideB; /* in bytes */
+
+  /* To keep things simple, don't allow interleaved zero-stride
attribs. */
+  if (stride == 0)
+ return false;
+
   bufObj = array->BufferObj;
   if (attr == 0) {
  /* save info about the first array */
  firstStride = stride;
  firstPtr = array->Ptr;
  firstBufObj = bufObj;
  userSpaceBuffer = !bufObj || !bufObj->Name;
   }
   else {
  /* check if other arrays interleave with the first, in same
buffer */
@@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
 static void
 setup_non_interleaved_attribs(struct st_context *st,
   const struct st_vertex_program *vp,
   const struct gl_vertex_array **arrays,
   unsigned num_inputs)
 {
struct gl_context *ctx = st->ctx;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
unsigned num_vbuffers = 0;
+   unsigned unref_buffers = 0;
GLuint attr;

for (attr = 0; attr < num_inputs;) {
   const unsigned mesaAttr = vp->index_to_input[attr];
   const struct gl_vertex_array *array;
   struct gl_buffer_object *bufobj;
   GLsizei stride;
   unsigned src_format;
   unsigned bufidx;

@@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
  if (!stobj || !stobj->buffer) {
 st->vertex_array_out_of_memory = true;
 return; /* out-of-memory error probably */
  }

  vbuffer[bufidx].buffer.resource = stobj->buffer;
  vbuffer[bufidx].is_user_buffer = false;
  vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
   }
   else {
- /* wrap user data */
- void *ptr;
-
- if (array->Ptr) {
-ptr = (void *) array->Ptr;
- }
- else {
-/* no array, use ctx->Current.Attrib[] value */
-ptr = (void *) ctx->Current.Attrib[mesaAttr];
-stride = 0;
+ if (stride == 0) {
+void *ptr = array->Ptr ? (void*)array->Ptr :
+
(void*)ctx->Current.Attrib[mesaAttr];
+
+vbuffer[bufidx].is_user_buffer = false;
+vbuffer[bufidx].buffer.resource = NULL;
+
+/* Use const_uploader for zero-stride vertex attributes,
because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched
many
+ * times (thousands of times), so a better placement is going
to
+ * perform better.
+ *
+ * Upload the maximum possible size, which is 4x GLdouble =
32.
+ */
+u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+  0, 32, 32, ptr,
+  [bufidx].buffer_offset,
+  [bufidx].buffer.resource);
+unref_buffers |= 1u << bufidx;



In the array->Ptr != NULL case, can we get a more accurate size here?
Otherwise we might read off the end of allocated memory and crash.

This wasn't an issue before in radeonsi, 

Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

2017-05-03 Thread Marek Olšák
On Wed, May 3, 2017 at 6:04 PM, Nicolai Hähnle  wrote:
> On 01.05.2017 14:53, Marek Olšák wrote:
>>
>> From: Marek Olšák 
>>
>> This is the best place to do it. Now drivers without u_vbuf don't have to
>> do it.
>> ---
>>  src/mesa/state_tracker/st_atom_array.c | 56
>> --
>>  src/mesa/state_tracker/st_context.c|  2 ++
>>  src/mesa/state_tracker/st_context.h|  1 +
>>  3 files changed, 43 insertions(+), 16 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_atom_array.c
>> b/src/mesa/state_tracker/st_atom_array.c
>> index cc9cac1..813468b 100644
>> --- a/src/mesa/state_tracker/st_atom_array.c
>> +++ b/src/mesa/state_tracker/st_atom_array.c
>> @@ -37,20 +37,21 @@
>>   */
>>
>>  #include "st_context.h"
>>  #include "st_atom.h"
>>  #include "st_cb_bufferobjects.h"
>>  #include "st_draw.h"
>>  #include "st_program.h"
>>
>>  #include "cso_cache/cso_context.h"
>>  #include "util/u_math.h"
>> +#include "util/u_upload_mgr.h"
>>  #include "main/bufferobj.h"
>>  #include "main/glformats.h"
>>
>>  /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
>>  static const uint16_t vertex_formats[][4][4] = {
>> { /* GL_BYTE */
>>{
>>   PIPE_FORMAT_R8_SSCALED,
>>   PIPE_FORMAT_R8G8_SSCALED,
>>   PIPE_FORMAT_R8G8B8_SSCALED,
>> @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program
>> *vp,
>> for (attr = 0; attr < num_inputs; attr++) {
>>const struct gl_vertex_array *array;
>>const struct gl_buffer_object *bufObj;
>>GLsizei stride;
>>
>>array = get_client_array(arrays, vp->index_to_input[attr]);
>>if (!array)
>>  continue;
>>
>>stride = array->StrideB; /* in bytes */
>> +
>> +  /* To keep things simple, don't allow interleaved zero-stride
>> attribs. */
>> +  if (stride == 0)
>> + return false;
>> +
>>bufObj = array->BufferObj;
>>if (attr == 0) {
>>   /* save info about the first array */
>>   firstStride = stride;
>>   firstPtr = array->Ptr;
>>   firstBufObj = bufObj;
>>   userSpaceBuffer = !bufObj || !bufObj->Name;
>>}
>>else {
>>   /* check if other arrays interleave with the first, in same
>> buffer */
>> @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
>>  static void
>>  setup_non_interleaved_attribs(struct st_context *st,
>>const struct st_vertex_program *vp,
>>const struct gl_vertex_array **arrays,
>>unsigned num_inputs)
>>  {
>> struct gl_context *ctx = st->ctx;
>> struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
>> struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
>> unsigned num_vbuffers = 0;
>> +   unsigned unref_buffers = 0;
>> GLuint attr;
>>
>> for (attr = 0; attr < num_inputs;) {
>>const unsigned mesaAttr = vp->index_to_input[attr];
>>const struct gl_vertex_array *array;
>>struct gl_buffer_object *bufobj;
>>GLsizei stride;
>>unsigned src_format;
>>unsigned bufidx;
>>
>> @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
>>   if (!stobj || !stobj->buffer) {
>>  st->vertex_array_out_of_memory = true;
>>  return; /* out-of-memory error probably */
>>   }
>>
>>   vbuffer[bufidx].buffer.resource = stobj->buffer;
>>   vbuffer[bufidx].is_user_buffer = false;
>>   vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
>>}
>>else {
>> - /* wrap user data */
>> - void *ptr;
>> -
>> - if (array->Ptr) {
>> -ptr = (void *) array->Ptr;
>> - }
>> - else {
>> -/* no array, use ctx->Current.Attrib[] value */
>> -ptr = (void *) ctx->Current.Attrib[mesaAttr];
>> -stride = 0;
>> + if (stride == 0) {
>> +void *ptr = array->Ptr ? (void*)array->Ptr :
>> +
>> (void*)ctx->Current.Attrib[mesaAttr];
>> +
>> +vbuffer[bufidx].is_user_buffer = false;
>> +vbuffer[bufidx].buffer.resource = NULL;
>> +
>> +/* Use const_uploader for zero-stride vertex attributes,
>> because
>> + * it may use a better memory placement than stream_uploader.
>> + * The reason is that zero-stride attributes can be fetched
>> many
>> + * times (thousands of times), so a better placement is going
>> to
>> + * perform better.
>> + *
>> + * Upload the maximum possible size, which is 4x GLdouble =
>> 32.
>> + */
>> +u_upload_data(st->can_bind_const_buffer_as_vertex ?
>> + st->pipe->const_uploader :
>> + st->pipe->stream_uploader,
>> +  

Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

2017-05-03 Thread Nicolai Hähnle

On 01.05.2017 14:53, Marek Olšák wrote:

From: Marek Olšák 

This is the best place to do it. Now drivers without u_vbuf don't have to
do it.
---
 src/mesa/state_tracker/st_atom_array.c | 56 --
 src/mesa/state_tracker/st_context.c|  2 ++
 src/mesa/state_tracker/st_context.h|  1 +
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index cc9cac1..813468b 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -37,20 +37,21 @@
  */

 #include "st_context.h"
 #include "st_atom.h"
 #include "st_cb_bufferobjects.h"
 #include "st_draw.h"
 #include "st_program.h"

 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
+#include "util/u_upload_mgr.h"
 #include "main/bufferobj.h"
 #include "main/glformats.h"

 /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
 static const uint16_t vertex_formats[][4][4] = {
{ /* GL_BYTE */
   {
  PIPE_FORMAT_R8_SSCALED,
  PIPE_FORMAT_R8G8_SSCALED,
  PIPE_FORMAT_R8G8B8_SSCALED,
@@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
for (attr = 0; attr < num_inputs; attr++) {
   const struct gl_vertex_array *array;
   const struct gl_buffer_object *bufObj;
   GLsizei stride;

   array = get_client_array(arrays, vp->index_to_input[attr]);
   if (!array)
 continue;

   stride = array->StrideB; /* in bytes */
+
+  /* To keep things simple, don't allow interleaved zero-stride attribs. */
+  if (stride == 0)
+ return false;
+
   bufObj = array->BufferObj;
   if (attr == 0) {
  /* save info about the first array */
  firstStride = stride;
  firstPtr = array->Ptr;
  firstBufObj = bufObj;
  userSpaceBuffer = !bufObj || !bufObj->Name;
   }
   else {
  /* check if other arrays interleave with the first, in same buffer */
@@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
 static void
 setup_non_interleaved_attribs(struct st_context *st,
   const struct st_vertex_program *vp,
   const struct gl_vertex_array **arrays,
   unsigned num_inputs)
 {
struct gl_context *ctx = st->ctx;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
unsigned num_vbuffers = 0;
+   unsigned unref_buffers = 0;
GLuint attr;

for (attr = 0; attr < num_inputs;) {
   const unsigned mesaAttr = vp->index_to_input[attr];
   const struct gl_vertex_array *array;
   struct gl_buffer_object *bufobj;
   GLsizei stride;
   unsigned src_format;
   unsigned bufidx;

@@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
  if (!stobj || !stobj->buffer) {
 st->vertex_array_out_of_memory = true;
 return; /* out-of-memory error probably */
  }

  vbuffer[bufidx].buffer.resource = stobj->buffer;
  vbuffer[bufidx].is_user_buffer = false;
  vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
   }
   else {
- /* wrap user data */
- void *ptr;
-
- if (array->Ptr) {
-ptr = (void *) array->Ptr;
- }
- else {
-/* no array, use ctx->Current.Attrib[] value */
-ptr = (void *) ctx->Current.Attrib[mesaAttr];
-stride = 0;
+ if (stride == 0) {
+void *ptr = array->Ptr ? (void*)array->Ptr :
+ (void*)ctx->Current.Attrib[mesaAttr];
+
+vbuffer[bufidx].is_user_buffer = false;
+vbuffer[bufidx].buffer.resource = NULL;
+
+/* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ *
+ * Upload the maximum possible size, which is 4x GLdouble = 32.
+ */
+u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+  0, 32, 32, ptr,
+  [bufidx].buffer_offset,
+  [bufidx].buffer.resource);
+unref_buffers |= 1u << bufidx;


In the array->Ptr != NULL case, can we get a more accurate size here? 
Otherwise we might read off the end of allocated memory and crash.


This wasn't an issue before in radeonsi, because we still use u_vbuf 
with compatibility profiles.


Cheers,
Nicolai



+ 

[Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here

2017-05-01 Thread Marek Olšák
From: Marek Olšák 

This is the best place to do it. Now drivers without u_vbuf don't have to
do it.
---
 src/mesa/state_tracker/st_atom_array.c | 56 --
 src/mesa/state_tracker/st_context.c|  2 ++
 src/mesa/state_tracker/st_context.h|  1 +
 3 files changed, 43 insertions(+), 16 deletions(-)

diff --git a/src/mesa/state_tracker/st_atom_array.c 
b/src/mesa/state_tracker/st_atom_array.c
index cc9cac1..813468b 100644
--- a/src/mesa/state_tracker/st_atom_array.c
+++ b/src/mesa/state_tracker/st_atom_array.c
@@ -37,20 +37,21 @@
  */
 
 #include "st_context.h"
 #include "st_atom.h"
 #include "st_cb_bufferobjects.h"
 #include "st_draw.h"
 #include "st_program.h"
 
 #include "cso_cache/cso_context.h"
 #include "util/u_math.h"
+#include "util/u_upload_mgr.h"
 #include "main/bufferobj.h"
 #include "main/glformats.h"
 
 /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */
 static const uint16_t vertex_formats[][4][4] = {
{ /* GL_BYTE */
   {
  PIPE_FORMAT_R8_SSCALED,
  PIPE_FORMAT_R8G8_SSCALED,
  PIPE_FORMAT_R8G8B8_SSCALED,
@@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp,
for (attr = 0; attr < num_inputs; attr++) {
   const struct gl_vertex_array *array;
   const struct gl_buffer_object *bufObj;
   GLsizei stride;
 
   array = get_client_array(arrays, vp->index_to_input[attr]);
   if (!array)
 continue;
 
   stride = array->StrideB; /* in bytes */
+
+  /* To keep things simple, don't allow interleaved zero-stride attribs. */
+  if (stride == 0)
+ return false;
+
   bufObj = array->BufferObj;
   if (attr == 0) {
  /* save info about the first array */
  firstStride = stride;
  firstPtr = array->Ptr;
  firstBufObj = bufObj;
  userSpaceBuffer = !bufObj || !bufObj->Name;
   }
   else {
  /* check if other arrays interleave with the first, in same buffer */
@@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st,
 static void
 setup_non_interleaved_attribs(struct st_context *st,
   const struct st_vertex_program *vp,
   const struct gl_vertex_array **arrays,
   unsigned num_inputs)
 {
struct gl_context *ctx = st->ctx;
struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS];
struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}};
unsigned num_vbuffers = 0;
+   unsigned unref_buffers = 0;
GLuint attr;
 
for (attr = 0; attr < num_inputs;) {
   const unsigned mesaAttr = vp->index_to_input[attr];
   const struct gl_vertex_array *array;
   struct gl_buffer_object *bufobj;
   GLsizei stride;
   unsigned src_format;
   unsigned bufidx;
 
@@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st,
  if (!stobj || !stobj->buffer) {
 st->vertex_array_out_of_memory = true;
 return; /* out-of-memory error probably */
  }
 
  vbuffer[bufidx].buffer.resource = stobj->buffer;
  vbuffer[bufidx].is_user_buffer = false;
  vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr);
   }
   else {
- /* wrap user data */
- void *ptr;
-
- if (array->Ptr) {
-ptr = (void *) array->Ptr;
- }
- else {
-/* no array, use ctx->Current.Attrib[] value */
-ptr = (void *) ctx->Current.Attrib[mesaAttr];
-stride = 0;
+ if (stride == 0) {
+void *ptr = array->Ptr ? (void*)array->Ptr :
+ (void*)ctx->Current.Attrib[mesaAttr];
+
+vbuffer[bufidx].is_user_buffer = false;
+vbuffer[bufidx].buffer.resource = NULL;
+
+/* Use const_uploader for zero-stride vertex attributes, because
+ * it may use a better memory placement than stream_uploader.
+ * The reason is that zero-stride attributes can be fetched many
+ * times (thousands of times), so a better placement is going to
+ * perform better.
+ *
+ * Upload the maximum possible size, which is 4x GLdouble = 32.
+ */
+u_upload_data(st->can_bind_const_buffer_as_vertex ?
+ st->pipe->const_uploader :
+ st->pipe->stream_uploader,
+  0, 32, 32, ptr,
+  [bufidx].buffer_offset,
+  [bufidx].buffer.resource);
+unref_buffers |= 1u << bufidx;
+ } else {
+assert(array->Ptr);
+vbuffer[bufidx].buffer.user = array->Ptr;
+vbuffer[bufidx].is_user_buffer = true;
+vbuffer[bufidx].buffer_offset = 0;
  }
-
- assert(ptr);
-
- vbuffer[bufidx].buffer.user = ptr;
-