Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here
On 04.05.2017 00:02, Marek Olšák wrote: On Wed, May 3, 2017 at 6:04 PM, Nicolai Hähnlewrote: On 01.05.2017 14:53, Marek Olšák wrote: From: Marek Olšák This is the best place to do it. Now drivers without u_vbuf don't have to do it. --- src/mesa/state_tracker/st_atom_array.c | 56 -- src/mesa/state_tracker/st_context.c| 2 ++ src/mesa/state_tracker/st_context.h| 1 + 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index cc9cac1..813468b 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -37,20 +37,21 @@ */ #include "st_context.h" #include "st_atom.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" #include "st_program.h" #include "cso_cache/cso_context.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "main/bufferobj.h" #include "main/glformats.h" /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */ static const uint16_t vertex_formats[][4][4] = { { /* GL_BYTE */ { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R8G8B8_SSCALED, @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp, for (attr = 0; attr < num_inputs; attr++) { const struct gl_vertex_array *array; const struct gl_buffer_object *bufObj; GLsizei stride; array = get_client_array(arrays, vp->index_to_input[attr]); if (!array) continue; stride = array->StrideB; /* in bytes */ + + /* To keep things simple, don't allow interleaved zero-stride attribs. */ + if (stride == 0) + return false; + bufObj = array->BufferObj; if (attr == 0) { /* save info about the first array */ firstStride = stride; firstPtr = array->Ptr; firstBufObj = bufObj; userSpaceBuffer = !bufObj || !bufObj->Name; } else { /* check if other arrays interleave with the first, in same buffer */ @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st, static void setup_non_interleaved_attribs(struct st_context *st, const struct st_vertex_program *vp, const struct gl_vertex_array **arrays, unsigned num_inputs) { struct gl_context *ctx = st->ctx; struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; unsigned num_vbuffers = 0; + unsigned unref_buffers = 0; GLuint attr; for (attr = 0; attr < num_inputs;) { const unsigned mesaAttr = vp->index_to_input[attr]; const struct gl_vertex_array *array; struct gl_buffer_object *bufobj; GLsizei stride; unsigned src_format; unsigned bufidx; @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st, if (!stobj || !stobj->buffer) { st->vertex_array_out_of_memory = true; return; /* out-of-memory error probably */ } vbuffer[bufidx].buffer.resource = stobj->buffer; vbuffer[bufidx].is_user_buffer = false; vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr); } else { - /* wrap user data */ - void *ptr; - - if (array->Ptr) { -ptr = (void *) array->Ptr; - } - else { -/* no array, use ctx->Current.Attrib[] value */ -ptr = (void *) ctx->Current.Attrib[mesaAttr]; -stride = 0; + if (stride == 0) { +void *ptr = array->Ptr ? (void*)array->Ptr : + (void*)ctx->Current.Attrib[mesaAttr]; + +vbuffer[bufidx].is_user_buffer = false; +vbuffer[bufidx].buffer.resource = NULL; + +/* Use const_uploader for zero-stride vertex attributes, because + * it may use a better memory placement than stream_uploader. + * The reason is that zero-stride attributes can be fetched many + * times (thousands of times), so a better placement is going to + * perform better. + * + * Upload the maximum possible size, which is 4x GLdouble = 32. + */ +u_upload_data(st->can_bind_const_buffer_as_vertex ? + st->pipe->const_uploader : + st->pipe->stream_uploader, + 0, 32, 32, ptr, + [bufidx].buffer_offset, + [bufidx].buffer.resource); +unref_buffers |= 1u << bufidx; In the array->Ptr != NULL case, can we get a more accurate size here? Otherwise we might read off the end of allocated memory and crash. This wasn't an issue before in radeonsi,
Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here
On Wed, May 3, 2017 at 6:04 PM, Nicolai Hähnlewrote: > On 01.05.2017 14:53, Marek Olšák wrote: >> >> From: Marek Olšák >> >> This is the best place to do it. Now drivers without u_vbuf don't have to >> do it. >> --- >> src/mesa/state_tracker/st_atom_array.c | 56 >> -- >> src/mesa/state_tracker/st_context.c| 2 ++ >> src/mesa/state_tracker/st_context.h| 1 + >> 3 files changed, 43 insertions(+), 16 deletions(-) >> >> diff --git a/src/mesa/state_tracker/st_atom_array.c >> b/src/mesa/state_tracker/st_atom_array.c >> index cc9cac1..813468b 100644 >> --- a/src/mesa/state_tracker/st_atom_array.c >> +++ b/src/mesa/state_tracker/st_atom_array.c >> @@ -37,20 +37,21 @@ >> */ >> >> #include "st_context.h" >> #include "st_atom.h" >> #include "st_cb_bufferobjects.h" >> #include "st_draw.h" >> #include "st_program.h" >> >> #include "cso_cache/cso_context.h" >> #include "util/u_math.h" >> +#include "util/u_upload_mgr.h" >> #include "main/bufferobj.h" >> #include "main/glformats.h" >> >> /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */ >> static const uint16_t vertex_formats[][4][4] = { >> { /* GL_BYTE */ >>{ >> PIPE_FORMAT_R8_SSCALED, >> PIPE_FORMAT_R8G8_SSCALED, >> PIPE_FORMAT_R8G8B8_SSCALED, >> @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program >> *vp, >> for (attr = 0; attr < num_inputs; attr++) { >>const struct gl_vertex_array *array; >>const struct gl_buffer_object *bufObj; >>GLsizei stride; >> >>array = get_client_array(arrays, vp->index_to_input[attr]); >>if (!array) >> continue; >> >>stride = array->StrideB; /* in bytes */ >> + >> + /* To keep things simple, don't allow interleaved zero-stride >> attribs. */ >> + if (stride == 0) >> + return false; >> + >>bufObj = array->BufferObj; >>if (attr == 0) { >> /* save info about the first array */ >> firstStride = stride; >> firstPtr = array->Ptr; >> firstBufObj = bufObj; >> userSpaceBuffer = !bufObj || !bufObj->Name; >>} >>else { >> /* check if other arrays interleave with the first, in same >> buffer */ >> @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st, >> static void >> setup_non_interleaved_attribs(struct st_context *st, >>const struct st_vertex_program *vp, >>const struct gl_vertex_array **arrays, >>unsigned num_inputs) >> { >> struct gl_context *ctx = st->ctx; >> struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; >> struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; >> unsigned num_vbuffers = 0; >> + unsigned unref_buffers = 0; >> GLuint attr; >> >> for (attr = 0; attr < num_inputs;) { >>const unsigned mesaAttr = vp->index_to_input[attr]; >>const struct gl_vertex_array *array; >>struct gl_buffer_object *bufobj; >>GLsizei stride; >>unsigned src_format; >>unsigned bufidx; >> >> @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st, >> if (!stobj || !stobj->buffer) { >> st->vertex_array_out_of_memory = true; >> return; /* out-of-memory error probably */ >> } >> >> vbuffer[bufidx].buffer.resource = stobj->buffer; >> vbuffer[bufidx].is_user_buffer = false; >> vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr); >>} >>else { >> - /* wrap user data */ >> - void *ptr; >> - >> - if (array->Ptr) { >> -ptr = (void *) array->Ptr; >> - } >> - else { >> -/* no array, use ctx->Current.Attrib[] value */ >> -ptr = (void *) ctx->Current.Attrib[mesaAttr]; >> -stride = 0; >> + if (stride == 0) { >> +void *ptr = array->Ptr ? (void*)array->Ptr : >> + >> (void*)ctx->Current.Attrib[mesaAttr]; >> + >> +vbuffer[bufidx].is_user_buffer = false; >> +vbuffer[bufidx].buffer.resource = NULL; >> + >> +/* Use const_uploader for zero-stride vertex attributes, >> because >> + * it may use a better memory placement than stream_uploader. >> + * The reason is that zero-stride attributes can be fetched >> many >> + * times (thousands of times), so a better placement is going >> to >> + * perform better. >> + * >> + * Upload the maximum possible size, which is 4x GLdouble = >> 32. >> + */ >> +u_upload_data(st->can_bind_const_buffer_as_vertex ? >> + st->pipe->const_uploader : >> + st->pipe->stream_uploader, >> +
Re: [Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here
On 01.05.2017 14:53, Marek Olšák wrote: From: Marek OlšákThis is the best place to do it. Now drivers without u_vbuf don't have to do it. --- src/mesa/state_tracker/st_atom_array.c | 56 -- src/mesa/state_tracker/st_context.c| 2 ++ src/mesa/state_tracker/st_context.h| 1 + 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index cc9cac1..813468b 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -37,20 +37,21 @@ */ #include "st_context.h" #include "st_atom.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" #include "st_program.h" #include "cso_cache/cso_context.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "main/bufferobj.h" #include "main/glformats.h" /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */ static const uint16_t vertex_formats[][4][4] = { { /* GL_BYTE */ { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R8G8B8_SSCALED, @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp, for (attr = 0; attr < num_inputs; attr++) { const struct gl_vertex_array *array; const struct gl_buffer_object *bufObj; GLsizei stride; array = get_client_array(arrays, vp->index_to_input[attr]); if (!array) continue; stride = array->StrideB; /* in bytes */ + + /* To keep things simple, don't allow interleaved zero-stride attribs. */ + if (stride == 0) + return false; + bufObj = array->BufferObj; if (attr == 0) { /* save info about the first array */ firstStride = stride; firstPtr = array->Ptr; firstBufObj = bufObj; userSpaceBuffer = !bufObj || !bufObj->Name; } else { /* check if other arrays interleave with the first, in same buffer */ @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st, static void setup_non_interleaved_attribs(struct st_context *st, const struct st_vertex_program *vp, const struct gl_vertex_array **arrays, unsigned num_inputs) { struct gl_context *ctx = st->ctx; struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; unsigned num_vbuffers = 0; + unsigned unref_buffers = 0; GLuint attr; for (attr = 0; attr < num_inputs;) { const unsigned mesaAttr = vp->index_to_input[attr]; const struct gl_vertex_array *array; struct gl_buffer_object *bufobj; GLsizei stride; unsigned src_format; unsigned bufidx; @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st, if (!stobj || !stobj->buffer) { st->vertex_array_out_of_memory = true; return; /* out-of-memory error probably */ } vbuffer[bufidx].buffer.resource = stobj->buffer; vbuffer[bufidx].is_user_buffer = false; vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr); } else { - /* wrap user data */ - void *ptr; - - if (array->Ptr) { -ptr = (void *) array->Ptr; - } - else { -/* no array, use ctx->Current.Attrib[] value */ -ptr = (void *) ctx->Current.Attrib[mesaAttr]; -stride = 0; + if (stride == 0) { +void *ptr = array->Ptr ? (void*)array->Ptr : + (void*)ctx->Current.Attrib[mesaAttr]; + +vbuffer[bufidx].is_user_buffer = false; +vbuffer[bufidx].buffer.resource = NULL; + +/* Use const_uploader for zero-stride vertex attributes, because + * it may use a better memory placement than stream_uploader. + * The reason is that zero-stride attributes can be fetched many + * times (thousands of times), so a better placement is going to + * perform better. + * + * Upload the maximum possible size, which is 4x GLdouble = 32. + */ +u_upload_data(st->can_bind_const_buffer_as_vertex ? + st->pipe->const_uploader : + st->pipe->stream_uploader, + 0, 32, 32, ptr, + [bufidx].buffer_offset, + [bufidx].buffer.resource); +unref_buffers |= 1u << bufidx; In the array->Ptr != NULL case, can we get a more accurate size here? Otherwise we might read off the end of allocated memory and crash. This wasn't an issue before in radeonsi, because we still use u_vbuf with compatibility profiles. Cheers, Nicolai +
[Mesa-dev] [PATCH 16/17] st/mesa: upload zero-stride vertex attributes here
From: Marek OlšákThis is the best place to do it. Now drivers without u_vbuf don't have to do it. --- src/mesa/state_tracker/st_atom_array.c | 56 -- src/mesa/state_tracker/st_context.c| 2 ++ src/mesa/state_tracker/st_context.h| 1 + 3 files changed, 43 insertions(+), 16 deletions(-) diff --git a/src/mesa/state_tracker/st_atom_array.c b/src/mesa/state_tracker/st_atom_array.c index cc9cac1..813468b 100644 --- a/src/mesa/state_tracker/st_atom_array.c +++ b/src/mesa/state_tracker/st_atom_array.c @@ -37,20 +37,21 @@ */ #include "st_context.h" #include "st_atom.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" #include "st_program.h" #include "cso_cache/cso_context.h" #include "util/u_math.h" +#include "util/u_upload_mgr.h" #include "main/bufferobj.h" #include "main/glformats.h" /* vertex_formats[gltype - GL_BYTE][integer*2 + normalized][size - 1] */ static const uint16_t vertex_formats[][4][4] = { { /* GL_BYTE */ { PIPE_FORMAT_R8_SSCALED, PIPE_FORMAT_R8G8_SSCALED, PIPE_FORMAT_R8G8B8_SSCALED, @@ -327,20 +328,25 @@ is_interleaved_arrays(const struct st_vertex_program *vp, for (attr = 0; attr < num_inputs; attr++) { const struct gl_vertex_array *array; const struct gl_buffer_object *bufObj; GLsizei stride; array = get_client_array(arrays, vp->index_to_input[attr]); if (!array) continue; stride = array->StrideB; /* in bytes */ + + /* To keep things simple, don't allow interleaved zero-stride attribs. */ + if (stride == 0) + return false; + bufObj = array->BufferObj; if (attr == 0) { /* save info about the first array */ firstStride = stride; firstPtr = array->Ptr; firstBufObj = bufObj; userSpaceBuffer = !bufObj || !bufObj->Name; } else { /* check if other arrays interleave with the first, in same buffer */ @@ -564,20 +570,21 @@ setup_interleaved_attribs(struct st_context *st, static void setup_non_interleaved_attribs(struct st_context *st, const struct st_vertex_program *vp, const struct gl_vertex_array **arrays, unsigned num_inputs) { struct gl_context *ctx = st->ctx; struct pipe_vertex_buffer vbuffer[PIPE_MAX_ATTRIBS]; struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS] = {{0}}; unsigned num_vbuffers = 0; + unsigned unref_buffers = 0; GLuint attr; for (attr = 0; attr < num_inputs;) { const unsigned mesaAttr = vp->index_to_input[attr]; const struct gl_vertex_array *array; struct gl_buffer_object *bufobj; GLsizei stride; unsigned src_format; unsigned bufidx; @@ -601,54 +608,71 @@ setup_non_interleaved_attribs(struct st_context *st, if (!stobj || !stobj->buffer) { st->vertex_array_out_of_memory = true; return; /* out-of-memory error probably */ } vbuffer[bufidx].buffer.resource = stobj->buffer; vbuffer[bufidx].is_user_buffer = false; vbuffer[bufidx].buffer_offset = pointer_to_offset(array->Ptr); } else { - /* wrap user data */ - void *ptr; - - if (array->Ptr) { -ptr = (void *) array->Ptr; - } - else { -/* no array, use ctx->Current.Attrib[] value */ -ptr = (void *) ctx->Current.Attrib[mesaAttr]; -stride = 0; + if (stride == 0) { +void *ptr = array->Ptr ? (void*)array->Ptr : + (void*)ctx->Current.Attrib[mesaAttr]; + +vbuffer[bufidx].is_user_buffer = false; +vbuffer[bufidx].buffer.resource = NULL; + +/* Use const_uploader for zero-stride vertex attributes, because + * it may use a better memory placement than stream_uploader. + * The reason is that zero-stride attributes can be fetched many + * times (thousands of times), so a better placement is going to + * perform better. + * + * Upload the maximum possible size, which is 4x GLdouble = 32. + */ +u_upload_data(st->can_bind_const_buffer_as_vertex ? + st->pipe->const_uploader : + st->pipe->stream_uploader, + 0, 32, 32, ptr, + [bufidx].buffer_offset, + [bufidx].buffer.resource); +unref_buffers |= 1u << bufidx; + } else { +assert(array->Ptr); +vbuffer[bufidx].buffer.user = array->Ptr; +vbuffer[bufidx].is_user_buffer = true; +vbuffer[bufidx].buffer_offset = 0; } - - assert(ptr); - - vbuffer[bufidx].buffer.user = ptr; -