[Mesa-dev] [PATCH v2] nv50, nvc0: optimize coherent buffer checking at draw time

2015-12-09 Thread Samuel Pitoiset
Instead of iterating over all the buffer resources looking for coherent
buffers, we keep track of a context-wide count. This will save some
iterations (and CPU cycles) in 99.99% case because usually coherent
buffers are not so used.

Changes from v2:
 - forgot to apply some changes for nv50 (texture/vertex bufs)

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/nv50/nv50_context.h |  3 ++
 src/gallium/drivers/nouveau/nv50/nv50_state.c   | 19 +++
 src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 44 +++--
 src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  3 ++
 src/gallium/drivers/nouveau/nvc0/nvc0_state.c   | 26 +++
 src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +--
 6 files changed, 64 insertions(+), 72 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
b/src/gallium/drivers/nouveau/nv50/nv50_context.h
index 2cebcd9..712d00e 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
+++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
@@ -134,9 +134,11 @@ struct nv50_context {
struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
uint16_t constbuf_dirty[3];
uint16_t constbuf_valid[3];
+   uint16_t constbuf_coherent[3];
 
struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
unsigned num_vtxbufs;
+   uint32_t vtxbufs_coherent;
struct pipe_index_buffer idxbuf;
uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
@@ -148,6 +150,7 @@ struct nv50_context {
 
struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
unsigned num_textures[3];
+   uint32_t textures_coherent[3];
struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
unsigned num_samplers[3];
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
b/src/gallium/drivers/nouveau/nv50/nv50_state.c
index fd7c7cd..b6e5c75 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
@@ -661,9 +661,16 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, 
int s,
assert(nr <= PIPE_MAX_SAMPLERS);
for (i = 0; i < nr; ++i) {
   struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
+  struct pipe_resource *res = views[i]->texture;
   if (old)
  nv50_screen_tic_unlock(nv50->screen, old);
 
+  if (res->target == PIPE_BUFFER &&
+  (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
+ nv50->textures_coherent[s] |= 1 << i;
+  else
+ nv50->textures_coherent[s] &= ~(1 << i);
+
   pipe_sampler_view_reference(>textures[s][i], views[i]);
}
 
@@ -852,8 +859,13 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint 
shader, uint index,
   nv50->constbuf[s][i].offset = cb->buffer_offset;
   nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x1);
   nv50->constbuf_valid[s] |= 1 << i;
+  if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+ nv50->constbuf_coherent[s] |= 1 << i;
+  else
+ nv50->constbuf_coherent[s] &= ~(1 << i);
} else {
   nv50->constbuf_valid[s] &= ~(1 << i);
+  nv50->constbuf_coherent[s] &= ~(1 << i);
}
nv50->constbuf_dirty[s] |= 1 << i;
 
@@ -1000,6 +1012,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
if (!vb) {
   nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
   nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
+  nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
   return;
}
 
@@ -1012,9 +1025,15 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
 nv50->vbo_constant |= 1 << dst_index;
  else
 nv50->vbo_constant &= ~(1 << dst_index);
+ nv50->vtxbufs_coherent &= ~(1 << dst_index);
   } else {
  nv50->vbo_user &= ~(1 << dst_index);
  nv50->vbo_constant &= ~(1 << dst_index);
+
+ if (vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
+nv50->vtxbufs_coherent |= (1 << dst_index);
+ else
+nv50->vtxbufs_coherent &= ~(1 << dst_index);
   }
}
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 85878d5..b6ba803 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -761,8 +761,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 {
struct nv50_context *nv50 = nv50_context(pipe);
struct nouveau_pushbuf *push = nv50->base.pushbuf;
-   bool tex_dirty = false;
-   int i, s;
+   int s;
 
/* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
nv50->vb_elt_first = info->min_index + info->index_bias;
@@ -791,27 +790,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct 
pipe_draw_info *info)
 
push->kick_notify = 

Re: [Mesa-dev] [PATCH v2] nv50, nvc0: optimize coherent buffer checking at draw time

2015-12-09 Thread Ilia Mirkin
On Wed, Dec 9, 2015 at 5:40 PM, Samuel Pitoiset
 wrote:
> Instead of iterating over all the buffer resources looking for coherent
> buffers, we keep track of a context-wide count. This will save some
> iterations (and CPU cycles) in 99.99% case because usually coherent
> buffers are not so used.
>
> Changes from v2:
>  - forgot to apply some changes for nv50 (texture/vertex bufs)
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/nv50/nv50_context.h |  3 ++
>  src/gallium/drivers/nouveau/nv50/nv50_state.c   | 19 +++
>  src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 44 
> +++--
>  src/gallium/drivers/nouveau/nvc0/nvc0_context.h |  3 ++
>  src/gallium/drivers/nouveau/nvc0/nvc0_state.c   | 26 +++
>  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +--
>  6 files changed, 64 insertions(+), 72 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h 
> b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> index 2cebcd9..712d00e 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h
> @@ -134,9 +134,11 @@ struct nv50_context {
> struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS];
> uint16_t constbuf_dirty[3];
> uint16_t constbuf_valid[3];
> +   uint16_t constbuf_coherent[3];
>
> struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
> unsigned num_vtxbufs;
> +   uint32_t vtxbufs_coherent;
> struct pipe_index_buffer idxbuf;
> uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */
> uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
> @@ -148,6 +150,7 @@ struct nv50_context {
>
> struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS];
> unsigned num_textures[3];
> +   uint32_t textures_coherent[3];
> struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS];
> unsigned num_samplers[3];
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> index fd7c7cd..b6e5c75 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c
> @@ -661,9 +661,16 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, 
> int s,
> assert(nr <= PIPE_MAX_SAMPLERS);
> for (i = 0; i < nr; ++i) {
>struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]);
> +  struct pipe_resource *res = views[i]->texture;

I'm moderately sure either views[i] or texture can be null. [Same for nvc0.]

>if (old)
>   nv50_screen_tic_unlock(nv50->screen, old);
>
> +  if (res->target == PIPE_BUFFER &&
> +  (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT))
> + nv50->textures_coherent[s] |= 1 << i;
> +  else
> + nv50->textures_coherent[s] &= ~(1 << i);
> +
>pipe_sampler_view_reference(>textures[s][i], views[i]);
> }
>
> @@ -852,8 +859,13 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint 
> shader, uint index,
>nv50->constbuf[s][i].offset = cb->buffer_offset;
>nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 
> 0x1);
>nv50->constbuf_valid[s] |= 1 << i;
> +  if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)
> + nv50->constbuf_coherent[s] |= 1 << i;
> +  else
> + nv50->constbuf_coherent[s] &= ~(1 << i);

You also need to clear it out in the user case.

> } else {
>nv50->constbuf_valid[s] &= ~(1 << i);
> +  nv50->constbuf_coherent[s] &= ~(1 << i);
> }
> nv50->constbuf_dirty[s] |= 1 << i;
>
> @@ -1000,6 +1012,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
> if (!vb) {
>nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot);
>nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot);
> +  nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot);
>return;
> }
>
> @@ -1012,9 +1025,15 @@ nv50_set_vertex_buffers(struct pipe_context *pipe,
>  nv50->vbo_constant |= 1 << dst_index;
>   else
>  nv50->vbo_constant &= ~(1 << dst_index);
> + nv50->vtxbufs_coherent &= ~(1 << dst_index);
>} else {
>   nv50->vbo_user &= ~(1 << dst_index);
>   nv50->vbo_constant &= ~(1 << dst_index);
> +
> + if (vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)

I wonder if vb[i].buffer might be null here... Not sure if that's
allowed or not...

> +nv50->vtxbufs_coherent |= (1 << dst_index);
> + else
> +nv50->vtxbufs_coherent &= ~(1 << dst_index);
>}
> }
>
> diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c 
> b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> index 85878d5..b6ba803 100644
> --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
> @@ -761,8 +761,7