Re: [Mesa-dev] [PATCH v2] nv50, nvc0: optimize coherent buffer checking at draw time
On Wed, Dec 9, 2015 at 5:40 PM, Samuel Pitoiset wrote: > Instead of iterating over all the buffer resources looking for coherent > buffers, we keep track of a context-wide count. This will save some > iterations (and CPU cycles) in 99.99% case because usually coherent > buffers are not so used. > > Changes from v2: > - forgot to apply some changes for nv50 (texture/vertex bufs) > > Signed-off-by: Samuel Pitoiset > --- > src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 ++ > src/gallium/drivers/nouveau/nv50/nv50_state.c | 19 +++ > src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 44 > +++-- > src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 ++ > src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 26 +++ > src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +-- > 6 files changed, 64 insertions(+), 72 deletions(-) > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h > b/src/gallium/drivers/nouveau/nv50/nv50_context.h > index 2cebcd9..712d00e 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h > +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h > @@ -134,9 +134,11 @@ struct nv50_context { > struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; > uint16_t constbuf_dirty[3]; > uint16_t constbuf_valid[3]; > + uint16_t constbuf_coherent[3]; > > struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; > unsigned num_vtxbufs; > + uint32_t vtxbufs_coherent; > struct pipe_index_buffer idxbuf; > uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ > uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ > @@ -148,6 +150,7 @@ struct nv50_context { > > struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; > unsigned num_textures[3]; > + uint32_t textures_coherent[3]; > struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; > unsigned num_samplers[3]; > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c > b/src/gallium/drivers/nouveau/nv50/nv50_state.c > index fd7c7cd..b6e5c75 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c > @@ -661,9 +661,16 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, > int s, > assert(nr <= PIPE_MAX_SAMPLERS); > for (i = 0; i < nr; ++i) { >struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); > + struct pipe_resource *res = views[i]->texture; I'm moderately sure either views[i] or texture can be null. [Same for nvc0.] >if (old) > nv50_screen_tic_unlock(nv50->screen, old); > > + if (res->target == PIPE_BUFFER && > + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) > + nv50->textures_coherent[s] |= 1 << i; > + else > + nv50->textures_coherent[s] &= ~(1 << i); > + >pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); > } > > @@ -852,8 +859,13 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint > shader, uint index, >nv50->constbuf[s][i].offset = cb->buffer_offset; >nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), > 0x1); >nv50->constbuf_valid[s] |= 1 << i; > + if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) > + nv50->constbuf_coherent[s] |= 1 << i; > + else > + nv50->constbuf_coherent[s] &= ~(1 << i); You also need to clear it out in the user case. > } else { >nv50->constbuf_valid[s] &= ~(1 << i); > + nv50->constbuf_coherent[s] &= ~(1 << i); > } > nv50->constbuf_dirty[s] |= 1 << i; > > @@ -1000,6 +1012,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, > if (!vb) { >nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); >nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); > + nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); >return; > } > > @@ -1012,9 +1025,15 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, > nv50->vbo_constant |= 1 << dst_index; > else > nv50->vbo_constant &= ~(1 << dst_index); > + nv50->vtxbufs_coherent &= ~(1 << dst_index); >} else { > nv50->vbo_user &= ~(1 << dst_index); > nv50->vbo_constant &= ~(1 << dst_index); > + > + if (vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) I wonder if vb[i].buffer might be null here... Not sure if that's allowed or not... > +nv50->vtxbufs_coherent |= (1 << dst_index); > + else > +nv50->vtxbufs_coherent &= ~(1 << dst_index); >} > } > > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > index 85878d5..b6ba803 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > @@ -761,8 +761,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const
[Mesa-dev] [PATCH v2] nv50, nvc0: optimize coherent buffer checking at draw time
Instead of iterating over all the buffer resources looking for coherent buffers, we keep track of a context-wide count. This will save some iterations (and CPU cycles) in 99.99% case because usually coherent buffers are not so used. Changes from v2: - forgot to apply some changes for nv50 (texture/vertex bufs) Signed-off-by: Samuel Pitoiset --- src/gallium/drivers/nouveau/nv50/nv50_context.h | 3 ++ src/gallium/drivers/nouveau/nv50/nv50_state.c | 19 +++ src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 44 +++-- src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 3 ++ src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 26 +++ src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 41 +-- 6 files changed, 64 insertions(+), 72 deletions(-) diff --git a/src/gallium/drivers/nouveau/nv50/nv50_context.h b/src/gallium/drivers/nouveau/nv50/nv50_context.h index 2cebcd9..712d00e 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_context.h +++ b/src/gallium/drivers/nouveau/nv50/nv50_context.h @@ -134,9 +134,11 @@ struct nv50_context { struct nv50_constbuf constbuf[3][NV50_MAX_PIPE_CONSTBUFS]; uint16_t constbuf_dirty[3]; uint16_t constbuf_valid[3]; + uint16_t constbuf_coherent[3]; struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS]; unsigned num_vtxbufs; + uint32_t vtxbufs_coherent; struct pipe_index_buffer idxbuf; uint32_t vbo_fifo; /* bitmask of vertex elements to be pushed to FIFO */ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */ @@ -148,6 +150,7 @@ struct nv50_context { struct pipe_sampler_view *textures[3][PIPE_MAX_SAMPLERS]; unsigned num_textures[3]; + uint32_t textures_coherent[3]; struct nv50_tsc_entry *samplers[3][PIPE_MAX_SAMPLERS]; unsigned num_samplers[3]; diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c b/src/gallium/drivers/nouveau/nv50/nv50_state.c index fd7c7cd..b6e5c75 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c @@ -661,9 +661,16 @@ nv50_stage_set_sampler_views(struct nv50_context *nv50, int s, assert(nr <= PIPE_MAX_SAMPLERS); for (i = 0; i < nr; ++i) { struct nv50_tic_entry *old = nv50_tic_entry(nv50->textures[s][i]); + struct pipe_resource *res = views[i]->texture; if (old) nv50_screen_tic_unlock(nv50->screen, old); + if (res->target == PIPE_BUFFER && + (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT)) + nv50->textures_coherent[s] |= 1 << i; + else + nv50->textures_coherent[s] &= ~(1 << i); + pipe_sampler_view_reference(&nv50->textures[s][i], views[i]); } @@ -852,8 +859,13 @@ nv50_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, nv50->constbuf[s][i].offset = cb->buffer_offset; nv50->constbuf[s][i].size = MIN2(align(cb->buffer_size, 0x100), 0x1); nv50->constbuf_valid[s] |= 1 << i; + if (res->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) + nv50->constbuf_coherent[s] |= 1 << i; + else + nv50->constbuf_coherent[s] &= ~(1 << i); } else { nv50->constbuf_valid[s] &= ~(1 << i); + nv50->constbuf_coherent[s] &= ~(1 << i); } nv50->constbuf_dirty[s] |= 1 << i; @@ -1000,6 +1012,7 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, if (!vb) { nv50->vbo_user &= ~(((1ull << count) - 1) << start_slot); nv50->vbo_constant &= ~(((1ull << count) - 1) << start_slot); + nv50->vtxbufs_coherent &= ~(((1ull << count) - 1) << start_slot); return; } @@ -1012,9 +1025,15 @@ nv50_set_vertex_buffers(struct pipe_context *pipe, nv50->vbo_constant |= 1 << dst_index; else nv50->vbo_constant &= ~(1 << dst_index); + nv50->vtxbufs_coherent &= ~(1 << dst_index); } else { nv50->vbo_user &= ~(1 << dst_index); nv50->vbo_constant &= ~(1 << dst_index); + + if (vb[i].buffer->flags & PIPE_RESOURCE_FLAG_MAP_COHERENT) +nv50->vtxbufs_coherent |= (1 << dst_index); + else +nv50->vtxbufs_coherent &= ~(1 << dst_index); } } diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c index 85878d5..b6ba803 100644 --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c @@ -761,8 +761,7 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) { struct nv50_context *nv50 = nv50_context(pipe); struct nouveau_pushbuf *push = nv50->base.pushbuf; - bool tex_dirty = false; - int i, s; + int s; /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */ nv50->vb_elt_first = info->min_index + info->index_bias; @@ -791,27 +790,9 @@ nv50_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info) push->kick_notify = nv50_draw_vbo_kick_notify;