Reviewed-by: Pierre Moreau <pierre.mor...@free.fr>
On 06:24 PM - Oct 28 2015, Samuel Pitoiset wrote: > Changes since v2: > - remove unused 'nv50_hw_query_funcs' struct > > Signed-off-by: Samuel Pitoiset <samuel.pitoi...@gmail.com> > --- > src/gallium/drivers/nouveau/Makefile.sources | 2 + > src/gallium/drivers/nouveau/nv50/nv50_query.c | 354 ++----------------- > src/gallium/drivers/nouveau/nv50/nv50_query.h | 26 +- > src/gallium/drivers/nouveau/nv50/nv50_query_hw.c | 389 > +++++++++++++++++++++ > src/gallium/drivers/nouveau/nv50/nv50_query_hw.h | 39 +++ > .../drivers/nouveau/nv50/nv50_shader_state.c | 7 +- > src/gallium/drivers/nouveau/nv50/nv50_state.c | 3 +- > src/gallium/drivers/nouveau/nv50/nv50_vbo.c | 5 +- > 8 files changed, 476 insertions(+), 349 deletions(-) > create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.c > create mode 100644 src/gallium/drivers/nouveau/nv50/nv50_query_hw.h > > diff --git a/src/gallium/drivers/nouveau/Makefile.sources > b/src/gallium/drivers/nouveau/Makefile.sources > index 06d9d97..83f8113 100644 > --- a/src/gallium/drivers/nouveau/Makefile.sources > +++ b/src/gallium/drivers/nouveau/Makefile.sources > @@ -74,6 +74,8 @@ NV50_C_SOURCES := \ > nv50/nv50_push.c \ > nv50/nv50_query.c \ > nv50/nv50_query.h \ > + nv50/nv50_query_hw.c \ > + nv50/nv50_query_hw.h \ > nv50/nv50_resource.c \ > nv50/nv50_resource.h \ > nv50/nv50_screen.c \ > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.c > b/src/gallium/drivers/nouveau/nv50/nv50_query.c > index 1b4abdb..dd9b85b 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_query.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.c > @@ -26,334 +26,45 @@ > > #include "nv50/nv50_context.h" > #include "nv50/nv50_query.h" > -#include "nv_object.xml.h" > - > -#define NV50_QUERY_STATE_READY 0 > -#define NV50_QUERY_STATE_ACTIVE 1 > -#define NV50_QUERY_STATE_ENDED 2 > -#define NV50_QUERY_STATE_FLUSHED 3 > - > -/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts > - * (since we use only a single GPU channel per screen) will not work > properly. > - * > - * The first is not that big of an issue because OpenGL does not allow nested > - * queries anyway. > - */ > - > -#define NV50_QUERY_ALLOC_SPACE 256 > - > -static bool > -nv50_query_allocate(struct nv50_context *nv50, struct nv50_query *q, int > size) > -{ > - struct nv50_screen *screen = nv50->screen; > - int ret; > - > - if (q->bo) { > - nouveau_bo_ref(NULL, &q->bo); > - if (q->mm) { > - if (q->state == NV50_QUERY_STATE_READY) > - nouveau_mm_free(q->mm); > - else > - nouveau_fence_work(screen->base.fence.current, > nouveau_mm_free_work, > - q->mm); > - } > - } > - if (size) { > - q->mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, > &q->base); > - if (!q->bo) > - return false; > - q->offset = q->base; > - > - ret = nouveau_bo_map(q->bo, 0, screen->base.client); > - if (ret) { > - nv50_query_allocate(nv50, q, 0); > - return false; > - } > - q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base); > - } > - return true; > -} > - > -static void > -nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) > -{ > - nv50_query_allocate(nv50_context(pipe), nv50_query(pq), 0); > - nouveau_fence_ref(NULL, &nv50_query(pq)->fence); > - FREE(nv50_query(pq)); > -} > +#include "nv50/nv50_query_hw.h" > > static struct pipe_query * > -nv50_query_create(struct pipe_context *pipe, unsigned type, unsigned index) > +nv50_create_query(struct pipe_context *pipe, unsigned type, unsigned index) > { > struct nv50_context *nv50 = nv50_context(pipe); > struct nv50_query *q; > > - q = CALLOC_STRUCT(nv50_query); > - if (!q) > - return NULL; > - > - if (!nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE)) { > - FREE(q); > - return NULL; > - } > - > - q->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || > - type == PIPE_QUERY_PRIMITIVES_EMITTED || > - type == PIPE_QUERY_SO_STATISTICS || > - type == PIPE_QUERY_PIPELINE_STATISTICS); > - q->type = type; > - > - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { > - q->offset -= 32; > - q->data -= 32 / sizeof(*q->data); /* we advance before query_begin ! */ > - } > - > + q = nv50_hw_create_query(nv50, type, index); > return (struct pipe_query *)q; > } > > static void > -nv50_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, > - unsigned offset, uint32_t get) > +nv50_destroy_query(struct pipe_context *pipe, struct pipe_query *pq) > { > - offset += q->offset; > - > - PUSH_SPACE(push, 5); > - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); > - BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); > - PUSH_DATAh(push, q->bo->offset + offset); > - PUSH_DATA (push, q->bo->offset + offset); > - PUSH_DATA (push, q->sequence); > - PUSH_DATA (push, get); > + struct nv50_query *q = nv50_query(pq); > + q->funcs->destroy_query(nv50_context(pipe), q); > } > > static boolean > -nv50_query_begin(struct pipe_context *pipe, struct pipe_query *pq) > +nv50_begin_query(struct pipe_context *pipe, struct pipe_query *pq) > { > - struct nv50_context *nv50 = nv50_context(pipe); > - struct nouveau_pushbuf *push = nv50->base.pushbuf; > struct nv50_query *q = nv50_query(pq); > - > - /* For occlusion queries we have to change the storage, because a previous > - * query might set the initial render conition to false even *after* we > re- > - * initialized it to true. > - */ > - if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { > - q->offset += 32; > - q->data += 32 / sizeof(*q->data); > - if (q->offset - q->base == NV50_QUERY_ALLOC_SPACE) > - nv50_query_allocate(nv50, q, NV50_QUERY_ALLOC_SPACE); > - > - /* XXX: can we do this with the GPU, and sync with respect to a > previous > - * query ? > - */ > - q->data[0] = q->sequence; /* initialize sequence */ > - q->data[1] = 1; /* initial render condition = true */ > - q->data[4] = q->sequence + 1; /* for comparison COND_MODE */ > - q->data[5] = 0; > - } > - if (!q->is64bit) > - q->data[0] = q->sequence++; /* the previously used one */ > - > - switch (q->type) { > - case PIPE_QUERY_OCCLUSION_COUNTER: > - q->nesting = nv50->screen->num_occlusion_queries_active++; > - if (q->nesting) { > - nv50_query_get(push, q, 0x10, 0x0100f002); > - } else { > - PUSH_SPACE(push, 4); > - BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); > - PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); > - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); > - PUSH_DATA (push, 1); > - } > - break; > - case PIPE_QUERY_PRIMITIVES_GENERATED: > - nv50_query_get(push, q, 0x10, 0x06805002); > - break; > - case PIPE_QUERY_PRIMITIVES_EMITTED: > - nv50_query_get(push, q, 0x10, 0x05805002); > - break; > - case PIPE_QUERY_SO_STATISTICS: > - nv50_query_get(push, q, 0x20, 0x05805002); > - nv50_query_get(push, q, 0x30, 0x06805002); > - break; > - case PIPE_QUERY_PIPELINE_STATISTICS: > - nv50_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ > - nv50_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ > - nv50_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ > - nv50_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ > - nv50_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ > - nv50_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ > - nv50_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ > - nv50_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ > - break; > - case PIPE_QUERY_TIME_ELAPSED: > - nv50_query_get(push, q, 0x10, 0x00005002); > - break; > - default: > - break; > - } > - q->state = NV50_QUERY_STATE_ACTIVE; > - return true; > + return q->funcs->begin_query(nv50_context(pipe), q); > } > > static void > -nv50_query_end(struct pipe_context *pipe, struct pipe_query *pq) > +nv50_end_query(struct pipe_context *pipe, struct pipe_query *pq) > { > - struct nv50_context *nv50 = nv50_context(pipe); > - struct nouveau_pushbuf *push = nv50->base.pushbuf; > struct nv50_query *q = nv50_query(pq); > - > - q->state = NV50_QUERY_STATE_ENDED; > - > - switch (q->type) { > - case PIPE_QUERY_OCCLUSION_COUNTER: > - nv50_query_get(push, q, 0, 0x0100f002); > - if (--nv50->screen->num_occlusion_queries_active == 0) { > - PUSH_SPACE(push, 2); > - BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); > - PUSH_DATA (push, 0); > - } > - break; > - case PIPE_QUERY_PRIMITIVES_GENERATED: > - nv50_query_get(push, q, 0, 0x06805002); > - break; > - case PIPE_QUERY_PRIMITIVES_EMITTED: > - nv50_query_get(push, q, 0, 0x05805002); > - break; > - case PIPE_QUERY_SO_STATISTICS: > - nv50_query_get(push, q, 0x00, 0x05805002); > - nv50_query_get(push, q, 0x10, 0x06805002); > - break; > - case PIPE_QUERY_PIPELINE_STATISTICS: > - nv50_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ > - nv50_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ > - nv50_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ > - nv50_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ > - nv50_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ > - nv50_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ > - nv50_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ > - nv50_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ > - break; > - case PIPE_QUERY_TIMESTAMP: > - q->sequence++; > - /* fall through */ > - case PIPE_QUERY_TIME_ELAPSED: > - nv50_query_get(push, q, 0, 0x00005002); > - break; > - case PIPE_QUERY_GPU_FINISHED: > - q->sequence++; > - nv50_query_get(push, q, 0, 0x1000f010); > - break; > - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: > - q->sequence++; > - nv50_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); > - break; > - case PIPE_QUERY_TIMESTAMP_DISJOINT: > - /* This query is not issued on GPU because disjoint is forced to false > */ > - q->state = NV50_QUERY_STATE_READY; > - break; > - default: > - assert(0); > - break; > - } > - > - if (q->is64bit) > - nouveau_fence_ref(nv50->screen->base.fence.current, &q->fence); > -} > - > -static inline void > -nv50_query_update(struct nv50_query *q) > -{ > - if (q->is64bit) { > - if (nouveau_fence_signalled(q->fence)) > - q->state = NV50_QUERY_STATE_READY; > - } else { > - if (q->data[0] == q->sequence) > - q->state = NV50_QUERY_STATE_READY; > - } > + q->funcs->end_query(nv50_context(pipe), q); > } > > static boolean > -nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, > - boolean wait, union pipe_query_result *result) > +nv50_get_query_result(struct pipe_context *pipe, struct pipe_query *pq, > + boolean wait, union pipe_query_result *result) > { > - struct nv50_context *nv50 = nv50_context(pipe); > struct nv50_query *q = nv50_query(pq); > - uint64_t *res64 = (uint64_t *)result; > - uint32_t *res32 = (uint32_t *)result; > - uint8_t *res8 = (uint8_t *)result; > - uint64_t *data64 = (uint64_t *)q->data; > - int i; > - > - if (q->state != NV50_QUERY_STATE_READY) > - nv50_query_update(q); > - > - if (q->state != NV50_QUERY_STATE_READY) { > - if (!wait) { > - /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ > - if (q->state != NV50_QUERY_STATE_FLUSHED) { > - q->state = NV50_QUERY_STATE_FLUSHED; > - PUSH_KICK(nv50->base.pushbuf); > - } > - return false; > - } > - if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) > - return false; > - } > - q->state = NV50_QUERY_STATE_READY; > - > - switch (q->type) { > - case PIPE_QUERY_GPU_FINISHED: > - res8[0] = true; > - break; > - case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ > - res64[0] = q->data[1] - q->data[5]; > - break; > - case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ > - case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ > - res64[0] = data64[0] - data64[2]; > - break; > - case PIPE_QUERY_SO_STATISTICS: > - res64[0] = data64[0] - data64[4]; > - res64[1] = data64[2] - data64[6]; > - break; > - case PIPE_QUERY_PIPELINE_STATISTICS: > - for (i = 0; i < 8; ++i) > - res64[i] = data64[i * 2] - data64[16 + i * 2]; > - break; > - case PIPE_QUERY_TIMESTAMP: > - res64[0] = data64[1]; > - break; > - case PIPE_QUERY_TIMESTAMP_DISJOINT: > - res64[0] = 1000000000; > - res8[8] = false; > - break; > - case PIPE_QUERY_TIME_ELAPSED: > - res64[0] = data64[1] - data64[3]; > - break; > - case NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: > - res32[0] = q->data[1]; > - break; > - default: > - return false; > - } > - > - return true; > -} > - > -void > -nv84_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) > -{ > - unsigned offset = q->offset; > - > - PUSH_SPACE(push, 5); > - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); > - BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); > - PUSH_DATAh(push, q->bo->offset + offset); > - PUSH_DATA (push, q->bo->offset + offset); > - PUSH_DATA (push, q->sequence); > - PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); > + return q->funcs->get_query_result(nv50_context(pipe), q, wait, result); > } > > static void > @@ -363,7 +74,8 @@ nv50_render_condition(struct pipe_context *pipe, > { > struct nv50_context *nv50 = nv50_context(pipe); > struct nouveau_pushbuf *push = nv50->base.pushbuf; > - struct nv50_query *q; > + struct nv50_query *q = nv50_query(pq); > + struct nv50_hw_query *hq = nv50_hw_query(q); > uint32_t cond; > bool wait = > mode != PIPE_RENDER_COND_NO_WAIT && > @@ -373,7 +85,6 @@ nv50_render_condition(struct pipe_context *pipe, > cond = NV50_3D_COND_MODE_ALWAYS; > } > else { > - q = nv50_query(pq); > /* NOTE: comparison of 2 queries only works if both have completed */ > switch (q->type) { > case PIPE_QUERY_SO_OVERFLOW_PREDICATE: > @@ -384,7 +95,7 @@ nv50_render_condition(struct pipe_context *pipe, > case PIPE_QUERY_OCCLUSION_COUNTER: > case PIPE_QUERY_OCCLUSION_PREDICATE: > if (likely(!condition)) { > - if (unlikely(q->nesting)) > + if (unlikely(hq->nesting)) > cond = wait ? NV50_3D_COND_MODE_NOT_EQUAL : > NV50_3D_COND_MODE_ALWAYS; > else > @@ -419,28 +130,15 @@ nv50_render_condition(struct pipe_context *pipe, > PUSH_DATA (push, 0); > } > > - PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); > + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); > BEGIN_NV04(push, NV50_3D(COND_ADDRESS_HIGH), 3); > - PUSH_DATAh(push, q->bo->offset + q->offset); > - PUSH_DATA (push, q->bo->offset + q->offset); > + PUSH_DATAh(push, hq->bo->offset + hq->offset); > + PUSH_DATA (push, hq->bo->offset + hq->offset); > PUSH_DATA (push, cond); > > BEGIN_NV04(push, NV50_2D(COND_ADDRESS_HIGH), 2); > - PUSH_DATAh(push, q->bo->offset + q->offset); > - PUSH_DATA (push, q->bo->offset + q->offset); > -} > - > -void > -nv50_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, > - struct nv50_query *q, unsigned result_offset) > -{ > - nv50_query_update(q); > - if (q->state != NV50_QUERY_STATE_READY) > - nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, push->client); > - q->state = NV50_QUERY_STATE_READY; > - > - BEGIN_NV04(push, SUBC_3D(method), 1); > - PUSH_DATA (push, q->data[result_offset / 4]); > + PUSH_DATAh(push, hq->bo->offset + hq->offset); > + PUSH_DATA (push, hq->bo->offset + hq->offset); > } > > void > @@ -448,10 +146,10 @@ nv50_init_query_functions(struct nv50_context *nv50) > { > struct pipe_context *pipe = &nv50->base.pipe; > > - pipe->create_query = nv50_query_create; > - pipe->destroy_query = nv50_query_destroy; > - pipe->begin_query = nv50_query_begin; > - pipe->end_query = nv50_query_end; > - pipe->get_query_result = nv50_query_result; > + pipe->create_query = nv50_create_query; > + pipe->destroy_query = nv50_destroy_query; > + pipe->begin_query = nv50_begin_query; > + pipe->end_query = nv50_end_query; > + pipe->get_query_result = nv50_get_query_result; > pipe->render_condition = nv50_render_condition; > } > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query.h > b/src/gallium/drivers/nouveau/nv50/nv50_query.h > index a703013..d990285 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_query.h > +++ b/src/gallium/drivers/nouveau/nv50/nv50_query.h > @@ -4,23 +4,22 @@ > #include "pipe/p_context.h" > > #include "nouveau_context.h" > -#include "nouveau_mm.h" > > -#define NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) > +struct nv50_context; > +struct nv50_query; > + > +struct nv50_query_funcs { > + void (*destroy_query)(struct nv50_context *, struct nv50_query *); > + boolean (*begin_query)(struct nv50_context *, struct nv50_query *); > + void (*end_query)(struct nv50_context *, struct nv50_query *); > + boolean (*get_query_result)(struct nv50_context *, struct nv50_query *, > + boolean, union pipe_query_result *); > +}; > > struct nv50_query { > - uint32_t *data; > + const struct nv50_query_funcs *funcs; > uint16_t type; > uint16_t index; > - uint32_t sequence; > - struct nouveau_bo *bo; > - uint32_t base; > - uint32_t offset; /* base + i * 32 */ > - uint8_t state; > - bool is64bit; > - int nesting; /* only used for occlusion queries */ > - struct nouveau_mm_allocation *mm; > - struct nouveau_fence *fence; > }; > > static inline struct nv50_query * > @@ -30,8 +29,5 @@ nv50_query(struct pipe_query *pipe) > } > > void nv50_init_query_functions(struct nv50_context *); > -void nv50_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, > - struct nv50_query *, unsigned result_offset); > -void nv84_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); > > #endif > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c > b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c > new file mode 100644 > index 0000000..fcdd183 > --- /dev/null > +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c > @@ -0,0 +1,389 @@ > +/* > + * Copyright 2011 Christoph Bumiller > + * Copyright 2015 Samuel Pitoiset > + * > + * Permission is hereby granted, free of charge, to any person obtaining a > + * copy of this software and associated documentation files (the "Software"), > + * to deal in the Software without restriction, including without limitation > + * the rights to use, copy, modify, merge, publish, distribute, sublicense, > + * and/or sell copies of the Software, and to permit persons to whom the > + * Software is furnished to do so, subject to the following conditions: > + * > + * The above copyright notice and this permission notice shall be included in > + * all copies or substantial portions of the Software. > + * > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR > + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, > + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR > + * OTHER DEALINGS IN THE SOFTWARE. > + */ > + > +#define NV50_PUSH_EXPLICIT_SPACE_CHECKING > + > +#include "nv50/nv50_context.h" > +#include "nv50/nv50_query_hw.h" > +#include "nv_object.xml.h" > + > +#define NV50_HW_QUERY_STATE_READY 0 > +#define NV50_HW_QUERY_STATE_ACTIVE 1 > +#define NV50_HW_QUERY_STATE_ENDED 2 > +#define NV50_HW_QUERY_STATE_FLUSHED 3 > + > +/* XXX: Nested queries, and simultaneous queries on multiple gallium contexts > + * (since we use only a single GPU channel per screen) will not work > properly. > + * > + * The first is not that big of an issue because OpenGL does not allow nested > + * queries anyway. > + */ > + > +#define NV50_HW_QUERY_ALLOC_SPACE 256 > + > +static bool > +nv50_hw_query_allocate(struct nv50_context *nv50, struct nv50_query *q, > + int size) > +{ > + struct nv50_screen *screen = nv50->screen; > + struct nv50_hw_query *hq = nv50_hw_query(q); > + int ret; > + > + if (hq->bo) { > + nouveau_bo_ref(NULL, &hq->bo); > + if (hq->mm) { > + if (hq->state == NV50_HW_QUERY_STATE_READY) > + nouveau_mm_free(hq->mm); > + else > + nouveau_fence_work(screen->base.fence.current, > + nouveau_mm_free_work, hq->mm); > + } > + } > + if (size) { > + hq->mm = nouveau_mm_allocate(screen->base.mm_GART, size, > + &hq->bo, &hq->base_offset); > + if (!hq->bo) > + return false; > + hq->offset = hq->base_offset; > + > + ret = nouveau_bo_map(hq->bo, 0, screen->base.client); > + if (ret) { > + nv50_hw_query_allocate(nv50, q, 0); > + return false; > + } > + hq->data = (uint32_t *)((uint8_t *)hq->bo->map + hq->base_offset); > + } > + return true; > +} > + > +static void > +nv50_hw_query_get(struct nouveau_pushbuf *push, struct nv50_query *q, > + unsigned offset, uint32_t get) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + > + offset += hq->offset; > + > + PUSH_SPACE(push, 5); > + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR); > + BEGIN_NV04(push, NV50_3D(QUERY_ADDRESS_HIGH), 4); > + PUSH_DATAh(push, hq->bo->offset + offset); > + PUSH_DATA (push, hq->bo->offset + offset); > + PUSH_DATA (push, hq->sequence); > + PUSH_DATA (push, get); > +} > + > +static inline void > +nv50_hw_query_update(struct nv50_query *q) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + > + if (hq->is64bit) { > + if (nouveau_fence_signalled(hq->fence)) > + hq->state = NV50_HW_QUERY_STATE_READY; > + } else { > + if (hq->data[0] == hq->sequence) > + hq->state = NV50_HW_QUERY_STATE_READY; > + } > +} > + > +static void > +nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + nv50_hw_query_allocate(nv50, q, 0); > + nouveau_fence_ref(NULL, &hq->fence); > + FREE(hq); > +} > + > +static boolean > +nv50_hw_begin_query(struct nv50_context *nv50, struct nv50_query *q) > +{ > + struct nouveau_pushbuf *push = nv50->base.pushbuf; > + struct nv50_hw_query *hq = nv50_hw_query(q); > + > + /* For occlusion queries we have to change the storage, because a previous > + * query might set the initial render condition to false even *after* we > re- > + * initialized it to true. > + */ > + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { > + hq->offset += 32; > + hq->data += 32 / sizeof(*hq->data); > + if (hq->offset - hq->base_offset == NV50_HW_QUERY_ALLOC_SPACE) > + nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE); > + > + /* XXX: can we do this with the GPU, and sync with respect to a > previous > + * query ? > + */ > + hq->data[0] = hq->sequence; /* initialize sequence */ > + hq->data[1] = 1; /* initial render condition = true */ > + hq->data[4] = hq->sequence + 1; /* for comparison COND_MODE */ > + hq->data[5] = 0; > + } > + if (!hq->is64bit) > + hq->data[0] = hq->sequence++; /* the previously used one */ > + > + switch (q->type) { > + case PIPE_QUERY_OCCLUSION_COUNTER: > + hq->nesting = nv50->screen->num_occlusion_queries_active++; > + if (hq->nesting) { > + nv50_hw_query_get(push, q, 0x10, 0x0100f002); > + } else { > + PUSH_SPACE(push, 4); > + BEGIN_NV04(push, NV50_3D(COUNTER_RESET), 1); > + PUSH_DATA (push, NV50_3D_COUNTER_RESET_SAMPLECNT); > + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); > + PUSH_DATA (push, 1); > + } > + break; > + case PIPE_QUERY_PRIMITIVES_GENERATED: > + nv50_hw_query_get(push, q, 0x10, 0x06805002); > + break; > + case PIPE_QUERY_PRIMITIVES_EMITTED: > + nv50_hw_query_get(push, q, 0x10, 0x05805002); > + break; > + case PIPE_QUERY_SO_STATISTICS: > + nv50_hw_query_get(push, q, 0x20, 0x05805002); > + nv50_hw_query_get(push, q, 0x30, 0x06805002); > + break; > + case PIPE_QUERY_PIPELINE_STATISTICS: > + nv50_hw_query_get(push, q, 0x80, 0x00801002); /* VFETCH, VERTICES */ > + nv50_hw_query_get(push, q, 0x90, 0x01801002); /* VFETCH, PRIMS */ > + nv50_hw_query_get(push, q, 0xa0, 0x02802002); /* VP, LAUNCHES */ > + nv50_hw_query_get(push, q, 0xb0, 0x03806002); /* GP, LAUNCHES */ > + nv50_hw_query_get(push, q, 0xc0, 0x04806002); /* GP, PRIMS_OUT */ > + nv50_hw_query_get(push, q, 0xd0, 0x07804002); /* RAST, PRIMS_IN */ > + nv50_hw_query_get(push, q, 0xe0, 0x08804002); /* RAST, PRIMS_OUT */ > + nv50_hw_query_get(push, q, 0xf0, 0x0980a002); /* ROP, PIXELS */ > + break; > + case PIPE_QUERY_TIME_ELAPSED: > + nv50_hw_query_get(push, q, 0x10, 0x00005002); > + break; > + default: > + assert(0); > + return false; > + } > + hq->state = NV50_HW_QUERY_STATE_ACTIVE; > + return true; > +} > + > +static void > +nv50_hw_end_query(struct nv50_context *nv50, struct nv50_query *q) > +{ > + struct nouveau_pushbuf *push = nv50->base.pushbuf; > + struct nv50_hw_query *hq = nv50_hw_query(q); > + > + hq->state = NV50_HW_QUERY_STATE_ENDED; > + > + switch (q->type) { > + case PIPE_QUERY_OCCLUSION_COUNTER: > + nv50_hw_query_get(push, q, 0, 0x0100f002); > + if (--nv50->screen->num_occlusion_queries_active == 0) { > + PUSH_SPACE(push, 2); > + BEGIN_NV04(push, NV50_3D(SAMPLECNT_ENABLE), 1); > + PUSH_DATA (push, 0); > + } > + break; > + case PIPE_QUERY_PRIMITIVES_GENERATED: > + nv50_hw_query_get(push, q, 0, 0x06805002); > + break; > + case PIPE_QUERY_PRIMITIVES_EMITTED: > + nv50_hw_query_get(push, q, 0, 0x05805002); > + break; > + case PIPE_QUERY_SO_STATISTICS: > + nv50_hw_query_get(push, q, 0x00, 0x05805002); > + nv50_hw_query_get(push, q, 0x10, 0x06805002); > + break; > + case PIPE_QUERY_PIPELINE_STATISTICS: > + nv50_hw_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */ > + nv50_hw_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */ > + nv50_hw_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */ > + nv50_hw_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */ > + nv50_hw_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */ > + nv50_hw_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */ > + nv50_hw_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */ > + nv50_hw_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */ > + break; > + case PIPE_QUERY_TIMESTAMP: > + hq->sequence++; > + /* fall through */ > + case PIPE_QUERY_TIME_ELAPSED: > + nv50_hw_query_get(push, q, 0, 0x00005002); > + break; > + case PIPE_QUERY_GPU_FINISHED: > + hq->sequence++; > + nv50_hw_query_get(push, q, 0, 0x1000f010); > + break; > + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: > + hq->sequence++; > + nv50_hw_query_get(push, q, 0, 0x0d005002 | (q->index << 5)); > + break; > + case PIPE_QUERY_TIMESTAMP_DISJOINT: > + /* This query is not issued on GPU because disjoint is forced to false > */ > + hq->state = NV50_HW_QUERY_STATE_READY; > + break; > + default: > + assert(0); > + break; > + } > + if (hq->is64bit) > + nouveau_fence_ref(nv50->screen->base.fence.current, &hq->fence); > +} > + > +static boolean > +nv50_hw_get_query_result(struct nv50_context *nv50, struct nv50_query *q, > + boolean wait, union pipe_query_result *result) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + uint64_t *res64 = (uint64_t *)result; > + uint32_t *res32 = (uint32_t *)result; > + uint8_t *res8 = (uint8_t *)result; > + uint64_t *data64 = (uint64_t *)hq->data; > + int i; > + > + if (hq->state != NV50_HW_QUERY_STATE_READY) > + nv50_hw_query_update(q); > + > + if (hq->state != NV50_HW_QUERY_STATE_READY) { > + if (!wait) { > + /* for broken apps that spin on GL_QUERY_RESULT_AVAILABLE */ > + if (hq->state != NV50_HW_QUERY_STATE_FLUSHED) { > + hq->state = NV50_HW_QUERY_STATE_FLUSHED; > + PUSH_KICK(nv50->base.pushbuf); > + } > + return false; > + } > + if (nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, nv50->screen->base.client)) > + return false; > + } > + hq->state = NV50_HW_QUERY_STATE_READY; > + > + switch (q->type) { > + case PIPE_QUERY_GPU_FINISHED: > + res8[0] = true; > + break; > + case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */ > + res64[0] = hq->data[1] - hq->data[5]; > + break; > + case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */ > + case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */ > + res64[0] = data64[0] - data64[2]; > + break; > + case PIPE_QUERY_SO_STATISTICS: > + res64[0] = data64[0] - data64[4]; > + res64[1] = data64[2] - data64[6]; > + break; > + case PIPE_QUERY_PIPELINE_STATISTICS: > + for (i = 0; i < 8; ++i) > + res64[i] = data64[i * 2] - data64[16 + i * 2]; > + break; > + case PIPE_QUERY_TIMESTAMP: > + res64[0] = data64[1]; > + break; > + case PIPE_QUERY_TIMESTAMP_DISJOINT: > + res64[0] = 1000000000; > + res8[8] = false; > + break; > + case PIPE_QUERY_TIME_ELAPSED: > + res64[0] = data64[1] - data64[3]; > + break; > + case NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET: > + res32[0] = hq->data[1]; > + break; > + default: > + assert(0); > + return false; > + } > + > + return true; > +} > + > +static const struct nv50_query_funcs hw_query_funcs = { > + .destroy_query = nv50_hw_destroy_query, > + .begin_query = nv50_hw_begin_query, > + .end_query = nv50_hw_end_query, > + .get_query_result = nv50_hw_get_query_result, > +}; > + > +struct nv50_query * > +nv50_hw_create_query(struct nv50_context *nv50, unsigned type, unsigned > index) > +{ > + struct nv50_hw_query *hq; > + struct nv50_query *q; > + > + hq = CALLOC_STRUCT(nv50_hw_query); > + if (!hq) > + return NULL; > + > + q = &hq->base; > + q->funcs = &hw_query_funcs; > + q->type = type; > + > + if (!nv50_hw_query_allocate(nv50, q, NV50_HW_QUERY_ALLOC_SPACE)) { > + FREE(hq); > + return NULL; > + } > + > + if (q->type == PIPE_QUERY_OCCLUSION_COUNTER) { > + /* we advance before query_begin ! */ > + hq->offset -= 32; > + hq->data -= 32 / sizeof(*hq->data); > + } > + > + hq->is64bit = (type == PIPE_QUERY_PRIMITIVES_GENERATED || > + type == PIPE_QUERY_PRIMITIVES_EMITTED || > + type == PIPE_QUERY_SO_STATISTICS || > + type == PIPE_QUERY_PIPELINE_STATISTICS); > + > + return q; > +} > + > +void > +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *push, uint16_t method, > + struct nv50_query *q, unsigned result_offset) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + > + nv50_hw_query_update(q); > + if (hq->state != NV50_HW_QUERY_STATE_READY) > + nouveau_bo_wait(hq->bo, NOUVEAU_BO_RD, push->client); > + hq->state = NV50_HW_QUERY_STATE_READY; > + > + BEGIN_NV04(push, SUBC_3D(method), 1); > + PUSH_DATA (push, hq->data[result_offset / 4]); > +} > + > +void > +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *push, struct nv50_query *q) > +{ > + struct nv50_hw_query *hq = nv50_hw_query(q); > + unsigned offset = hq->offset; > + > + PUSH_SPACE(push, 5); > + PUSH_REFN (push, hq->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD); > + BEGIN_NV04(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4); > + PUSH_DATAh(push, hq->bo->offset + offset); > + PUSH_DATA (push, hq->bo->offset + offset); > + PUSH_DATA (push, hq->sequence); > + PUSH_DATA (push, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL); > +} > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h > b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h > new file mode 100644 > index 0000000..fe518a5 > --- /dev/null > +++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.h > @@ -0,0 +1,39 @@ > +#ifndef __NV50_QUERY_HW_H__ > +#define __NV50_QUERY_HW_H__ > + > +#include "nouveau_fence.h" > +#include "nouveau_mm.h" > + > +#include "nv50_query.h" > + > +#define NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0) > + > +struct nv50_hw_query { > + struct nv50_query base; > + uint32_t *data; > + uint32_t sequence; > + struct nouveau_bo *bo; > + uint32_t base_offset; > + uint32_t offset; /* base + i * 32 */ > + uint8_t state; > + bool is64bit; > + int nesting; /* only used for occlusion queries */ > + struct nouveau_mm_allocation *mm; > + struct nouveau_fence *fence; > +}; > + > +static inline struct nv50_hw_query * > +nv50_hw_query(struct nv50_query *q) > +{ > + return (struct nv50_hw_query *)q; > +} > + > +struct nv50_query * > +nv50_hw_create_query(struct nv50_context *, unsigned, unsigned); > +void > +nv50_hw_query_pushbuf_submit(struct nouveau_pushbuf *, uint16_t, > + struct nv50_query *, unsigned); > +void > +nv84_hw_query_fifo_wait(struct nouveau_pushbuf *, struct nv50_query *); > + > +#endif > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c > b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c > index 958d044..703f1fe 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_shader_state.c > @@ -27,6 +27,7 @@ > #include "util/u_inlines.h" > > #include "nv50/nv50_context.h" > +#include "nv50/nv50_query_hw.h" > > void > nv50_constbufs_validate(struct nv50_context *nv50) > @@ -629,7 +630,7 @@ nv50_stream_output_validate(struct nv50_context *nv50) > const unsigned n = nv50->screen->base.class_3d >= NVA0_3D_CLASS ? 4 : > 3; > > if (n == 4 && !targ->clean) > - nv84_query_fifo_wait(push, nv50_query(targ->pq)); > + nv84_hw_query_fifo_wait(push, nv50_query(targ->pq)); > BEGIN_NV04(push, NV50_3D(STRMOUT_ADDRESS_HIGH(i)), n); > PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset); > PUSH_DATA (push, buf->address + targ->pipe.buffer_offset); > @@ -638,8 +639,8 @@ nv50_stream_output_validate(struct nv50_context *nv50) > PUSH_DATA(push, targ->pipe.buffer_size); > if (!targ->clean) { > assert(targ->pq); > - nv50_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), > - nv50_query(targ->pq), 0x4); > + nv50_hw_query_pushbuf_submit(push, NVA0_3D_STRMOUT_OFFSET(i), > + nv50_query(targ->pq), 0x4); > } else { > BEGIN_NV04(push, NVA0_3D(STRMOUT_OFFSET(i)), 1); > PUSH_DATA(push, 0); > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_state.c > b/src/gallium/drivers/nouveau/nv50/nv50_state.c > index 8af2add..906f536 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_state.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_state.c > @@ -30,6 +30,7 @@ > > #include "nv50/nv50_stateobj.h" > #include "nv50/nv50_context.h" > +#include "nv50/nv50_query_hw.h" > > #include "nv50/nv50_3d.xml.h" > #include "nv50/nv50_texture.xml.h" > @@ -1033,7 +1034,7 @@ nv50_so_target_create(struct pipe_context *pipe, > > if (nouveau_context(pipe)->screen->class_3d >= NVA0_3D_CLASS) { > targ->pq = pipe->create_query(pipe, > - NVA0_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, > 0); > + > NVA0_HW_QUERY_STREAM_OUTPUT_BUFFER_OFFSET, 0); > if (!targ->pq) { > FREE(targ); > return NULL; > diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > index dbc6632..9fa6fce 100644 > --- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > +++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c > @@ -27,6 +27,7 @@ > #include "translate/translate.h" > > #include "nv50/nv50_context.h" > +#include "nv50/nv50_query_hw.h" > #include "nv50/nv50_resource.h" > > #include "nv50/nv50_3d.xml.h" > @@ -745,8 +746,8 @@ nva0_draw_stream_output(struct nv50_context *nv50, > PUSH_DATA (push, 0); > BEGIN_NV04(push, NVA0_3D(DRAW_TFB_STRIDE), 1); > PUSH_DATA (push, so->stride); > - nv50_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, > - nv50_query(so->pq), 0x4); > + nv50_hw_query_pushbuf_submit(push, NVA0_3D_DRAW_TFB_BYTES, > + nv50_query(so->pq), 0x4); > BEGIN_NV04(push, NV50_3D(VERTEX_END_GL), 1); > PUSH_DATA (push, 0); > > -- > 2.5.3 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev