Re: [Mesa3d-dev] r600: implement ARB_occlusion_query

Alex Deucher Tue, 27 Oct 2009 01:00:53 -0700

On Mon, Oct 26, 2009 at 5:32 PM, Alex Deucher <alexdeuc...@gmail.com> wrote:
> On Mon, Oct 26, 2009 at 4:55 AM, Stephan Schmid <stephan_2...@gmx.de> wrote:
>> This implements GL_ARB_occlusion_query for RV610
>> Currently it results in a huge performance gain in games that take advantage 
>> of
>> ARB_oq such as sauerbraten (cube2).
>> issues:
>> - this was tested so far only on RV610. I figured out that the RV610 writes 
>> one
>>  single uint64_t value when triggering the zpass write event. The specs 
>> aren't
>>  too clear about what exactly is written.
>>  It might be that there are multiple zpass counters on chip and that 
>> r6xx/r7xx
>>  chips write one uint64_t per counter (just as the r300 do it). In this case 
>> the
>>  RV610 would write only one value because it's one of the smallest chips in 
>> the
>>  family so it's got only one counter.
>>  If my assumtion were true it would be necessary to use n*sizeof(uint64_t) in
>>  r600_emit_query_finish as offset (n = number of counters/values written) and
>>  to consider the additional values in radeonQueryGetResult when computing the
>>  result of the query.
>>  It would be interesting to know what the other r6xx/r7xx write on 
>> zpass-write event
>>  to support them as well.
>
> Stephan,
>
>   Nice work!  The zpass stuff is per DB just like the older chips, so
> you'll need to allocate enough memory to support two qwords for each
> DB.  The number of DBs depends on the asic.  We'll probably need a drm
> query similar to what we do for r300.  I'm working on a cleaned up
> version of your mesa patch and a drm patch to return the number of RBs
> like we do for r300.


After testing, it seems r6xx aggregates the zpass results from all DB
blocks into 1 qword.  R7xx, seems to work differently.  I'm following
up internally.  The attached patch works properly on all r6xx cards I
have and certain r7xx cards.  tri-query and glean oq tests pass.

Alex

From 888e8fd56788bcf4fc42b9d630ad1c4a01d8c9b1 Mon Sep 17 00:00:00 2001
From: Alex Deucher <alexdeuc...@gmail.com>
Date: Tue, 27 Oct 2009 03:50:58 -0400
Subject: [PATCH] r600: add occlusion query support

Based on initial patch from Stephan Schmid <stephan_2...@gmx.de>.

Basic idea is to dump the zpass count before and after and substract
to get the total number of visible fragments.  R6xx appears to
aggregate the results of all DB blocks into a single qword and works
properly on all cards I've tested on.  R7xx seems to work differently
and needs follow up.

Signed-off-by: Alex Deucher <alexdeuc...@gmail.com>
---
 src/mesa/drivers/dri/r600/r600_context.c      |   28 ++++++++++++--
 src/mesa/drivers/dri/r600/r700_chip.c         |   50 +++++++++++++++++++++++++
 src/mesa/drivers/dri/r600/r700_state.c        |    1 +
 src/mesa/drivers/dri/radeon/radeon_queryobj.c |   28 +++++++++++---
 4 files changed, 97 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/r600/r600_context.c b/src/mesa/drivers/dri/r600/r600_context.c
index c1bf76d..6fe2926 100644
--- a/src/mesa/drivers/dri/r600/r600_context.c
+++ b/src/mesa/drivers/dri/r600/r600_context.c
@@ -64,6 +64,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "r600_cmdbuf.h"
 #include "r600_emit.h"
 #include "radeon_bocs_wrapper.h"
+#include "radeon_queryobj.h"
 
 #include "r700_state.h"
 #include "r700_ioctl.h"
@@ -73,11 +74,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 #include "utils.h"
 #include "xmlpool.h"		/* for symbolic values of enum-type options */
 
-/* hw_tcl_on derives from future_hw_tcl_on when its safe to change it. */
-int future_hw_tcl_on = 1;
-int hw_tcl_on = 1;
-
 #define need_GL_VERSION_2_0
+#define need_GL_ARB_occlusion_query
 #define need_GL_ARB_point_parameters
 #define need_GL_ARB_vertex_program
 #define need_GL_EXT_blend_equation_separate
@@ -98,6 +96,7 @@ static const struct dri_extension card_extensions[] = {
   /* *INDENT-OFF* */
   {"GL_ARB_depth_texture",		NULL},
   {"GL_ARB_fragment_program",		NULL},
+  {"GL_ARB_occlusion_query",            GL_ARB_occlusion_query_functions},
   {"GL_ARB_multitexture",		NULL},
   {"GL_ARB_point_parameters",		GL_ARB_point_parameters_functions},
   {"GL_ARB_shadow",			NULL},
@@ -204,6 +203,25 @@ static void r600_fallback(GLcontext *ctx, GLuint bit, GLboolean mode)
 		context->radeon.Fallback &= ~bit;
 }
 
+static void r600_emit_query_finish(radeonContextPtr radeon)
+{
+	context_t *context = (context_t*) radeon;
+	BATCH_LOCALS(&context->radeon);
+
+	struct radeon_query_object *query = radeon->query.current;
+
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	END_BATCH();
+	query->curr_offset += 8 * sizeof(uint64_t);
+	assert(query->curr_offset < RADEON_QUERY_PAGE_SIZE);
+	query->emitted_begin = GL_FALSE;
+}
+
 static void r600_init_vtbl(radeonContextPtr radeon)
 {
 	radeon->vtbl.get_lock = r600_get_lock;
@@ -212,6 +230,7 @@ static void r600_init_vtbl(radeonContextPtr radeon)
 	radeon->vtbl.swtcl_flush = NULL;
 	radeon->vtbl.pre_emit_atoms = r600_vtbl_pre_emit_atoms;
 	radeon->vtbl.fallback = r600_fallback;
+	radeon->vtbl.emit_query_finish = r600_emit_query_finish;
 }
 
 static void r600InitConstValues(GLcontext *ctx, radeonScreenPtr screen)
@@ -340,6 +359,7 @@ GLboolean r600CreateContext(const __GLcontextModes * glVisual,
 	r700InitStateFuncs(&functions);
 	r600InitTextureFuncs(&functions);
 	r700InitShaderFuncs(&functions);
+	radeonInitQueryObjFunctions(&functions);
 	r700InitIoctlFuncs(&functions);
 	radeonInitBufferObjectFuncs(&functions);
 
diff --git a/src/mesa/drivers/dri/r600/r700_chip.c b/src/mesa/drivers/dri/r600/r700_chip.c
index 75b97c5..63a4bc4 100644
--- a/src/mesa/drivers/dri/r600/r700_chip.c
+++ b/src/mesa/drivers/dri/r600/r700_chip.c
@@ -1100,6 +1100,28 @@ static void r700SendVSConsts(GLcontext *ctx, struct radeon_state_atom *atom)
 	COMMIT_BATCH();
 }
 
+static void r700SendQueryBegin(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+	BATCH_LOCALS(radeon);
+	radeon_print(RADEON_STATE, RADEON_VERBOSE, "%s\n", __func__);
+
+	radeon_cs_space_check_with_bo(radeon->cmdbuf.cs,
+				      query->bo,
+				      0, RADEON_GEM_DOMAIN_GTT);
+
+	BEGIN_BATCH_NO_AUTOSTATE(4 + 2);
+	R600_OUT_BATCH(CP_PACKET3(R600_IT_EVENT_WRITE, 2));
+	R600_OUT_BATCH(ZPASS_DONE);
+	R600_OUT_BATCH(query->curr_offset); /* hw writes qwords */
+	R600_OUT_BATCH(0x00000000);
+	R600_OUT_BATCH_RELOC(VGT_EVENT_INITIATOR, query->bo, 0, 0, RADEON_GEM_DOMAIN_GTT, 0);
+	END_BATCH();
+	query->curr_offset += 8 * sizeof(uint64_t);
+	query->emitted_begin = GL_TRUE;
+}
+
 static int check_always(GLcontext *ctx, struct radeon_state_atom *atom)
 {
 	return atom->cmd_size;
@@ -1208,6 +1230,20 @@ static int check_vs_consts(GLcontext *ctx, struct radeon_state_atom *atom)
 	return count;
 }
 
+static int check_queryobj(GLcontext *ctx, struct radeon_state_atom *atom)
+{
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
+	struct radeon_query_object *query = radeon->query.current;
+	int count;
+
+	if (!query || query->emitted_begin)
+		count = 0;
+	else
+		count = atom->cmd_size;
+	radeon_print(RADEON_STATE, RADEON_TRACE, "%s %d\n", __func__, count);
+	return count;
+}
+
 #define ALLOC_STATE( ATOM, CHK, SZ, EMIT )				\
 do {									\
 	context->atoms.ATOM.cmd_size = (SZ);				\
@@ -1221,6 +1257,19 @@ do {									\
 	insert_at_tail(&context->radeon.hw.atomlist, &context->atoms.ATOM); \
 } while (0)
 
+static void r600_init_query_stateobj(radeonContextPtr radeon, int SZ)
+{
+	radeon->query.queryobj.cmd_size = (SZ);
+	radeon->query.queryobj.cmd = NULL;
+	radeon->query.queryobj.name = "queryobj";
+	radeon->query.queryobj.idx = 0;
+	radeon->query.queryobj.check = check_queryobj;
+	radeon->query.queryobj.dirty = GL_FALSE;
+	radeon->query.queryobj.emit = r700SendQueryBegin;
+	radeon->hw.max_state_size += (SZ);
+	insert_at_tail(&radeon->hw.atomlist, &radeon->query.queryobj);
+}
+
 void r600InitAtoms(context_t *context)
 {
 	radeon_print(RADEON_STATE, RADEON_NORMAL, "%s %p\n", __func__, context);
@@ -1260,6 +1309,7 @@ void r600InitAtoms(context_t *context)
 	ALLOC_STATE(tx, tx, (R700_TEXTURE_NUMBERUNITS * 20), r700SendTexState);
 	ALLOC_STATE(tx_smplr, tx, (R700_TEXTURE_NUMBERUNITS * 5), r700SendTexSamplerState);
 	ALLOC_STATE(tx_brdr_clr, tx, (R700_TEXTURE_NUMBERUNITS * 6), r700SendTexBorderColorState);
+	r600_init_query_stateobj(&context->radeon, 6 * 2);
 
 	context->radeon.hw.is_dirty = GL_TRUE;
 	context->radeon.hw.all_dirty = GL_TRUE;
diff --git a/src/mesa/drivers/dri/r600/r700_state.c b/src/mesa/drivers/dri/r600/r700_state.c
index 9a6a68a..0b67636 100644
--- a/src/mesa/drivers/dri/r600/r700_state.c
+++ b/src/mesa/drivers/dri/r600/r700_state.c
@@ -1675,6 +1675,7 @@ void r700InitState(GLcontext * ctx) //-------------------
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIZ_ENABLE_shift, FORCE_HIZ_ENABLE_mask);
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE0_shift, FORCE_HIS_ENABLE0_mask);
     SETfield(r700->DB_RENDER_OVERRIDE.u32All, FORCE_DISABLE, FORCE_HIS_ENABLE1_shift, FORCE_HIS_ENABLE1_mask);
+    SETbit(r700->DB_RENDER_OVERRIDE.u32All, NOOP_CULL_DISABLE_bit);
 
     r700->DB_ALPHA_TO_MASK.u32All = 0;
     SETfield(r700->DB_ALPHA_TO_MASK.u32All, 2, ALPHA_TO_MASK_OFFSET0_shift, ALPHA_TO_MASK_OFFSET0_mask);
diff --git a/src/mesa/drivers/dri/radeon/radeon_queryobj.c b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
index b79d864..889b432 100644
--- a/src/mesa/drivers/dri/radeon/radeon_queryobj.c
+++ b/src/mesa/drivers/dri/radeon/radeon_queryobj.c
@@ -47,8 +47,8 @@ static int radeonQueryIsFlushed(GLcontext *ctx, struct gl_query_object *q)
 
 static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
 {
+	radeonContextPtr radeon = RADEON_CONTEXT(ctx);
 	struct radeon_query_object *query = (struct radeon_query_object *)q;
-	uint32_t *result;
 	int i;
 
 	radeon_print(RADEON_STATE, RADEON_VERBOSE,
@@ -57,12 +57,28 @@ static void radeonQueryGetResult(GLcontext *ctx, struct gl_query_object *q)
 
 	radeon_bo_map(query->bo, GL_FALSE);
 
-	result = query->bo->ptr;
-
 	query->Base.Result = 0;
-	for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
-		query->Base.Result += result[i];
-		radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+	if (IS_R600_CLASS(radeon->radeonScreen)) {
+		uint64_t *result = query->bo->ptr;
+		int max_oq;
+		/* r6xx appears to aggregate the results, r7xx doesn't?  */
+		if (radeon->radeonScreen->chip_family >= CHIP_FAMILY_RV770)
+			max_oq = 8;
+		else
+			max_oq = 1;
+		for (i = 0; i < max_oq; ++i) {
+			uint64_t start = result[0 + i] & 0x7fffffffffffffff;
+			uint64_t end = result[8 + i] & 0x7fffffffffffffff;
+			uint64_t query_count = end - start;
+			query->Base.Result += query_count;
+			radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, (uint32_t)query_count);
+		}
+	} else {
+		uint32_t *result = query->bo->ptr;
+		for (i = 0; i < query->curr_offset/sizeof(uint32_t); ++i) {
+			query->Base.Result += result[i];
+			radeon_print(RADEON_STATE, RADEON_TRACE, "result[%d] = %d\n", i, result[i]);
+		}
 	}
 
 	radeon_bo_unmap(query->bo);
-- 
1.5.6.3

------------------------------------------------------------------------------
Come build with us! The BlackBerry(R) Developer Conference in SF, CA
is the only developer event you need to attend this year. Jumpstart your
developing skills, take BlackBerry mobile applications to market and stay 
ahead of the curve. Join us from November 9 - 12, 2009. Register now!
http://p.sf.net/sfu/devconference

_______________________________________________
Mesa3d-dev mailing list
Mesa3d-dev@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mesa3d-dev

Re: [Mesa3d-dev] r600: implement ARB_occlusion_query

Reply via email to