---
src/gallium/drivers/radeon/r600_pipe_common.c | 15 ++
src/gallium/drivers/radeon/r600_pipe_common.h | 40 +++++
src/gallium/drivers/radeon/r600_texture.c | 239
++++++++++++++++++++++++++
src/gallium/drivers/radeonsi/si_blit.c | 14 +-
src/gallium/drivers/radeonsi/si_state.c | 15 ++
src/gallium/drivers/radeonsi/si_state_draw.c | 5 +-
6 files changed, 326 insertions(+), 2 deletions(-)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.c
b/src/gallium/drivers/radeon/r600_pipe_common.c
index 5d4a679..66afcfa 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.c
+++ b/src/gallium/drivers/radeon/r600_pipe_common.c
@@ -397,6 +397,21 @@ bool r600_common_context_init(struct
r600_common_context *rctx,
void r600_common_context_cleanup(struct r600_common_context *rctx)
{
+ unsigned i,j;
+
+ /* Release DCC stats. */
+ for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
+ assert(!rctx->dcc_stats[i].query_active);
+
+ for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats);
j++)
+ if (rctx->dcc_stats[i].ps_stats[j])
+ rctx->b.destroy_query(&rctx->b,
+
rctx->dcc_stats[i].ps_stats[j]);
+
+ pipe_resource_reference((struct pipe_resource**)
+ &rctx->dcc_stats[i].tex, NULL);
+ }
+
if (rctx->gfx.cs)
rctx->ws->cs_destroy(rctx->gfx.cs);
if (rctx->dma.cs)
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 92cba13..cdec907 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -272,6 +272,25 @@ struct r600_texture {
* dcc_offset contains the absolute GPUVM address, not the
relative one.
*/
struct r600_resource *dcc_separate_buffer;
+ /* When DCC is temporarily disabled, the separate buffer is here.
*/
+ struct r600_resource *last_dcc_separate_buffer;
+ /* We need to track DCC dirtiness, because st/dri usually calls
+ * flush_resource twice per frame (not a bug) and we don't wanna
+ * decompress DCC twice. Also, the dirty tracking must be done
even
+ * if DCC isn't used, because it's required by the DCC usage
analysis
+ * for a possible future enablement.
+ */
+ bool separate_dcc_dirty;
+ /* Statistics gathering for the DCC enablement heuristic. */
+ bool dcc_gather_statistics;
+ /* Estimate of how much this color buffer is written to in units
of
+ * full-screen draws: ps_invocations / (width * height)
+ * Shader kills, late Z, and blending with trivial discards make
it
+ * inaccurate (we need to count CB updates, not PS invocations).
+ */
+ unsigned ps_draw_ratio;
+ /* The number of clears since the last DCC usage analysis. */
+ unsigned num_slow_clears;
/* Counter that should be non-zero if the texture is bound to a
* framebuffer. Implemented in radeonsi only.
@@ -536,6 +555,21 @@ struct r600_common_context {
float sample_locations_8x[8][2];
float sample_locations_16x[16][2];
+ /* Statistics gathering for the DCC enablement heuristic. It can't
be
+ * in r600_texture because r600_texture can be shared by multiple
+ * contexts. This is for back buffers only. We shouldn't get too
many
+ * of those.
+ */
+ struct {
+ struct r600_texture *tex;
+ /* Query queue: 0 = usually active, 1 = waiting, 2 =
readback. */
+ struct pipe_query *ps_stats[3];
+ /* If all slots are used and another slot is needed,
+ * the least recently used slot is evicted based on this.
*/
+ int64_t last_use_timestamp;
+ bool query_active;
+ } dcc_stats[2];
+
/* The list of all texture buffer objects in this context.
* This list is walked when a buffer is invalidated/reallocated
and
* the GPU addresses are updated. */
@@ -688,6 +722,12 @@ struct pipe_surface
*r600_create_surface_custom(struct pipe_context *pipe,
const struct pipe_surface
*templ,
unsigned width, unsigned
height);
unsigned r600_translate_colorswap(enum pipe_format format, bool
do_endian_swap);
+void vi_separate_dcc_start_query(struct pipe_context *ctx,
+ struct r600_texture *tex);
+void vi_separate_dcc_stop_query(struct pipe_context *ctx,
+ struct r600_texture *tex);
+void vi_separate_dcc_analyze_stats(struct pipe_context *ctx,
+ struct r600_texture *tex);
void vi_dcc_clear_level(struct r600_common_context *rctx,
struct r600_texture *rtex,
unsigned level, unsigned clear_value);
diff --git a/src/gallium/drivers/radeon/r600_texture.c
b/src/gallium/drivers/radeon/r600_texture.c
index 23be5ed..7295ab6 100644
--- a/src/gallium/drivers/radeon/r600_texture.c
+++ b/src/gallium/drivers/radeon/r600_texture.c
@@ -26,9 +26,11 @@
*/
#include "r600_pipe_common.h"
#include "r600_cs.h"
+#include "r600_query.h"
#include "util/u_format.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
+#include "os/os_time.h"
#include <errno.h>
#include <inttypes.h>
@@ -567,6 +569,7 @@ static void r600_texture_destroy(struct pipe_screen
*screen,
}
pb_reference(&resource->buf, NULL);
r600_resource_reference(&rtex->dcc_separate_buffer, NULL);
+ r600_resource_reference(&rtex->last_dcc_separate_buffer, NULL);
FREE(rtex);
}
@@ -1017,6 +1020,7 @@ r600_texture_create_object(struct pipe_screen
*screen,
rtex->non_disp_tiling = rtex->is_depth &&
rtex->surface.level[0].mode >= RADEON_SURF_MODE_1D;
/* Applies to GCN. */
rtex->last_msaa_resolve_target_micro_mode =
rtex->surface.micro_tile_mode;
+ rtex->ps_draw_ratio = 100; /* start with a sufficiently high
number */
if (rtex->is_depth) {
if (!(base->flags & (R600_RESOURCE_FLAG_TRANSFER |
@@ -1705,6 +1709,224 @@ unsigned r600_translate_colorswap(enum pipe_format
format, bool do_endian_swap)
return ~0U;
}
+/* PIPELINE_STAT-BASED DCC ENABLEMENT FOR DISPLAYABLE SURFACES */
+
+/**
+ * Return the index of the per-context dcc_stats slot that tracks "tex",
+ * claiming a slot for the texture if it does not have one yet.
+ *
+ * A slot that already tracks the texture only gets its last-use timestamp
+ * refreshed. Otherwise the first empty slot is used; if every slot is
+ * occupied, the slot with the smallest last-use timestamp is evicted
+ * first. The slot that is claimed stores "tex" through
+ * pipe_resource_reference and gets a fresh timestamp.
+ */
+static unsigned vi_get_context_dcc_stats_index(struct r600_common_context
*rctx,
+ struct r600_texture *tex)
+{
+ int i, empty_slot = -1;
+
+ for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) {
+ /* Return if found; the lookup itself counts as a use. */
+ if (rctx->dcc_stats[i].tex == tex) {
+ rctx->dcc_stats[i].last_use_timestamp =
os_time_get();
+ return i;
+ }
+
+ /* Record the first seen empty slot (one whose tex is NULL). */
+ if (empty_slot == -1 && !rctx->dcc_stats[i].tex)
+ empty_slot = i;
+ }
+
+ /* Not found and no slot is free: remove the oldest member to make
+  * space in the array. */
+ if (empty_slot == -1) {
+ int oldest_slot = 0;
+
+ /* Find the oldest slot. The comparison is strict, so on equal
+  * timestamps the lower index is kept. */
+ for (i = 1; i < ARRAY_SIZE(rctx->dcc_stats); i++)
+ if
(rctx->dcc_stats[oldest_slot].last_use_timestamp >
+ rctx->dcc_stats[i].last_use_timestamp)
+ oldest_slot = i;
+
+ /* Clean up the oldest slot: stop its query if it is active, destroy
+  * every query in its queue, then release its texture pointer.
+  * vi_separate_dcc_stop_query looks the texture up through this
+  * function again; the slot is still populated at that point, so
+  * that lookup takes the "found" path above. */
+ if (rctx->dcc_stats[oldest_slot].query_active)
+ vi_separate_dcc_stop_query(&rctx->b,
+
rctx->dcc_stats[oldest_slot].tex);
+
+ for (i = 0; i <
ARRAY_SIZE(rctx->dcc_stats[oldest_slot].ps_stats); i++)
+ if (rctx->dcc_stats[oldest_slot].ps_stats[i]) {
+ rctx->b.destroy_query(&rctx->b,
+
rctx->dcc_stats[oldest_slot].ps_stats[i]);
+ rctx->dcc_stats[oldest_slot].ps_stats[i] =
NULL;
+ }
+
+ pipe_resource_reference((struct pipe_resource**)
+ &rctx->dcc_stats[oldest_slot].tex,
NULL);
+ empty_slot = oldest_slot;
+ }
+
+ /* Add the texture to the new slot and stamp it as just used. */
+ pipe_resource_reference((struct
pipe_resource**)&rctx->dcc_stats[empty_slot].tex,
+ &tex->resource.b.b);
+ rctx->dcc_stats[empty_slot].last_use_timestamp = os_time_get();
+ return empty_slot;
+}
+
+/**
+ * Create a pipeline-statistics query flagged with
+ * R600_QUERY_HW_FLAG_BEGIN_RESUMES.
+ *
+ * Returns the new query, or NULL if ctx->create_query failed.
+ */
+static struct pipe_query *
+vi_create_resuming_pipestats_query(struct pipe_context *ctx)
+{
+	struct r600_query_hw *query = (struct r600_query_hw*)
+		ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
+
+	/* create_query can fail; never write the flag through NULL. */
+	if (!query)
+		return NULL;
+
+	query->flags |= R600_QUERY_HW_FLAG_BEGIN_RESUMES;
+	return (struct pipe_query*)query;
+}
+
+/**
+ * Called when binding a color buffer.
+ *
+ * Begins or resumes the query at the head of the texture's query queue,
+ * creating it on first use. If the query cannot be created, statistics
+ * gathering is skipped for this binding and the slot stays inactive.
+ */
+void vi_separate_dcc_start_query(struct pipe_context *ctx,
+				 struct r600_texture *tex)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
+
+	assert(!rctx->dcc_stats[i].query_active);
+
+	if (!rctx->dcc_stats[i].ps_stats[0])
+		rctx->dcc_stats[i].ps_stats[0] =
+			vi_create_resuming_pipestats_query(ctx);
+
+	/* Creation failed: there is nothing to begin. Leaving query_active
+	 * false lets vi_separate_dcc_stop_query recognize this case. */
+	if (!rctx->dcc_stats[i].ps_stats[0])
+		return;
+
+	/* begin or resume the query */
+	ctx->begin_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
+	rctx->dcc_stats[i].query_active = true;
+}
+
+/**
+ * Called when unbinding a color buffer.
+ *
+ * Pauses or ends the query started by vi_separate_dcc_start_query. If the
+ * slot has no query object (creation failed at bind time), this is a
+ * no-op.
+ */
+void vi_separate_dcc_stop_query(struct pipe_context *ctx,
+				struct r600_texture *tex)
+{
+	struct r600_common_context *rctx = (struct r600_common_context*)ctx;
+	unsigned i = vi_get_context_dcc_stats_index(rctx, tex);
+
+	/* No query object means start_query bailed out, so nothing was begun
+	 * and end_query must not be called with NULL. */
+	if (!rctx->dcc_stats[i].ps_stats[0]) {
+		assert(!rctx->dcc_stats[i].query_active);
+		return;
+	}
+
+	assert(rctx->dcc_stats[i].query_active);
+
+	/* pause or end the query */
+	ctx->end_query(ctx, rctx->dcc_stats[i].ps_stats[0]);
+	rctx->dcc_stats[i].query_active = false;
+}
+
+/**
+ * Tell whether the texture is rendered to heavily enough for separate DCC
+ * to be enabled.
+ */
+static bool vi_can_enable_separate_dcc(struct r600_texture *tex)
+{
+	/* Estimated fullscreen draws plus the slow clears counted since the
+	 * last DCC usage analysis. */
+	unsigned fullscreen_passes = tex->ps_draw_ratio + tex->num_slow_clears;
+
+	/* At least 5 fullscreen draws per frame are required to enable DCC. */
+	return fullscreen_passes >= 5;
+}