Re: [Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves

2017-05-10 Thread Nanley Chery
On Wed, May 03, 2017 at 03:39:51PM -0700, Jason Ekstrand wrote:
> On Thu, Apr 27, 2017 at 11:32 AM, Nanley Chery 
> wrote:
> 
> > There's no image layout to represent a full-RT-cleared color attachment.
> > That's one reason we can end up with redundant resolves. Testing has
> > shown that such resolves can measurably hurt performance and that
> > predicating them can avoid the penalty.
> >
> > Signed-off-by: Nanley Chery 
> > ---
> >  src/intel/vulkan/anv_blorp.c   |   3 +-
> >  src/intel/vulkan/anv_image.c   |   6 +++
> >  src/intel/vulkan/genX_cmd_buffer.c | 106 ++
> > ---
> >  3 files changed, 108 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> > index 821d01a077..32f0edf316 100644
> > --- a/src/intel/vulkan/anv_blorp.c
> > +++ b/src/intel/vulkan/anv_blorp.c
> > @@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> > cmd_buffer,
> >
> > struct blorp_batch batch;
> > blorp_batch_init(_buffer->device->blorp, , cmd_buffer,
> > -BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> > +BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
> > +BLORP_BATCH_PREDICATE_ENABLE);
> >
> > struct blorp_surf surf;
> > get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> > diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> > index 751f2d6026..92ee86dab5 100644
> > --- a/src/intel/vulkan/anv_image.c
> > +++ b/src/intel/vulkan/anv_image.c
> > @@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device *
> > const device,
> > .aux_usage = image->aux_usage ==
> > ISL_AUX_USAGE_NONE ?
> >  ISL_AUX_USAGE_CCS_D :
> > image->aux_usage,
> > .mocs = device->default_mocs);
> > +
> > +   /* The following dword is used as a flag to represent whether or not
> > this
> > +* CCS subresource needs resolving. We want this to be zero by default,
> > +* which means that a resolve is not necessary.
> > +*/
> > +   assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
> >
> 
> This seems like kind-of an odd place to put it... Not a problem, just odd.
> 
> 

I agree. It's logically better off in anv_BindImage. It's mainly here
for convenience (the data pointer is already calculated for us).

> >  }
> >
> >  /**
> > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > b/src/intel/vulkan/genX_cmd_buffer.c
> > index 95729ec8a8..041301290e 100644
> > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > @@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer
> > *cmd_buffer,
> >anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
> >  }
> >
> > +static uint32_t
> > +clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
> > +  const struct anv_image * const image,
> > +  const uint8_t level)
> > +{
> > +   return image->offset +
> > +  image->aux_surface.offset + image->aux_surface.isl.size +
> > +  cmd_buffer->device->isl_dev.ss.size * level;
> > +}
> > +
> > +#define MI_PREDICATE_SRC0  0x2400
> > +#define MI_PREDICATE_SRC1  0x2408
> > +
> > +enum ccs_resolve_state {
> > +   CCS_RESOLVE_NOT_NEEDED,
> > +   CCS_RESOLVE_NEEDED,
> > +   CCS_RESOLVE_STARTING,
> > +};
> > +
> > +/* Manages the state of an color image subresource to ensure resolves are
> > + * performed properly.
> > + */
> > +static void
> > +genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
> > +const struct anv_image * const image,
> > +const uint8_t level,
> > +const enum ccs_resolve_state state)
> > +{
> > +   assert(cmd_buffer && image);
> > +
> > +   /* The image subresource range must have a color auxiliary buffer. */
> > +   assert(anv_image_has_color_aux(image));
> > +   assert(level < anv_color_aux_levels(image));
> > +
> > +   /* We store the resolve flag in the location of the surface base
> > address
> > +* field for simplicity and because it is initialized to zero when the
> > +* clear value buffer is initialized.
> > +*/
> > +   const uint32_t resolve_flag_offset =
> > +  clear_value_buffer_offset(cmd_buffer, image, level) +
> > +  cmd_buffer->device->isl_dev.ss.addr_offset;
> > +
> > +   /* An update operation should not overwrite the fast clear value. */
> > +   if (cmd_buffer->device->isl_dev.ss.addr_offset <
> > +   cmd_buffer->device->isl_dev.ss.clear_value_offset) {
> > +  assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
> > + cmd_buffer->device->isl_dev.ss.clear_value_offset);
> > +   }
> > +
> > +   if (state != CCS_RESOLVE_STARTING) {
> > +  assert(state == CCS_RESOLVE_NEEDED || state ==
> > CCS_RESOLVE_NOT_NEEDED);
> > +  

Re: [Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves

2017-05-03 Thread Jason Ekstrand
On Thu, Apr 27, 2017 at 11:32 AM, Nanley Chery 
wrote:

> There's no image layout to represent a full-RT-cleared color attachment.
> That's one reason we can end up with redundant resolves. Testing has
> shown that such resolves can measurably hurt performance and that
> predicating them can avoid the penalty.
>
> Signed-off-by: Nanley Chery 
> ---
>  src/intel/vulkan/anv_blorp.c   |   3 +-
>  src/intel/vulkan/anv_image.c   |   6 +++
>  src/intel/vulkan/genX_cmd_buffer.c | 106 ++
> ---
>  3 files changed, 108 insertions(+), 7 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
> index 821d01a077..32f0edf316 100644
> --- a/src/intel/vulkan/anv_blorp.c
> +++ b/src/intel/vulkan/anv_blorp.c
> @@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const
> cmd_buffer,
>
> struct blorp_batch batch;
> blorp_batch_init(_buffer->device->blorp, , cmd_buffer,
> -BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
> +BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
> +BLORP_BATCH_PREDICATE_ENABLE);
>
> struct blorp_surf surf;
> get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
> index 751f2d6026..92ee86dab5 100644
> --- a/src/intel/vulkan/anv_image.c
> +++ b/src/intel/vulkan/anv_image.c
> @@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device *
> const device,
> .aux_usage = image->aux_usage ==
> ISL_AUX_USAGE_NONE ?
>  ISL_AUX_USAGE_CCS_D :
> image->aux_usage,
> .mocs = device->default_mocs);
> +
> +   /* The following dword is used as a flag to represent whether or not
> this
> +* CCS subresource needs resolving. We want this to be zero by default,
> +* which means that a resolve is not necessary.
> +*/
> +   assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
>

This seems like kind-of an odd place to put it... Not a problem, just odd.


>  }
>
>  /**
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 95729ec8a8..041301290e 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer
> *cmd_buffer,
>anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
>  }
>
> +static uint32_t
> +clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
> +  const struct anv_image * const image,
> +  const uint8_t level)
> +{
> +   return image->offset +
> +  image->aux_surface.offset + image->aux_surface.isl.size +
> +  cmd_buffer->device->isl_dev.ss.size * level;
> +}
> +
> +#define MI_PREDICATE_SRC0  0x2400
> +#define MI_PREDICATE_SRC1  0x2408
> +
> +enum ccs_resolve_state {
> +   CCS_RESOLVE_NOT_NEEDED,
> +   CCS_RESOLVE_NEEDED,
> +   CCS_RESOLVE_STARTING,
> +};
> +
> +/* Manages the state of an color image subresource to ensure resolves are
> + * performed properly.
> + */
> +static void
> +genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
> +const struct anv_image * const image,
> +const uint8_t level,
> +const enum ccs_resolve_state state)
> +{
> +   assert(cmd_buffer && image);
> +
> +   /* The image subresource range must have a color auxiliary buffer. */
> +   assert(anv_image_has_color_aux(image));
> +   assert(level < anv_color_aux_levels(image));
> +
> +   /* We store the resolve flag in the location of the surface base
> address
> +* field for simplicity and because it is initialized to zero when the
> +* clear value buffer is initialized.
> +*/
> +   const uint32_t resolve_flag_offset =
> +  clear_value_buffer_offset(cmd_buffer, image, level) +
> +  cmd_buffer->device->isl_dev.ss.addr_offset;
> +
> +   /* An update operation should not overwrite the fast clear value. */
> +   if (cmd_buffer->device->isl_dev.ss.addr_offset <
> +   cmd_buffer->device->isl_dev.ss.clear_value_offset) {
> +  assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
> + cmd_buffer->device->isl_dev.ss.clear_value_offset);
> +   }
> +
> +   if (state != CCS_RESOLVE_STARTING) {
> +  assert(state == CCS_RESOLVE_NEEDED || state ==
> CCS_RESOLVE_NOT_NEEDED);
> +  /* The HW docs say that there is no way to guarantee the completion
> of
> +   * the following command. We use it nevertheless because it shows no
> +   * issues in testing is currently being used in the GL driver.
> +   */
> +  anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
> + sdi.Address = (struct anv_address) { image->bo,
> resolve_flag_offset };
> + sdi.ImmediateData = state == 

[Mesa-dev] [PATCH 22/22] anv: Predicate fast-clear resolves

2017-04-27 Thread Nanley Chery
There's no image layout to represent a full-RT-cleared color attachment.
That's one reason we can end up with redundant resolves. Testing has
shown that such resolves can measurably hurt performance and that
predicating them can avoid the penalty.

Signed-off-by: Nanley Chery 
---
 src/intel/vulkan/anv_blorp.c   |   3 +-
 src/intel/vulkan/anv_image.c   |   6 +++
 src/intel/vulkan/genX_cmd_buffer.c | 106 ++---
 3 files changed, 108 insertions(+), 7 deletions(-)

diff --git a/src/intel/vulkan/anv_blorp.c b/src/intel/vulkan/anv_blorp.c
index 821d01a077..32f0edf316 100644
--- a/src/intel/vulkan/anv_blorp.c
+++ b/src/intel/vulkan/anv_blorp.c
@@ -1500,7 +1500,8 @@ anv_ccs_resolve(struct anv_cmd_buffer * const cmd_buffer,
 
struct blorp_batch batch;
blorp_batch_init(_buffer->device->blorp, , cmd_buffer,
-BLORP_BATCH_NO_EMIT_DEPTH_STENCIL);
+BLORP_BATCH_NO_EMIT_DEPTH_STENCIL |
+BLORP_BATCH_PREDICATE_ENABLE);
 
struct blorp_surf surf;
get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
index 751f2d6026..92ee86dab5 100644
--- a/src/intel/vulkan/anv_image.c
+++ b/src/intel/vulkan/anv_image.c
@@ -208,6 +208,12 @@ anv_fill_ccs_resolve_ss(const struct anv_device * const 
device,
.aux_usage = image->aux_usage == ISL_AUX_USAGE_NONE ?
 ISL_AUX_USAGE_CCS_D : image->aux_usage,
.mocs = device->default_mocs);
+
+   /* The following dword is used as a flag to represent whether or not this
+* CCS subresource needs resolving. We want this to be zero by default,
+* which means that a resolve is not necessary.
+*/
+   assert(*(uint32_t *)(data + device->isl_dev.ss.addr_offset) == 0);
 }
 
 /**
diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 95729ec8a8..041301290e 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -402,6 +402,82 @@ transition_depth_buffer(struct anv_cmd_buffer *cmd_buffer,
   anv_gen8_hiz_op_resolve(cmd_buffer, image, hiz_op);
 }
 
+static uint32_t
+clear_value_buffer_offset(const struct anv_cmd_buffer * const cmd_buffer,
+  const struct anv_image * const image,
+  const uint8_t level)
+{
+   return image->offset +
+  image->aux_surface.offset + image->aux_surface.isl.size +
+  cmd_buffer->device->isl_dev.ss.size * level;
+}
+
+#define MI_PREDICATE_SRC0  0x2400
+#define MI_PREDICATE_SRC1  0x2408
+
+enum ccs_resolve_state {
+   CCS_RESOLVE_NOT_NEEDED,
+   CCS_RESOLVE_NEEDED,
+   CCS_RESOLVE_STARTING,
+};
+
+/* Manages the state of an color image subresource to ensure resolves are
+ * performed properly.
+ */
+static void
+genX(set_resolve_state)(struct anv_cmd_buffer * const cmd_buffer,
+const struct anv_image * const image,
+const uint8_t level,
+const enum ccs_resolve_state state)
+{
+   assert(cmd_buffer && image);
+
+   /* The image subresource range must have a color auxiliary buffer. */
+   assert(anv_image_has_color_aux(image));
+   assert(level < anv_color_aux_levels(image));
+
+   /* We store the resolve flag in the location of the surface base address
+* field for simplicity and because it is initialized to zero when the
+* clear value buffer is initialized.
+*/
+   const uint32_t resolve_flag_offset =
+  clear_value_buffer_offset(cmd_buffer, image, level) +
+  cmd_buffer->device->isl_dev.ss.addr_offset;
+
+   /* An update operation should not overwrite the fast clear value. */
+   if (cmd_buffer->device->isl_dev.ss.addr_offset <
+   cmd_buffer->device->isl_dev.ss.clear_value_offset) {
+  assert(cmd_buffer->device->isl_dev.ss.addr_offset + 4 <=
+ cmd_buffer->device->isl_dev.ss.clear_value_offset);
+   }
+
+   if (state != CCS_RESOLVE_STARTING) {
+  assert(state == CCS_RESOLVE_NEEDED || state == CCS_RESOLVE_NOT_NEEDED);
+  /* The HW docs say that there is no way to guarantee the completion of
+   * the following command. We use it nevertheless because it shows no
+   * issues in testing is currently being used in the GL driver.
+   */
+  anv_batch_emit(_buffer->batch, GENX(MI_STORE_DATA_IMM), sdi) {
+ sdi.Address = (struct anv_address) { image->bo, resolve_flag_offset };
+ sdi.ImmediateData = state == CCS_RESOLVE_NEEDED;
+  }
+   } else {
+  /* Make the pending predicated resolve a no-op if one is not needed.
+   * predicate = do_resolve = resolve_flag != 0;
+   */
+  emit_lri(_buffer->batch, MI_PREDICATE_SRC1, 0);
+  emit_lri(_buffer->batch, MI_PREDICATE_SRC1 + 4, 0);
+  emit_lri(_buffer->batch, MI_PREDICATE_SRC0, 0);
+  emit_lrm(_buffer->batch,