Re: [Mesa-dev] [PATCH v3 08/16] anv: Transition more color buffer layouts

Jason Ekstrand Mon, 17 Jul 2017 14:42:06 -0700

On Fri, Jul 14, 2017 at 2:42 AM, Nanley Chery <nanleych...@gmail.com> wrote:


> On Mon, Jul 10, 2017 at 10:14:21AM -0700, Jason Ekstrand wrote:
> > On Wed, Jun 28, 2017 at 2:14 PM, Nanley Chery <nanleych...@gmail.com>
> wrote:
> >
> > > v2: Expound on comment for the pipe controls (Jason Ekstrand).
> > >
> > > Signed-off-by: Nanley Chery <nanley.g.ch...@intel.com>
> > > ---
> > >  src/intel/vulkan/anv_blorp.c       |   4 +-
> > >  src/intel/vulkan/genX_cmd_buffer.c | 183
> ++++++++++++++++++++++++++++++
> > > +++----
> > >  2 files changed, 167 insertions(+), 20 deletions(-)
> > >
> > > diff --git a/src/intel/vulkan/anv_blorp.c
> b/src/intel/vulkan/anv_blorp.c
> > > index 459d57ec57..84b01e8792 100644
> > > --- a/src/intel/vulkan/anv_blorp.c
> > > +++ b/src/intel/vulkan/anv_blorp.c
> > > @@ -1451,7 +1451,9 @@ anv_image_ccs_clear(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >
> > >     struct blorp_surf surf;
> > >     get_blorp_surf_for_anv_image(image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > -                                image->aux_usage, &surf);
> > > +                                image->aux_usage ==
> ISL_AUX_USAGE_CCS_E ?
> > > +                                ISL_AUX_USAGE_CCS_E :
> ISL_AUX_USAGE_CCS_D,
> > > +                                &surf);
> > >
> > >     /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
> > >      *
> > > diff --git a/src/intel/vulkan/genX_cmd_buffer.c
> > > b/src/intel/vulkan/genX_cmd_buffer.c
> > > index decf0b28d6..1a9b841c7c 100644
> > > --- a/src/intel/vulkan/genX_cmd_buffer.c
> > > +++ b/src/intel/vulkan/genX_cmd_buffer.c
> > > @@ -524,6 +524,17 @@ genX(copy_fast_clear_dwords)(struct
> anv_cmd_buffer
> > > *cmd_buffer,
> > >     }
> > >  }
> > >
> > > +/**
> > > + * @brief Transitions a color buffer from one layout to another.
> > > + *
> > > + * See section 6.1.1. Image Layout Transitions of the Vulkan 1.0.50
> spec
> > > for
> > > + * more information.
> > > + *
> > > + * @param level_count VK_REMAINING_MIP_LEVELS isn't supported.
> > > + * @param layer_count VK_REMAINING_ARRAY_LAYERS isn't supported. For
> 3D
> > > images,
> > > + *                    this represents the maximum layers to
> transition at
> > > each
> > > + *                    specified miplevel.
> > > + */
> > >  static void
> > >  transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
> > >                          const struct anv_image *image,
> > > @@ -532,13 +543,27 @@ transition_color_buffer(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >                          VkImageLayout initial_layout,
> > >                          VkImageLayout final_layout)
> > >  {
> > > -   assert(image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > > -
> > > -   if (image->aux_surface.isl.size == 0)
> > > -      return;
> > > -
> > > -   if (initial_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
> > > -       initial_layout != VK_IMAGE_LAYOUT_PREINITIALIZED)
> > > +   /* Validate the inputs. */
> > > +   assert(cmd_buffer);
> > > +   assert(image && image->aspects == VK_IMAGE_ASPECT_COLOR_BIT);
> > > +   /* These values aren't supported for simplicity's sake. */
> > > +   assert(level_count != VK_REMAINING_MIP_LEVELS &&
> > > +          layer_count != VK_REMAINING_ARRAY_LAYERS);
> > > +   /* Ensure the subresource range is valid. */
> > > +   uint64_t last_level_num = base_level + level_count;
> > > +   const uint32_t max_depth = anv_minify(image->extent.depth,
> > > base_level);
> > > +   const uint32_t image_layers = MAX2(image->array_size, max_depth);
> > > +   assert(base_layer + layer_count  <= image_layers);
> > > +   assert(last_level_num <= image->levels);
> > > +   /* The spec disallows these final layouts. */
> > > +   assert(final_layout != VK_IMAGE_LAYOUT_UNDEFINED &&
> > > +          final_layout != VK_IMAGE_LAYOUT_PREINITIALIZED);
> > > +
> > > +   /* No work is necessary if the layout stays the same or if this
> > > subresource
> > > +    * range lacks auxiliary data.
> > > +    */
> > > +   if (initial_layout == final_layout ||
> > > +       base_layer >= anv_image_aux_layers(image, base_level))
> > >        return;
> > >
> > >     /* A transition of a 3D subresource works on all slices at a time.
> */
> > > @@ -549,22 +574,142 @@ transition_color_buffer(struct anv_cmd_buffer
> > > *cmd_buffer,
> > >
> > >     /* We're interested in the subresource range subset that has aux
> data.
> > > */
> > >     level_count = MIN2(level_count, anv_image_aux_levels(image));
> > > +   layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> > > base_level));
> > >
> >
> > Is this correct?  I think we want MIN2(layer_count,
> anv_image_aux_layers()
> > - base_layer), don't we?  This would also mean there's a bug in the
> current
> > level_count.
> >
> >
>
> I see nothing wrong with it. The current equation limits the
> user-specified the layer_count to the actual number of layers which can
> be transitioned. What's the meaning behind subtracting base_layer?
>

Because anv_image_aux_layers is the number of layers relative to 0 not
relative to base_layer.  The layer_count parameter, on the other hand, is
relative to base_layer.  Put differently, the maximum layer they will touch
is base_layer + layer_count - 1 where the max layer in the image is
anv_image_aux_layers() - 1.  So, if we're going to clamp layer_count, we
need to clamp it to anv_imagae_aux_layers() - base_layer.


> > > +   last_level_num = base_level + level_count;
> > > +
> > > +   /* Record whether or not the layout is undefined. Pre-initialized
> > > images
> > > +    * with auxiliary buffers have a non-linear layout and are thus
> > > undefined.
> > > +    */
> > > +   assert(image->tiling == VK_IMAGE_TILING_OPTIMAL);
> > > +   const bool undef_layout = initial_layout ==
> VK_IMAGE_LAYOUT_UNDEFINED
> > > ||
> > > +                             initial_layout == VK_IMAGE_LAYOUT_
> > > PREINITIALIZED;
> > >
> > > -   /* We're transitioning from an undefined layout. We must ensure
> that
> > > the
> > > -    * clear values buffer is filled with valid data.
> > > +   /* Do preparatory work before the resolve operation or return
> early if
> > > no
> > > +    * resolve is actually needed.
> > >      */
> > > -   for (unsigned l = 0; l < level_count; l++)
> > > -      init_fast_clear_state_entry(cmd_buffer, image, base_level + l);
> > > -
> > > -   if (image->aux_usage == ISL_AUX_USAGE_CCS_E) {
> > > -      /* We're transitioning from an undefined layout so it doesn't
> really
> > > -       * matter what data ends up in the color buffer.  We do,
> however,
> > > need to
> > > -       * ensure that the CCS has valid data in it.  One easy way to do
> > > that is
> > > -       * to fast-clear the specified range.
> > > +   if (undef_layout) {
> > > +      /* A subresource in the undefined layout may have been aliased
> and
> > > +       * populated with any arrangement of bits. Therefore, we must
> > > initialize
> > > +       * the related aux buffer and clear buffer entry with desirable
> > > values.
> > > +       *
> > > +       * Initialize the relevant clear buffer entries.
> > >         */
> > > -      anv_image_ccs_clear(cmd_buffer, image, base_level, level_count,
> > > -                          base_layer, layer_count);
> > > +      for (unsigned level = base_level; level < last_level_num;
> level++)
> > > +         init_fast_clear_state_entry(cmd_buffer, image, level);
> > > +
> > > +      /* MCS buffers have been initialized for correct behaviour. */
> > >
> >
> > I don't think this is correct.  I can definitely see MCS going off the
> > rails in a similar way to CCS if things aren't initialized correctly.
> > Unfortunately, MSAA test coverage is defintiely sub-par right now.  I
> think
> > I'd feel more comfortable if we fast-cleared them too.  Fortunately,
> > though, we always leave basic MCS on for MSAA surfaces so no resolve will
> > ever be needed.
> >
> >
>
> That's true. 2x and 8x MCS surfaces require that some of the upper bits
> be zero, so they be should be fast-cleared as well. I'll make a separate
> patch for fast-clearing MCS and CC it to stable.
>
> > > +      if (image->samples > 1)
> > > +         return;
> > > +
> > > +      /* Initialize the CCS buffers to enable correct rendering. This
> > > operation
> > > +       * requires up to two steps: one to rid the CCS buffer of data
> that
> > > may
> > > +       * cause GPU hangs, and another to ensure that writes done
> without
> > > CCS
> > > +       * will be visible to reads done with CCS.
> > > +       *
> > > +       * On SKL+, it becomes possible to have a CCS buffer with
> invalid
> > > data.
> > > +       * One easy way to avoid this state is to fast-clear the
> specified
> > > range.
> > > +       */
> > > +      if (GEN_GEN >= 9) {
> > > +         anv_image_ccs_clear(cmd_buffer, image, base_level,
> level_count,
> > > +                             base_layer, layer_count);
> > > +      }
> > > +      /* At this point, some elements of the CCS buffer may have the
> > > fast-clear
> > > +       * bit-arrangement. As the user writes to a subresource, we
> need to
> > > have
> > > +       * the associated CCS elements enter the ambiguated state. This
> > > enables
> > > +       * reads (implicit or explicit) to reflect the user-written data
> > > instead
> > > +       * of the clear color. The only time such elements will not
> change
> > > their
> > > +       * state as described above, is in a final layout that doesn't
> have
> > > CCS
> > > +       * enabled. In this case, we must force the associated CCS
> buffers
> > > of the
> > > +       * specified range to enter the ambiguated state in advance.
> > > +       */
> > > +      if (image->aux_usage != ISL_AUX_USAGE_CCS_E &&
> > > +          final_layout != VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
> > > +         /* The CCS_D buffer may not be enabled in the final layout.
> > > Continue
> > > +          * executing this function to perform a resolve.
> > > +          */
> > > +          anv_perf_warn("Performing an additional resolve for CCS_D
> > > layout "
> > > +                        "transition. Consider always leaving it on or
> "
> > > +                        "performing an ambiguation pass.");
> > > +      } else {
> > > +         /* Writes in the final layout will be aware of the CCS
> buffer. In
> > > +          * addition, the clear buffer entries and the CCS buffers
> have
> > > been
> > > +          * populated with values that will result in correct
> rendering.
> > > +          */
> > > +         return;
> > >
> >
> > Ugh... This all works but it makes me an even bigger fan of that
> ambiguate
> > pass... Maybe we should consider reviving it once this all lands.
> >
> >
>
> I can see why. It would simplify this chunk of code a lot.
>
> > > +      }
> > > +   } else if (initial_layout != VK_IMAGE_LAYOUT_COLOR_
> ATTACHMENT_OPTIMAL)
> > > {
> > > +      /* Resolves are only necessary if the subresource may contain
> blocks
> > > +       * fast-cleared to values unsupported in other layouts. This
> only
> > > occurs
> > > +       * if the initial layout is COLOR_ATTACHMENT_OPTIMAL.
> > > +       */
> > > +      return;
> > > +   } else if (image->samples > 1) {
> > > +      /* MCS buffers don't need resolving. */
> > > +      return;
> > >     }
> > > +
> > > +   /* Perform a resolve to synchronize data between the main and aux
> > > buffer.
> > > +    * Before we begin, we must satisfy the cache flushing requirement
> > > specified
> > > +    * in the Sky Lake PRM Vol. 7, "MCS Buffer for Render Target(s)":
> > > +    *
> > > +    *    Any transition from any value in {Clear, Render, Resolve} to
> a
> > > +    *    different value in {Clear, Render, Resolve} requires end of
> pipe
> > > +    *    synchronization.
> > > +    *
> > > +    * We perform a flush of the write cache before and after the
> clear and
> > > +    * resolve operations to meet this requirement.
> > > +    *
> > > +    * Unlike other drawing, it seems that fast clear operations are
> not
> > > +    * properly synchronized. The first PIPE_CONTROL here likely
> ensures
> > > that
> > > +    * the contents of the previous render or clear hit the render
> target
> > > before
> > > +    * we resolve and the second likely ensures that the resolve is
> > > complete
> > > +    * before we do any more rendering or clearing.
> > >
> >
> > I'm really not a fan of how much this has been weasel-worded...
> >
> > <rant>
> > One of the big problems with the PRMs is that they don't actually
> describe
> > the hardware.  They describe how a theoretical driver can/should program
> > the hardware to get correct rendering results.  What they don't tell you
> is
> > *why* programming the hardware that way is needed.  They get especially
> bad
> > about this around synchronization and image layout.  They're also
> > frequently missing important details and are sometimes flat-out wrong.
> >
> > The statement "fast clear ops are not properly synchronized with other
> > drawing" describes, in some sense, what's going on in the hardware.  You
> > have two rendering operations in-flight and, due to some hardware detail
> I
> > don't fully understand, they can't safely be in the pipeline at the same
> > time.  It's not a full description I'll grant, but it at least describes,
> > in general terms, the problem the workaround is solving.  The language in
> > the PRM, on the other hand, describes some engineer's mental model of
> that
> > synchronization problem and a way of working around it.
> >
> > We write drivers for hardware, not for PRMs.  I don't think we need to
> > weasel-word our comments to make it look like all we have is hair-brained
> > theories while whoever wrote the PRM note has the actual knowledge.  If
> > we're particularly unsure about something then that should be expressed
> in
> > the comment.  However, I'm pretty convinced that what's going on here is
> a
> > synchronization problem.  Our understanding of fast-clear operations may
> > improve in the future and, if/when it does, we'll update the comments.
> > </rant>
> >
> >
>
> Thanks for sharing that point of view. I hadn't really thought that much
> about how the PRMs were created. I agree that I can at least remove the
> word "seems" from the comment while still being accurate.
>
> > > +    */
> > > +   cmd_buffer->state.pending_pipe_bits |=
> > > +      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > > +
> > > +   for (uint32_t level = base_level; level < last_level_num; level++)
> {
> > > +
> > > +      /* The number of layers changes at each 3D miplevel. */
> > > +      if (image->type == VK_IMAGE_TYPE_3D) {
> > > +         layer_count = MIN2(layer_count, anv_image_aux_layers(image,
> > > level));
> > > +      }
> > > +
> > > +      /* Create a surface state with the right clear color and
> perform the
> > > +       * resolve.
> > > +       */
> > > +      struct anv_state surface_state =
> > > +         anv_cmd_buffer_alloc_surface_state(cmd_buffer);
> > > +      isl_surf_fill_state(&cmd_buffer->device->isl_dev,
> > > surface_state.map,
> > > +                          .surf = &image->color_surface.isl,
> > > +                          .view = &(struct isl_view) {
> > > +                              .usage = ISL_SURF_USAGE_RENDER_TARGET_
> BIT,
> > > +                              .format = image->color_surface.isl.
> format,
> > > +                              .swizzle = ISL_SWIZZLE_IDENTITY,
> > > +                              .base_level = level,
> > > +                              .levels = 1,
> > > +                              .base_array_layer = base_layer,
> > > +                              .array_len = layer_count,
> > > +                           },
> > > +                          .aux_surf = &image->aux_surface.isl,
> > > +                          .aux_usage = image->aux_usage ==
> > > ISL_AUX_USAGE_NONE ?
> > > +                                       ISL_AUX_USAGE_CCS_D :
> > > image->aux_usage,
> > > +                          .mocs = cmd_buffer->device->default_mocs);
> > > +      add_image_relocs(cmd_buffer, image, VK_IMAGE_ASPECT_COLOR_BIT,
> > > +                       image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> > > +                       ISL_AUX_USAGE_CCS_E : ISL_AUX_USAGE_CCS_D,
> > > +                       surface_state);
> > > +      anv_state_flush(cmd_buffer->device, surface_state);
> > > +      genX(copy_fast_clear_dwords)(cmd_buffer, surface_state, image,
> > > level,
> > > +                                   false /* copy to ss */);
> > > +      anv_ccs_resolve(cmd_buffer, surface_state, image, level,
> > > layer_count,
> > > +                      image->aux_usage == ISL_AUX_USAGE_CCS_E ?
> > > +                      BLORP_FAST_CLEAR_OP_RESOLVE_PARTIAL :
> > > +                      BLORP_FAST_CLEAR_OP_RESOLVE_FULL);
> > > +   }
> > > +
> > > +   cmd_buffer->state.pending_pipe_bits |=
> > > +      ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT | ANV_PIPE_CS_STALL_BIT;
> > >  }
> > >
> > >  /**
> > > --
> > > 2.13.1
> > >
> > > _______________________________________________
> > > mesa-dev mailing list
> > > mesa-dev@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> > >
>

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v3 08/16] anv: Transition more color buffer layouts

Reply via email to