blorp: Add support for single sample scaled blit with bilinear filter

Anuj Phogat Wed, 07 Aug 2013 09:33:59 -0700

On Tue, Aug 6, 2013 at 3:05 PM, Paul Berry <stereotype...@gmail.com> wrote:


> On 5 August 2013 15:37, Anuj Phogat <anuj.pho...@gmail.com> wrote:
>
>> Currently single sample scaled blits with GL_LINEAR filter falls
>> back to meta path. Patch removes this limitation in BLORP engine
>> and implements single sample scaled blit with bilinear filter.
>> No piglit, gles3 regressions are obeserved with this patch. Piglit
>> test case patches to verify this implementation are out on piglit
>> mailing list.
>>
>
> I'm uncomfortable with the approach taken in this patch, because it
> doesn't make use of the bilinear filtering capability built into the
> sampling hardware.
>
> Back when you were implementing EXT_framebuffer_multisample_blit_scaled,
> there was good reason not to use the sampler's bilinear filtering
> capability--because it doesn't work properly for multisampled textures.
> But for scaled blitting of single-sampled textures it should work fine, and
> in all likelihood it will be faster than doing manual bilinear filtering in
> the shader.  Also, there's a higher risk of making mistakes if we manually
> implement bilinear filtering in the shader.
>
> I'd recommend instead using the "sample" message to read from the surface
> when doing GL_LINEAR filtering.
>
> Yes, It'll be more efficient to use inbuilt support for bilinear filtering
in hardware. I'll modify my patch
to use "sample" message.

>
>
>
>>
>> Signed-off-by: Anuj Phogat <anuj.pho...@gmail.com>
>> ---
>>  src/mesa/drivers/dri/i965/brw_blorp.h         |   7 +-
>>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp  | 157
>> ++++++++++++++++++++------
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |   4 +-
>>  3 files changed, 132 insertions(+), 36 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h
>> b/src/mesa/drivers/dri/i965/brw_blorp.h
>> index 49862b8..be40625 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp.h
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp.h
>> @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                          float src_x1, float src_y1,
>>                          float dst_x0, float dst_y0,
>>                          float dst_x1, float dst_y1,
>> -                        bool mirror_x, bool mirror_y);
>> +                        GLenum filter, bool mirror_x, bool mirror_y);
>>
>>  bool
>>  brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb,
>> @@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key
>>      */
>>     float x_scale;
>>     float y_scale;
>> +
>> +   /* True for single sample scaled blits with linear filter. */
>> +   bool bilinear_filter;
>>  };
>>
>>  class brw_blorp_blit_params : public brw_blorp_params
>> @@ -347,7 +350,7 @@ public:
>>                           GLfloat src_x1, GLfloat src_y1,
>>                           GLfloat dst_x0, GLfloat dst_y0,
>>                           GLfloat dst_x1, GLfloat dst_y1,
>> -                         bool mirror_x, bool mirror_y);
>> +                         GLenum filter, bool mirror_x, bool mirror_y);
>>
>>     virtual uint32_t get_wm_prog(struct brw_context *brw,
>>                                  brw_blorp_prog_data **prog_data) const;
>> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> index 8c0db48..0a28026 100644
>> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
>> @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                          float src_x1, float src_y1,
>>                          float dst_x0, float dst_y0,
>>                          float dst_x1, float dst_y1,
>> -                        bool mirror_x, bool mirror_y)
>> +                        GLenum filter, bool mirror_x, bool mirror_y)
>>  {
>>     /* Get ready to blit.  This includes depth resolving the src and dst
>>      * buffers if necessary.  Note: it's not necessary to do a color
>> resolve on
>> @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw,
>>                                  src_x1, src_y1,
>>                                  dst_x0, dst_y0,
>>                                  dst_x1, dst_y1,
>> -                                mirror_x, mirror_y);
>> +                                filter, mirror_x, mirror_y);
>>     brw_blorp_exec(brw, &params);
>>
>>     intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level,
>> dst_layer);
>> @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>>                struct intel_renderbuffer *dst_irb,
>>                GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1,
>>                GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1,
>> -              bool mirror_x, bool mirror_y)
>> +              GLenum filter, bool mirror_x, bool mirror_y)
>>  {
>>     /* Find source/dst miptrees */
>>     struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb);
>> @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield
>> buffer_bit,
>>                             dst_mt, dst_irb->mt_level, dst_irb->mt_layer,
>>                             srcX0, srcY0, srcX1, srcY1,
>>                             dstX0, dstY0, dstX1, dstY1,
>> -                           mirror_x, mirror_y);
>> +                           filter, mirror_x, mirror_y);
>>
>>     intel_renderbuffer_set_needs_downsample(dst_irb);
>>  }
>> @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw,
>>     fixup_mirroring(mirror_y, srcY0, srcY1);
>>     fixup_mirroring(mirror_y, dstY0, dstY1);
>>
>> -   /* Linear filtering is not yet implemented in blorp engine. So,
>> fallback
>> -    * to other blit paths.
>> -    */
>> -   if ((srcX1 - srcX0 != dstX1 - dstX0 ||
>> -        srcY1 - srcY0 != dstY1 - dstY0) &&
>> -       filter == GL_LINEAR)
>> -      return false;
>> -
>>     /* If the destination rectangle needs to be clipped or scissored, do
>> so.
>>      */
>>     if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1,
>> @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw,
>>          if (dst_irb)
>>              do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0,
>> srcY0,
>>                            srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                          mirror_x, mirror_y);
>> +                          filter, mirror_x, mirror_y);
>>        }
>>        break;
>>     case GL_DEPTH_BUFFER_BIT:
>> @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw,
>>           return false;
>>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                    mirror_x, mirror_y);
>> +                    filter, mirror_x, mirror_y);
>>        break;
>>     case GL_STENCIL_BUFFER_BIT:
>>        src_irb =
>> @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw,
>>           return false;
>>        do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0,
>>                      srcX1, srcY1, dstX0, dstY0, dstX1, dstY1,
>> -                    mirror_x, mirror_y);
>> +                    filter, mirror_x, mirror_y);
>>        break;
>>     default:
>>        assert(false);
>> @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>>                             dst_mt, dst_image->Level, dst_image->Face +
>> slice,
>>                             srcX0, srcY0, srcX1, srcY1,
>>                             dstX0, dstY0, dstX1, dstY1,
>> -                           false, mirror_y);
>> +                           GL_NEAREST, false, mirror_y);
>>
>>     /* If we're copying to a packed depth stencil texture and the source
>>      * framebuffer has separate stencil, we need to also copy the stencil
>> data
>> @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw,
>>                                   dst_image->Face + slice,
>>                                   srcX0, srcY0, srcX1, srcY1,
>>                                   dstX0, dstY0, dstX1, dstY1,
>> -                                 false, mirror_y);
>> +                                 GL_NEAREST, false, mirror_y);
>>        }
>>     }
>>
>> @@ -637,6 +629,7 @@ private:
>>     void single_to_blend();
>>     void manual_blend_average(unsigned num_samples);
>>     void manual_blend_bilinear(unsigned num_samples);
>> +   void single_sample_bilinear_filter(void);
>>     void sample(struct brw_reg dst);
>>     void texel_fetch(struct brw_reg dst);
>>     void mcs_fetch();
>> @@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context
>> *brw,
>>           decode_msaa(key->tex_samples, key->tex_layout);
>>        }
>>
>> -      /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> -       *
>> -       * In other words: X, Y, and S now contain values which, when
>> passed to
>> -       * the texturing unit, will cause data to be read from the correct
>> -       * memory location.  So we can fetch the texel now.
>> -       */
>> -      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> -         mcs_fetch();
>> -      texel_fetch(texture_data[0]);
>> +      if (key->blit_scaled && key->bilinear_filter)
>> +         single_sample_bilinear_filter();
>> +      else {
>> +         /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling,
>> offset)).
>> +          *
>> +          * In other words: X, Y, and S now contain values which, when
>> passed to
>> +          * the texturing unit, will cause data to be read from the
>> correct
>> +          * memory location.  So we can fetch the texel now.
>> +          */
>> +         if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> +            mcs_fetch();
>> +         texel_fetch(texture_data[0]);
>> +      }
>>     }
>>
>>     /* Finally, write the fetched (or blended) value to the render target
>> and
>> @@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs()
>>        reg += 2;
>>     }
>>
>> -   if (key->blit_scaled && key->blend) {
>> +   if (key->blit_scaled) {
>>        this->x_sample_coords = brw_vec8_grf(reg, 0);
>>        reg += 2;
>>        this->y_sample_coords = brw_vec8_grf(reg, 0);
>> @@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src()
>>        brw_RNDD(&func, Yp_f, Y_f);
>>        brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale));
>>        brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale));
>> +   } else if (key->blit_scaled && key->bilinear_filter && !key->blend) {
>> +      /* Adjust coordinates so that integers represent pixel centers
>> rather
>> +      * than pixel edges.
>> +      */
>> +      brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5));
>> +      brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5));
>> +
>> +      /* Store the fractional parts to be used as bilinear interpolation
>> +       * coefficients.
>> +       */
>> +      brw_FRC(&func, x_frac, X_f);
>> +      brw_FRC(&func, y_frac, Y_f);
>> +
>> +      /* Round the float coordinates down to nearest integer */
>> +      brw_MOV(&func, Xp, X_f);
>> +      brw_MOV(&func, Yp, Y_f);
>>     } else {
>>        /* Round the float coordinates down to nearest integer by moving to
>>         * UD registers.
>> @@ -1765,6 +1778,74 @@
>> brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples)
>>  #undef SAMPLE
>>  }
>>
>> +void
>> +brw_blorp_blit_program::single_sample_bilinear_filter(void)
>> +{
>> +   /* Bilinear filtering is performed by following operations:
>> +    * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2,
>> vec4 c3)
>> +    * - linearly interpolate colors c0 and c1 in X
>> +    * - linearly interpolate colors c2 and c3 in X
>> +    * - linearly interpolate the results of last two operations in Y
>> +    *
>> +    *   result = lrp(lrp(c0 + c1) + lrp(c2 + c3))
>> +    */
>> +   ASSERT(s_is_zero);
>> +   SWAP_XY_AND_XPYP();
>> +
>> +   /* Move the X1, Y1 from Float to UD regsiters. */
>> +   brw_MOV(&func, vec1(t1), rect_grid_x1);
>> +   brw_MOV(&func, vec1(t2), rect_grid_y1);
>> +
>> +   for (unsigned i = 0; i < 4; ++i) {
>> +      assert(i < ARRAY_SIZE(texture_data));
>> +
>> +      /* Compute pixel coordinates */
>> +      brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2));
>> +      brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2));
>> +
>> +      /* Clamp the X, Y texture coordinates to properly handle the
>> sampling of
>> +       * texels on texture edges.
>> +       */
>> +      clamp_tex_coords(vec16(X), vec16(Y),
>> +                       brw_imm_ud(0), brw_imm_ud(0),
>> +                       vec1(t1), vec1(t2));
>> +
>> +      /* The MCS value we fetch has to match up with the pixel that we're
>> +       * sampling from. Since we sample from different pixels in each
>> +       * iteration of this "for" loop, the call to mcs_fetch() should be
>> +       * here inside the loop after computing the pixel coordinates.
>> +       */
>> +      if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS)
>> +         mcs_fetch();
>> +
>> +      texel_fetch(texture_data[i]);
>> +   }
>> +
>> +#define PIXEL(x, y) offset(texture_data[x], y)
>> +   brw_set_access_mode(&func, BRW_ALIGN_16);
>> +   for (int index = 3; index > 0; ) {
>> +      /* Since we're doing SIMD16, 4 color channels fits in to 8
>> registers.
>> +       * Counter value of 8 in 'for' loop below is used to interpolate
>> all
>> +       * the color components.
>> +       */
>> +      for (int k = 0; k < 8; ++k)
>> +         brw_LRP(&func,
>> +                 vec8(PIXEL(index - 1, k)),
>> +                 offset(x_frac, k & 1),
>> +                 PIXEL(index, k),
>> +                 PIXEL(index - 1, k));
>> +      index -= 2;
>> +   }
>> +   for (int k = 0; k < 8; ++k)
>> +      brw_LRP(&func,
>> +              vec8(PIXEL(0, k)),
>> +              offset(y_frac, k & 1),
>> +              vec8(PIXEL(2, k)),
>> +              vec8(PIXEL(0, k)));
>> +   brw_set_access_mode(&func, BRW_ALIGN_1);
>> +#undef PIXEL
>> +}
>> +
>>  /**
>>   * Emit code to look up a value in the texture using the SAMPLE message
>> (which
>>   * does blending of MSAA surfaces).
>> @@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct
>> brw_context *brw,
>>                                               GLfloat src_x1, GLfloat
>> src_y1,
>>                                               GLfloat dst_x0, GLfloat
>> dst_y0,
>>                                               GLfloat dst_x1, GLfloat
>> dst_y1,
>> +                                             GLenum filter,
>>                                               bool mirror_x, bool
>> mirror_y)
>>  {
>>     struct gl_context *ctx = &brw->ctx;
>> @@ -2058,7 +2140,10 @@
>> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>>     src.set(brw, src_mt, src_level, src_layer);
>>     dst.set(brw, dst_mt, dst_level, dst_layer);
>>
>> -   src.brw_surfaceformat = dst.brw_surfaceformat;
>> +   if (src.num_samples > 1)
>> +      src.brw_surfaceformat = dst.brw_surfaceformat;
>> +   else
>> +      dst.brw_surfaceformat = src.brw_surfaceformat;
>>
>>     use_wm_prog = true;
>>     memset(&wm_prog_key, 0, sizeof(wm_prog_key));
>> @@ -2123,11 +2208,19 @@
>> brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw,
>>        ((dst_x1 - dst_x0) == (src_x1 - src_x0) &&
>>         (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true;
>>
>> -   /* Scaling factors used for bilinear filtering in multisample scaled
>> -    * blits.
>> +   /* Scaling factors used for bilinear filtering in
>> single-sample/multisample
>> +    * scaled blits.
>>      */
>> -   wm_prog_key.x_scale = 2.0;
>> -   wm_prog_key.y_scale = src_mt->num_samples / 2.0;
>> +   wm_prog_key.x_scale = 1.0;
>> +   wm_prog_key.y_scale = 1.0;
>> +   if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) {
>> +      wm_prog_key.x_scale = 2.0;
>> +      wm_prog_key.y_scale = src_mt->num_samples / 2.0;
>> +   }
>> +
>> +   /* bilinear filtering or not */
>> +   if (filter == GL_LINEAR)
>> +      wm_prog_key.bilinear_filter = true;
>>
>>     /* The render path must be configured to use the same number of
>> samples as
>>      * the destination buffer.
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index d6643ca..de5f8f2 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>>                             width, height,
>>                             dst_x0, dst_y0,
>>                             width, height,
>> -                           false, false /*mirror x, y*/);
>> +                           GL_NEAREST, false, false /*mirror x, y*/);
>>
>>     if (src->stencil_mt) {
>>        brw_blorp_blit_miptrees(brw,
>> @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw,
>>                                width, height,
>>                                dst_x0, dst_y0,
>>                                width, height,
>> -                              false, false /*mirror x, y*/);
>> +                              GL_NEAREST, false, false /*mirror x, y*/);
>>     }
>>  }
>>
>> --
>> 1.8.1.4
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>

_______________________________________________
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/3] i965/blorp: Add support for single sample scaled blit with bilinear filter

Reply via email to