On 5 August 2013 15:37, Anuj Phogat <anuj.pho...@gmail.com> wrote: > Currently single sample scaled blits with GL_LINEAR filter falls > back to meta path. Patch removes this limitation in BLORP engine > and implements single sample scaled blit with bilinear filter. > No piglit, gles3 regressions are obeserved with this patch. Piglit > test case patches to verify this implementation are out on piglit > mailing list. >
I'm uncomfortable with the approach taken in this patch, because it doesn't make use of the bilinear filtering capability built into the sampling hardware. Back when you were implementing EXT_framebuffer_multisample_blit_scaled, there was good reason not to use the sampler's bilinear filtering capability--because it doesn't work properly for multisampled textures. But for scaled blitting of single-sampled textures it should work fine, and in all likelihood it will be faster than doing manual bilinear filtering in the shader. Also, there's a higher risk of making mistakes if we manually implement bilinear filtering in the shader. I'd recommend instead using the "sample" message to read from the surface when doing GL_LINEAR filtering. > > Signed-off-by: Anuj Phogat <anuj.pho...@gmail.com> > --- > src/mesa/drivers/dri/i965/brw_blorp.h | 7 +- > src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 157 > ++++++++++++++++++++------ > src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 4 +- > 3 files changed, 132 insertions(+), 36 deletions(-) > > diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h > b/src/mesa/drivers/dri/i965/brw_blorp.h > index 49862b8..be40625 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp.h > +++ b/src/mesa/drivers/dri/i965/brw_blorp.h > @@ -44,7 +44,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw, > float src_x1, float src_y1, > float dst_x0, float dst_y0, > float dst_x1, float dst_y1, > - bool mirror_x, bool mirror_y); > + GLenum filter, bool mirror_x, bool mirror_y); > > bool > brw_blorp_clear_color(struct brw_context *brw, struct gl_framebuffer *fb, > @@ -333,6 +333,9 @@ struct brw_blorp_blit_prog_key > */ > float x_scale; > float y_scale; > + > + /* True for single sample scaled blits with linear filter. */ > + bool bilinear_filter; > }; > > class brw_blorp_blit_params : public brw_blorp_params > @@ -347,7 +350,7 @@ public: > GLfloat src_x1, GLfloat src_y1, > GLfloat dst_x0, GLfloat dst_y0, > GLfloat dst_x1, GLfloat dst_y1, > - bool mirror_x, bool mirror_y); > + GLenum filter, bool mirror_x, bool mirror_y); > > virtual uint32_t get_wm_prog(struct brw_context *brw, > brw_blorp_prog_data **prog_data) const; > diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > index 8c0db48..0a28026 100644 > --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp > @@ -133,7 +133,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw, > float src_x1, float src_y1, > float dst_x0, float dst_y0, > float dst_x1, float dst_y1, > - bool mirror_x, bool mirror_y) > + GLenum filter, bool mirror_x, bool mirror_y) > { > /* Get ready to blit. This includes depth resolving the src and dst > * buffers if necessary. Note: it's not necessary to do a color > resolve on > @@ -161,7 +161,7 @@ brw_blorp_blit_miptrees(struct brw_context *brw, > src_x1, src_y1, > dst_x0, dst_y0, > dst_x1, dst_y1, > - mirror_x, mirror_y); > + filter, mirror_x, mirror_y); > brw_blorp_exec(brw, ¶ms); > > intel_miptree_slice_set_needs_hiz_resolve(dst_mt, dst_level, > dst_layer); > @@ -173,7 +173,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield > buffer_bit, > struct intel_renderbuffer *dst_irb, > GLfloat srcX0, GLfloat srcY0, GLfloat srcX1, GLfloat srcY1, > GLfloat dstX0, GLfloat dstY0, GLfloat dstX1, GLfloat dstY1, > - bool mirror_x, bool mirror_y) > + GLenum filter, bool mirror_x, bool mirror_y) > { > /* Find source/dst miptrees */ > struct intel_mipmap_tree *src_mt = find_miptree(buffer_bit, src_irb); > @@ -185,7 +185,7 @@ do_blorp_blit(struct brw_context *brw, GLbitfield > buffer_bit, > dst_mt, dst_irb->mt_level, dst_irb->mt_layer, > srcX0, srcY0, srcX1, srcY1, > dstX0, dstY0, dstX1, dstY1, > - mirror_x, mirror_y); > + filter, mirror_x, mirror_y); > > intel_renderbuffer_set_needs_downsample(dst_irb); > } > @@ -245,14 +245,6 @@ try_blorp_blit(struct brw_context *brw, > fixup_mirroring(mirror_y, srcY0, srcY1); > fixup_mirroring(mirror_y, dstY0, dstY1); > > - /* Linear filtering is not yet implemented in blorp engine. So, > fallback > - * to other blit paths. > - */ > - if ((srcX1 - srcX0 != dstX1 - dstX0 || > - srcY1 - srcY0 != dstY1 - dstY0) && > - filter == GL_LINEAR) > - return false; > - > /* If the destination rectangle needs to be clipped or scissored, do > so. > */ > if (!(clip_or_scissor(mirror_x, srcX0, srcX1, dstX0, dstX1, > @@ -304,7 +296,7 @@ try_blorp_blit(struct brw_context *brw, > if (dst_irb) > do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0, > srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, > - mirror_x, mirror_y); > + filter, mirror_x, mirror_y); > } > break; > case GL_DEPTH_BUFFER_BIT: > @@ -316,7 +308,7 @@ try_blorp_blit(struct brw_context *brw, > return false; > do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0, > srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, > - mirror_x, mirror_y); > + filter, mirror_x, mirror_y); > break; > case GL_STENCIL_BUFFER_BIT: > src_irb = > @@ -327,7 +319,7 @@ try_blorp_blit(struct brw_context *brw, > return false; > do_blorp_blit(brw, buffer_bit, src_irb, dst_irb, srcX0, srcY0, > srcX1, srcY1, dstX0, dstY0, dstX1, dstY1, > - mirror_x, mirror_y); > + filter, mirror_x, mirror_y); > break; > default: > assert(false); > @@ -396,7 +388,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw, > dst_mt, dst_image->Level, dst_image->Face + > slice, > srcX0, srcY0, srcX1, srcY1, > dstX0, dstY0, dstX1, dstY1, > - false, mirror_y); > + GL_NEAREST, false, mirror_y); > > /* If we're copying to a packed depth stencil texture and the source > * framebuffer has separate stencil, we need to also copy the stencil > data > @@ -420,7 +412,7 @@ brw_blorp_copytexsubimage(struct brw_context *brw, > dst_image->Face + slice, > srcX0, srcY0, srcX1, srcY1, > dstX0, dstY0, dstX1, dstY1, > - false, mirror_y); > + GL_NEAREST, false, mirror_y); > } > } > > @@ -637,6 +629,7 @@ private: > void single_to_blend(); > void manual_blend_average(unsigned num_samples); > void manual_blend_bilinear(unsigned num_samples); > + void single_sample_bilinear_filter(void); > void sample(struct brw_reg dst); > void texel_fetch(struct brw_reg dst); > void mcs_fetch(); > @@ -873,15 +866,19 @@ brw_blorp_blit_program::compile(struct brw_context > *brw, > decode_msaa(key->tex_samples, key->tex_layout); > } > > - /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, > offset)). > - * > - * In other words: X, Y, and S now contain values which, when > passed to > - * the texturing unit, will cause data to be read from the correct > - * memory location. So we can fetch the texel now. > - */ > - if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS) > - mcs_fetch(); > - texel_fetch(texture_data[0]); > + if (key->blit_scaled && key->bilinear_filter) > + single_sample_bilinear_filter(); > + else { > + /* Now (X, Y, S) = decode_msaa(tex_samples, detile(tex_tiling, > offset)). > + * > + * In other words: X, Y, and S now contain values which, when > passed to > + * the texturing unit, will cause data to be read from the > correct > + * memory location. So we can fetch the texel now. > + */ > + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS) > + mcs_fetch(); > + texel_fetch(texture_data[0]); > + } > } > > /* Finally, write the fetched (or blended) value to the render target > and > @@ -947,7 +944,7 @@ brw_blorp_blit_program::alloc_regs() > reg += 2; > } > > - if (key->blit_scaled && key->blend) { > + if (key->blit_scaled) { > this->x_sample_coords = brw_vec8_grf(reg, 0); > reg += 2; > this->y_sample_coords = brw_vec8_grf(reg, 0); > @@ -1442,6 +1439,22 @@ brw_blorp_blit_program::translate_dst_to_src() > brw_RNDD(&func, Yp_f, Y_f); > brw_MUL(&func, X_f, Xp_f, brw_imm_f(1 / key->x_scale)); > brw_MUL(&func, Y_f, Yp_f, brw_imm_f(1 / key->y_scale)); > + } else if (key->blit_scaled && key->bilinear_filter && !key->blend) { > + /* Adjust coordinates so that integers represent pixel centers > rather > + * than pixel edges. > + */ > + brw_ADD(&func, X_f, X_f, brw_imm_f(-0.5)); > + brw_ADD(&func, Y_f, Y_f, brw_imm_f(-0.5)); > + > + /* Store the fractional parts to be used as bilinear interpolation > + * coefficients. > + */ > + brw_FRC(&func, x_frac, X_f); > + brw_FRC(&func, y_frac, Y_f); > + > + /* Round the float coordinates down to nearest integer */ > + brw_MOV(&func, Xp, X_f); > + brw_MOV(&func, Yp, Y_f); > } else { > /* Round the float coordinates down to nearest integer by moving to > * UD registers. > @@ -1765,6 +1778,74 @@ > brw_blorp_blit_program::manual_blend_bilinear(unsigned num_samples) > #undef SAMPLE > } > > +void > +brw_blorp_blit_program::single_sample_bilinear_filter(void) > +{ > + /* Bilinear filtering is performed by following operations: > + * - Compute the colors from 2x2 pixels (vec4 c0, vec4 c1, vec4 c2, > vec4 c3) > + * - linearly interpolate colors c0 and c1 in X > + * - linearly interpolate colors c2 and c3 in X > + * - linearly interpolate the results of last two operations in Y > + * > + * result = lrp(lrp(c0 + c1) + lrp(c2 + c3)) > + */ > + ASSERT(s_is_zero); > + SWAP_XY_AND_XPYP(); > + > + /* Move the X1, Y1 from Float to UD regsiters. */ > + brw_MOV(&func, vec1(t1), rect_grid_x1); > + brw_MOV(&func, vec1(t2), rect_grid_y1); > + > + for (unsigned i = 0; i < 4; ++i) { > + assert(i < ARRAY_SIZE(texture_data)); > + > + /* Compute pixel coordinates */ > + brw_ADD(&func, vec16(X), Xp, brw_imm_ud(i % 2)); > + brw_ADD(&func, vec16(Y), Yp, brw_imm_ud(i / 2)); > + > + /* Clamp the X, Y texture coordinates to properly handle the > sampling of > + * texels on texture edges. > + */ > + clamp_tex_coords(vec16(X), vec16(Y), > + brw_imm_ud(0), brw_imm_ud(0), > + vec1(t1), vec1(t2)); > + > + /* The MCS value we fetch has to match up with the pixel that we're > + * sampling from. Since we sample from different pixels in each > + * iteration of this "for" loop, the call to mcs_fetch() should be > + * here inside the loop after computing the pixel coordinates. > + */ > + if (key->tex_layout == INTEL_MSAA_LAYOUT_CMS) > + mcs_fetch(); > + > + texel_fetch(texture_data[i]); > + } > + > +#define PIXEL(x, y) offset(texture_data[x], y) > + brw_set_access_mode(&func, BRW_ALIGN_16); > + for (int index = 3; index > 0; ) { > + /* Since we're doing SIMD16, 4 color channels fits in to 8 > registers. > + * Counter value of 8 in 'for' loop below is used to interpolate all > + * the color components. > + */ > + for (int k = 0; k < 8; ++k) > + brw_LRP(&func, > + vec8(PIXEL(index - 1, k)), > + offset(x_frac, k & 1), > + PIXEL(index, k), > + PIXEL(index - 1, k)); > + index -= 2; > + } > + for (int k = 0; k < 8; ++k) > + brw_LRP(&func, > + vec8(PIXEL(0, k)), > + offset(y_frac, k & 1), > + vec8(PIXEL(2, k)), > + vec8(PIXEL(0, k))); > + brw_set_access_mode(&func, BRW_ALIGN_1); > +#undef PIXEL > +} > + > /** > * Emit code to look up a value in the texture using the SAMPLE message > (which > * does blending of MSAA surfaces). > @@ -2050,6 +2131,7 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct > brw_context *brw, > GLfloat src_x1, GLfloat > src_y1, > GLfloat dst_x0, GLfloat > dst_y0, > GLfloat dst_x1, GLfloat > dst_y1, > + GLenum filter, > bool mirror_x, bool mirror_y) > { > struct gl_context *ctx = &brw->ctx; > @@ -2058,7 +2140,10 @@ brw_blorp_blit_params::brw_blorp_blit_params(struct > brw_context *brw, > src.set(brw, src_mt, src_level, src_layer); > dst.set(brw, dst_mt, dst_level, dst_layer); > > - src.brw_surfaceformat = dst.brw_surfaceformat; > + if (src.num_samples > 1) > + src.brw_surfaceformat = dst.brw_surfaceformat; > + else > + dst.brw_surfaceformat = src.brw_surfaceformat; > > use_wm_prog = true; > memset(&wm_prog_key, 0, sizeof(wm_prog_key)); > @@ -2123,11 +2208,19 @@ > brw_blorp_blit_params::brw_blorp_blit_params(struct brw_context *brw, > ((dst_x1 - dst_x0) == (src_x1 - src_x0) && > (dst_y1 - dst_y0) == (src_y1 - src_y0)) ? false : true; > > - /* Scaling factors used for bilinear filtering in multisample scaled > - * blits. > + /* Scaling factors used for bilinear filtering in > single-sample/multisample > + * scaled blits. > */ > - wm_prog_key.x_scale = 2.0; > - wm_prog_key.y_scale = src_mt->num_samples / 2.0; > + wm_prog_key.x_scale = 1.0; > + wm_prog_key.y_scale = 1.0; > + if (wm_prog_key.blit_scaled && src_mt->num_samples > 0) { > + wm_prog_key.x_scale = 2.0; > + wm_prog_key.y_scale = src_mt->num_samples / 2.0; > + } > + > + /* bilinear filtering or not */ > + if (filter == GL_LINEAR) > + wm_prog_key.bilinear_filter = true; > > /* The render path must be configured to use the same number of > samples as > * the destination buffer. > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > index d6643ca..de5f8f2 100644 > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c > @@ -1552,7 +1552,7 @@ intel_miptree_updownsample(struct brw_context *brw, > width, height, > dst_x0, dst_y0, > width, height, > - false, false /*mirror x, y*/); > + GL_NEAREST, false, false /*mirror x, y*/); > > if (src->stencil_mt) { > brw_blorp_blit_miptrees(brw, > @@ -1562,7 +1562,7 @@ intel_miptree_updownsample(struct brw_context *brw, > width, height, > dst_x0, dst_y0, > width, height, > - false, false /*mirror x, y*/); > + GL_NEAREST, false, false /*mirror x, y*/); > } > } > > -- > 1.8.1.4 > > _______________________________________________ > mesa-dev mailing list > mesa-dev@lists.freedesktop.org > http://lists.freedesktop.org/mailman/listinfo/mesa-dev >
_______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org http://lists.freedesktop.org/mailman/listinfo/mesa-dev