On Tue, 2018-05-15 at 15:28 -0700, Jason Ekstrand wrote: > --- > src/intel/blorp/blorp_clear.c | 199 +++++++++++++++++++------------- > ---------- > 1 file changed, 88 insertions(+), 111 deletions(-) > > diff --git a/src/intel/blorp/blorp_clear.c > b/src/intel/blorp/blorp_clear.c > index 39bc0c6..5625221 100644 > --- a/src/intel/blorp/blorp_clear.c > +++ b/src/intel/blorp/blorp_clear.c > @@ -193,104 +193,7 @@ get_fast_clear_rect(const struct isl_device > *dev, > > /* Only single sampled surfaces need to (and actually can) be > resolved. */ > if (aux_surf->usage == ISL_SURF_USAGE_CCS_BIT) { > - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for > Render > - * Target(s)", beneath the "Fast Color Clear" bullet (p327): > - * > - * Clear pass must have a clear rectangle that must follow > - * alignment rules in terms of pixels and lines as shown > in the > - * table below. Further, the clear-rectangle height and > width > - * must be multiple of the following dimensions. If the > height > - * and width of the render target being cleared do not > meet these > - * requirements, an MCS buffer can be created such that it > - * follows the requirement and covers the RT. > - * > - * The alignment size in the table that follows is related to > the > - * alignment size that is baked into the CCS surface format > but with X > - * alignment multiplied by 16 and Y alignment multiplied by > 32. > - */ > - x_align = isl_format_get_layout(aux_surf->format)->bw; > - y_align = isl_format_get_layout(aux_surf->format)->bh; > - > - x_align *= 16; > - > - /* SKL+ line alignment requirement for Y-tiled are half those > of the prior > - * generations. > - */ > - if (dev->info->gen >= 9) > - y_align *= 16; > - else > - y_align *= 32; > - > - /* From the Ivy Bridge PRM, Vol2 Part1 11.7 "MCS Buffer for > Render > - * Target(s)", beneath the "Fast Color Clear" bullet (p327): > - * > - * In order to optimize the performance MCS buffer (when > bound to > - * 1X RT) clear similarly to MCS buffer clear for MSRT > case, > - * clear rect is required to be scaled by the following > factors > - * in the horizontal and vertical directions: > - * > - * The X and Y scale down factors in the table that follows > are each > - * equal to half the alignment value computed above. > - */ > - x_scaledown = x_align / 2; > - y_scaledown = y_align / 2; > - > - if (ISL_DEV_IS_HASWELL(dev)) { > - /* The following text was added in the Haswell PRM, "3D > Media GPGPU > - * Engine" >> "MCS Buffer for Render Target(s)" >> Table > "Color Clear > - * of Non-MultiSampler Render Target Restrictions": > - * > - * "Clear rectangle must be aligned to two times the > number of > - * pixels in the table shown below due to 16X16 hashing > across the > - * slice." > - * > - * It has persisted in the documentation for all platforms > up until > - * Cannonlake and possibly even beyond. However, we > believe that it > - * is only needed on Haswell. > - * > - * There are a couple possible explanations for this > restriction: > - * > - * 1) If you assume that the hardware is writing to the CCS > as > - * bytes, then the x/y_align computed above gives you an > alignment > - * in the CCS of 8x8 bytes and, if 16x16 is needed for > hashing, we > - * need to multiply by 2. > - * > - * 2) Haswell is a bit unique in that it's CCS tiling does > not line > - * up with Y-tiling on a cache-line > granularity. Instead, it has > - * an extra bit of swizzling in bit 9. Also, bit 6 > swizzling > - * applies to the CCS on Haswell. This means that > Haswell CTS > - * does not match on a cache-line granularity but it > does match on > - * a 2x2 cache line granularity. > - * > - * Clearly, the first explanation seems to follow > documentation the > - * best but they may be related. In any case, empirical > evidence > - * seems to confirm that it is, indeed required on Haswell. > - * > - * On Broadwell things get a bit stickier. Broadwell adds > support > - * for mip-mapped CCS with an alignment in the CCS of > 256x128. For a > - * 32bpb main surface, the above computation will yield a > x/y_align > - * of 128x128 for a Y-tiled main surface and 256x64 for X- > tiled. In > - * either case, if we double the alignment, we will get an > alignment > - * bigger than horizontal and vertical alignment of the CCS > and fast > - * clears of one LOD may leak into others. > - * > - * Starting with Skylake, the image alignment for the CCS > is only > - * 128x64 which is exactly the x/h_align computed above if > the main > - * surface has a 32bpb format. Also, the "Render Target > Resolve" > - * page in the bspec (not the PRM) says, "The Resolve > Rectangle size > - * is same as Clear Rectangle size from SKL+". The > x/y_align > - * computed above (without doubling) match the resolve > rectangle > - * calculation perfectly. > - * > - * Finally, to confirm all this, a full test run was > performed on > - * Feb. 9, 2018 with this doubling removed and the only > platform > - * which seemed to be affected was Haswell. The run > consisted of > - * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, > and the > - * OpenGL ES 3.2 CTS. > - */ > - x_align *= 2; > - y_align *= 2; > - } > + unreachable("This function only supports MCS fast-clear");
Why not just remove the if/else altogether and start with assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT) instead? Iago > } else { > assert(aux_surf->usage == ISL_SURF_USAGE_MCS_BIT); > > @@ -826,14 +729,8 @@ blorp_ccs_op(struct blorp_batch *batch, > enum isl_format format, > enum isl_aux_op ccs_op) > { > - if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { > - blorp_fast_clear(batch, surf, format, level, start_layer, > num_layers, > - 0, 0, > - minify(surf->surf->logical_level0_px.w, > level), > - minify(surf->surf->logical_level0_px.h, > level)); > - return; > - } else if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 && > - ccs_op == ISL_AUX_OP_AMBIGUATE) { > + if (ISL_DEV_GEN(batch->blorp->isl_dev) < 10 && > + ccs_op == ISL_AUX_OP_AMBIGUATE) { > /* Prior to Cannonlake, the ambiguate is not available as a > hardware > * operation. Instead, we have to fake it by carefully > binding the CCS > * as a render target and clearing it to 0. We leave that > complicated > @@ -863,6 +760,11 @@ blorp_ccs_op(struct blorp_batch *batch, > isl_format_get_layout(params.dst.aux_surf.format); > assert(aux_fmtl->txc == ISL_TXC_CCS); > > + /* The PRM Sections entitled "Fast Color Clear" and "Render > Target Resolve" > + * contain tables for the scale down factor for fast clear and > resolve > + * rectangles. The values in those tables are easily computed > from the > + * CCS element block size. > + */ > unsigned x_scaledown, y_scaledown; > if (ISL_DEV_GEN(batch->blorp->isl_dev) >= 9) { > x_scaledown = aux_fmtl->bw * 8; > @@ -871,16 +773,91 @@ blorp_ccs_op(struct blorp_batch *batch, > x_scaledown = aux_fmtl->bw * 8; > y_scaledown = aux_fmtl->bh * 16; > } else { > - x_scaledown = aux_fmtl->bw / 2; > - y_scaledown = aux_fmtl->bh / 2; > + assert(ISL_DEV_GEN(batch->blorp->isl_dev) == 7); > + if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { > + x_scaledown = aux_fmtl->bw * 8; > + y_scaledown = aux_fmtl->bh * 16; > + } else { > + x_scaledown = aux_fmtl->bw / 2; > + y_scaledown = aux_fmtl->bh / 2; > + } > } > + > + unsigned x_align = x_scaledown; > + unsigned y_align = y_scaledown; > + if (ccs_op == ISL_AUX_OP_FAST_CLEAR) { > + /* The PRM Section entitled "Fast Color Clear" contains tables > for the > + * scaledown and alignment factors for fast clear > operations. In all > + * cases, the alignment table is 2x the scaledown table. > + */ > + x_align *= 2; > + y_align *= 2; > + > + if (ISL_DEV_IS_HASWELL(batch->blorp->isl_dev)) { > + /* The following text was added in the Haswell PRM, "3D > Media GPGPU > + * Engine" >> "MCS Buffer for Render Target(s)" >> Table > "Color Clear > + * of Non-MultiSampler Render Target Restrictions": > + * > + * "Clear rectangle must be aligned to two times the > number of > + * pixels in the table shown below due to 16X16 hashing > across the > + * slice." > + * > + * It has persisted in the documentation for all platforms > up until > + * Cannonlake and possibly even beyond. However, we > believe that it > + * is only needed on Haswell. > + * > + * There are a couple possible explanations for this > restriction: > + * > + * 1) If you assume that the hardware is writing to the CCS > as > + * bytes, then the x/y_align computed above gives you an > alignment > + * in the CCS of 8x8 bytes and, if 16x16 is needed for > hashing, we > + * need to multiply by 2. > + * > + * 2) Haswell is a bit unique in that it's CCS tiling does > not line > + * up with Y-tiling on a cache-line > granularity. Instead, it has > + * an extra bit of swizzling in bit 9. Also, bit 6 > swizzling > + * applies to the CCS on Haswell. This means that > Haswell CTS > + * does not match on a cache-line granularity but it > does match on > + * a 2x2 cache line granularity. > + * > + * Clearly, the first explanation seems to follow > documentation the > + * best but they may be related. In any case, empirical > evidence > + * seems to confirm that it is, indeed required on Haswell. > + * > + * On Broadwell things get a bit stickier. Broadwell adds > support > + * for mip-mapped CCS with an alignment in the CCS of > 256x128. For a > + * 32bpb main surface, the above computation will yield a > x/y_align > + * of 128x128 for a Y-tiled main surface and 256x64 for X- > tiled. In > + * either case, if we double the alignment, we will get an > alignment > + * bigger than horizontal and vertical alignment of the CCS > and fast > + * clears of one LOD may leak into others. > + * > + * Starting with Skylake, the image alignment for the CCS > is only > + * 128x64 which is exactly the x/h_align computed above if > the main > + * surface has a 32bpb format. Also, the "Render Target > Resolve" > + * page in the bspec (not the PRM) says, "The Resolve > Rectangle size > + * is same as Clear Rectangle size from SKL+". The > x/y_align > + * computed above (without doubling) match the resolve > rectangle > + * calculation perfectly. > + * > + * Finally, to confirm all this, a full test run was > performed on > + * Feb. 9, 2018 with this doubling removed and the only > platform > + * which seemed to be affected was Haswell. The run > consisted of > + * piglit, dEQP, the Vulkan CTS 1.0.2, the OpenGL 4.5 CTS, > and the > + * OpenGL ES 3.2 CTS. > + */ > + x_align *= 2; > + y_align *= 2; > + } > + } > + > params.x0 = params.y0 = 0; > params.x1 = minify(params.dst.aux_surf.logical_level0_px.width, > level); > params.y1 = minify(params.dst.aux_surf.logical_level0_px.height, > level); > - params.x1 = ALIGN(params.x1, x_scaledown) / x_scaledown; > - params.y1 = ALIGN(params.y1, y_scaledown) / y_scaledown; > + params.x1 = ALIGN(params.x1, x_align) / x_scaledown; > + params.y1 = ALIGN(params.y1, y_align) / y_scaledown; > > - assert(ccs_op != ISL_AUX_OP_FAST_CLEAR); > + memset(¶ms.wm_inputs.clear_color, 0xff, 4*sizeof(float)); > params.fast_clear_op = ccs_op; > params.num_layers = num_layers; > _______________________________________________ mesa-dev mailing list mesa-dev@lists.freedesktop.org https://lists.freedesktop.org/mailman/listinfo/mesa-dev