Re: [Mesa-dev] [PATCH 23/28] i965/blorp: Add support for W-[de]tiling to the NIR path

2016-05-12 Thread Pohjolainen, Topi
On Tue, May 10, 2016 at 04:16:43PM -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 107 ++-
>  1 file changed, 105 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> index 27aab20..c0c02cf 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
> @@ -571,6 +571,98 @@ blorp_nir_txf_ms_mcs(nir_builder *b, nir_ssa_def *pos)
>     return &tex->dest.ssa;
>  }
>  
> +static nir_ssa_def *
> +nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src,
> +  uint32_t src_mask, int src_left_shift)
> +{
> +   nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask));
> +
> +   nir_ssa_def *shifted;
> +   if (src_left_shift > 0) {
> +  shifted = nir_ishl(b, masked, nir_imm_int(b, src_left_shift));
> +   } else if (src_left_shift < 0) {
> +  shifted = nir_ushr(b, masked, nir_imm_int(b, -src_left_shift));
> +   } else {
> +  assert(src_left_shift == 0);
> +  shifted = masked;
> +   }
> +
> +   return nir_ior(b, dst, shifted);
> +}

We could also keep the original documentation here:

/**
 * Emit code to compensate for the difference between Y and W tiling.
 *
 * This code modifies the X and Y coordinates according to the formula:
 *
 *   (X', Y', S') = detile(new_tiling, tile(old_tiling, X, Y, S))
 *
 * (See brw_blorp_blit_program).
 *
 * It can only translate between W and Y tiling, so new_tiling and old_tiling
 * are booleans where true represents W tiling and false represents Y tiling.
 */

> +
> +static inline nir_ssa_def *
> +blorp_nir_retile_y_to_w(nir_builder *b, nir_ssa_def *pos)
> +{
> +   assert(pos->num_components == 2);
> +   nir_ssa_def *x_Y = nir_channel(b, pos, 0);
> +   nir_ssa_def *y_Y = nir_channel(b, pos, 1);
> +
> +   /* Given X and Y coordinates that describe an address using Y tiling,
> +* translate to the X and Y coordinates that describe the same address
> +* using W tiling.
> +*
> +* If we break down the low order bits of X and Y, using a
> +* single letter to represent each low-order bit:
> +*
> +*   X = A << 7 | 0bBCDEFGH
> +*   Y = J << 5 | 0bKLMNP   (1)
> +*
> +* Then we can apply the Y tiling formula to see the memory offset being
> +* addressed:
> +*
> +*   offset = (J * tile_pitch + A) << 12 | 0bBCDKLMNPEFGH   (2)
> +*
> +* If we apply the W detiling formula to this memory location, then the
> +* corresponding X' and Y' coordinates are:
> +*
> +*   X' = A << 6 | 0bBCDPFH (3)
> +*   Y' = J << 6 | 0bKLMNEG
> +*
> +* Combining (1) and (3), we see that to transform (X, Y) to (X', Y'),
> +* we need to make the following computation:
> +*
> +*   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1 (4)
> +*   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
> +*/
> +   nir_ssa_def *x_W = nir_imm_int(b, 0);
> +   x_W = nir_mask_shift_or(b, x_W, x_Y, 0xfff4, -1);
> +   x_W = nir_mask_shift_or(b, x_W, y_Y, 0x1, 2);
> +   x_W = nir_mask_shift_or(b, x_W, x_Y, 0x1, 0);
> +
> +   nir_ssa_def *y_W = nir_imm_int(b, 0);
> +   y_W = nir_mask_shift_or(b, y_W, y_Y, 0xfffe, 1);
> +   y_W = nir_mask_shift_or(b, y_W, x_Y, 0x8, -2);
> +   y_W = nir_mask_shift_or(b, y_W, x_Y, 0x2, -1);
> +
> +   return nir_vec2(b, x_W, y_W);
> +}
> +
> +static inline nir_ssa_def *
> +blorp_nir_retile_w_to_y(nir_builder *b, nir_ssa_def *pos)
> +{
> +   assert(pos->num_components == 2);
> +   nir_ssa_def *x_W = nir_channel(b, pos, 0);
> +   nir_ssa_def *y_W = nir_channel(b, pos, 1);
> +
> +   /* Applying the same logic as above, but in reverse, we obtain the
> +* formulas:
> +*
> +* X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
> +* Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
> +*/
> +   nir_ssa_def *x_Y = nir_imm_int(b, 0);
> +   x_Y = nir_mask_shift_or(b, x_Y, x_W, 0xfffa, 1);
> +   x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x2, 2);
> +   x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x1, 1);
> +   x_Y = nir_mask_shift_or(b, x_Y, x_W, 0x1, 0);
> +
> +   nir_ssa_def *y_Y = nir_imm_int(b, 0);
> +   y_Y = nir_mask_shift_or(b, y_Y, y_W, 0xfffc, -1);
> +   y_Y = nir_mask_shift_or(b, y_Y, x_W, 0x4, -2);
> +
> +   return nir_vec2(b, x_Y, y_Y);
> +}
> +
>  /**
>   * Generator for WM programs used in BLORP blits.
>   *
> @@ -786,7 +878,12 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
> if (rt_tiled_w != key->dst_tiled_w ||
> key->rt_samples != key->dst_samples ||
> key->rt_layout != key->dst_layout) {
> -  goto fail;
> +  if (key->rt_samples != key->dst_samples ||
> +  key->rt_layout != key->dst_layout ||
> +  key->rt_samples != 0)

I think we need to allow rt_samples == 1 also, right? Single 

[Mesa-dev] [PATCH 23/28] i965/blorp: Add support for W-[de]tiling to the NIR path

2016-05-10 Thread Jason Ekstrand
---
 src/mesa/drivers/dri/i965/brw_blorp_blit.cpp | 107 ++-
 1 file changed, 105 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
index 27aab20..c0c02cf 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp_blit.cpp
@@ -571,6 +571,98 @@ blorp_nir_txf_ms_mcs(nir_builder *b, nir_ssa_def *pos)
    return &tex->dest.ssa;
 }
 
+static nir_ssa_def *
+nir_mask_shift_or(struct nir_builder *b, nir_ssa_def *dst, nir_ssa_def *src,
+  uint32_t src_mask, int src_left_shift)
+{
+   nir_ssa_def *masked = nir_iand(b, src, nir_imm_int(b, src_mask));
+
+   nir_ssa_def *shifted;
+   if (src_left_shift > 0) {
+  shifted = nir_ishl(b, masked, nir_imm_int(b, src_left_shift));
+   } else if (src_left_shift < 0) {
+  shifted = nir_ushr(b, masked, nir_imm_int(b, -src_left_shift));
+   } else {
+  assert(src_left_shift == 0);
+  shifted = masked;
+   }
+
+   return nir_ior(b, dst, shifted);
+}
+
+static inline nir_ssa_def *
+blorp_nir_retile_y_to_w(nir_builder *b, nir_ssa_def *pos)
+{
+   assert(pos->num_components == 2);
+   nir_ssa_def *x_Y = nir_channel(b, pos, 0);
+   nir_ssa_def *y_Y = nir_channel(b, pos, 1);
+
+   /* Given X and Y coordinates that describe an address using Y tiling,
+* translate to the X and Y coordinates that describe the same address
+* using W tiling.
+*
+* If we break down the low order bits of X and Y, using a
+* single letter to represent each low-order bit:
+*
+*   X = A << 7 | 0bBCDEFGH
+*   Y = J << 5 | 0bKLMNP   (1)
+*
+* Then we can apply the Y tiling formula to see the memory offset being
+* addressed:
+*
+*   offset = (J * tile_pitch + A) << 12 | 0bBCDKLMNPEFGH   (2)
+*
+* If we apply the W detiling formula to this memory location, then the
+* corresponding X' and Y' coordinates are:
+*
+*   X' = A << 6 | 0bBCDPFH (3)
+*   Y' = J << 6 | 0bKLMNEG
+*
+* Combining (1) and (3), we see that to transform (X, Y) to (X', Y'),
+* we need to make the following computation:
+*
+*   X' = (X & ~0b1011) >> 1 | (Y & 0b1) << 2 | X & 0b1 (4)
+*   Y' = (Y & ~0b1) << 1 | (X & 0b1000) >> 2 | (X & 0b10) >> 1
+*/
+   nir_ssa_def *x_W = nir_imm_int(b, 0);
+   x_W = nir_mask_shift_or(b, x_W, x_Y, 0xfff4, -1);
+   x_W = nir_mask_shift_or(b, x_W, y_Y, 0x1, 2);
+   x_W = nir_mask_shift_or(b, x_W, x_Y, 0x1, 0);
+
+   nir_ssa_def *y_W = nir_imm_int(b, 0);
+   y_W = nir_mask_shift_or(b, y_W, y_Y, 0xfffe, 1);
+   y_W = nir_mask_shift_or(b, y_W, x_Y, 0x8, -2);
+   y_W = nir_mask_shift_or(b, y_W, x_Y, 0x2, -1);
+
+   return nir_vec2(b, x_W, y_W);
+}
+
+static inline nir_ssa_def *
+blorp_nir_retile_w_to_y(nir_builder *b, nir_ssa_def *pos)
+{
+   assert(pos->num_components == 2);
+   nir_ssa_def *x_W = nir_channel(b, pos, 0);
+   nir_ssa_def *y_W = nir_channel(b, pos, 1);
+
+   /* Applying the same logic as above, but in reverse, we obtain the
+* formulas:
+*
+* X' = (X & ~0b101) << 1 | (Y & 0b10) << 2 | (Y & 0b1) << 1 | X & 0b1
+* Y' = (Y & ~0b11) >> 1 | (X & 0b100) >> 2
+*/
+   nir_ssa_def *x_Y = nir_imm_int(b, 0);
+   x_Y = nir_mask_shift_or(b, x_Y, x_W, 0xfffa, 1);
+   x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x2, 2);
+   x_Y = nir_mask_shift_or(b, x_Y, y_W, 0x1, 1);
+   x_Y = nir_mask_shift_or(b, x_Y, x_W, 0x1, 0);
+
+   nir_ssa_def *y_Y = nir_imm_int(b, 0);
+   y_Y = nir_mask_shift_or(b, y_Y, y_W, 0xfffc, -1);
+   y_Y = nir_mask_shift_or(b, y_Y, x_W, 0x4, -2);
+
+   return nir_vec2(b, x_Y, y_Y);
+}
+
 /**
  * Generator for WM programs used in BLORP blits.
  *
@@ -786,7 +878,12 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
if (rt_tiled_w != key->dst_tiled_w ||
key->rt_samples != key->dst_samples ||
key->rt_layout != key->dst_layout) {
-  goto fail;
+  if (key->rt_samples != key->dst_samples ||
+  key->rt_layout != key->dst_layout ||
+  key->rt_samples != 0)
+ goto fail;
+  if (rt_tiled_w != key->dst_tiled_w)
+ dst_pos = blorp_nir_retile_y_to_w(&b, dst_pos);
}
 
/* Now (X, Y, S) = decode_msaa(dst_samples, detile(dst_tiling, offset)).
@@ -832,7 +929,13 @@ brw_blorp_build_nir_shader(struct brw_context *brw,
key->tex_samples != key->src_samples ||
key->tex_layout != key->src_layout) &&
   !key->bilinear_filter) {
- goto fail;
+ if (key->tex_samples != key->src_samples ||
+ key->tex_layout != key->src_layout ||
+ key->tex_samples != 0)
+goto fail;
+
+ if (tex_tiled_w != key->src_tiled_w)
+src_pos = blorp_nir_retile_w_to_y(&b, src_pos);
   }
 
   if (key->bilinear_filter) {
-- 
2.5.0.400.gff86faf

___