[Mesa-dev] [PATCH] r600g: Use hardware sqrt instruction

2014-07-18 Thread Glenn Kennard
Piglit quick tests including sqrt pass, no other regressions,
tested on radeon 6670.
---
Should be slightly more precise than the invsqrt/recip/mul combination
used previously, I reckon up to about 2 bits of mantissa, and saves
two instructions per sqrt emitted.

It would be good if someone could test this on Cayman since it uses
a slightly different codepath.

 src/gallium/drivers/r600/r600_pipe.c   | 2 +-
 src/gallium/drivers/r600/r600_shader.c | 9 +++--
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index 5bf9c00..ee6a416 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* 
pscreen, unsigned shader, e
case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
return 1;
case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
-   return 0;
+   return 1;
case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
diff --git a/src/gallium/drivers/r600/r600_shader.c 
b/src/gallium/drivers/r600/r600_shader.c
index db928f3..907547d 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction 
r600_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction 
eg_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
tgsi_trans_srcx_replicate},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
@@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction 
cm_shader_tgsi_instruction[] = {
{TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
{TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
{TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
-   /* gap */
-   {20,0, ALU_OP0_NOP, tgsi_unsupported},
+   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, cayman_emit_float_instr},
{TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
/* gap */
{22,0, ALU_OP0_NOP, tgsi_unsupported},
-- 
1.8.3.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/14] i965/gen7: Don't allocate hiz miptree structure

2014-07-18 Thread Pohjolainen, Topi
On Tue, Jul 15, 2014 at 06:32:12PM -0700, Jordan Justen wrote:
> We now skip allocating a hiz miptree for gen7. Instead, we calculate
> the required hiz buffer parameters and allocate a bo directly.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 
> ++-
>  1 file changed, 93 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 8719c29..7e8bec8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
>drm_intel_bo_unreference((*mt)->bo);
>intel_miptree_release(&(*mt)->stencil_mt);
>if ((*mt)->hiz_buf) {
> - intel_miptree_release(&(*mt)->hiz_buf->mt);
> + if ((*mt)->hiz_buf->mt)
> +intel_miptree_release(&(*mt)->hiz_buf->mt);
> + else
> +drm_intel_bo_unreference((*mt)->hiz_buf->bo);
>   free((*mt)->hiz_buf);
>}
>intel_miptree_release(&(*mt)->mcs_mt);
> @@ -1374,6 +1377,89 @@ intel_miptree_level_enable_hiz(struct brw_context *brw,
>  }
>  
>  
> +/**
> + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> + * buffer dimensions and allocates a bo for the hiz buffer.
> + */
> +static struct intel_miptree_aux_buffer *
> +intel_gen7_hiz_buf_create(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt)
> +{
> +   unsigned z_width = mt->logical_width0;
> +   unsigned z_height = mt->logical_height0;
> +   const unsigned z_depth = mt->logical_depth0;
> +   unsigned hz_width, hz_height, qpitch;
> +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +
> +   if (!buf)
> +  return NULL;
> +
> +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
> +* adjustments required for Z_Height and Z_Width based on multisampling.
> +*/
> +   switch(mt->num_samples) {
> +   case 0:
> +   case 1:
> +  break;
> +   case 2:
> +   case 4:
> +  z_width *= 2;
> +  z_height *= 2;
> +  break;
> +   case 8:
> +  z_width *= 4;
> +  z_height *= 2;
> +  break;
> +   default:
> +  assert(!"Unsupported sample count!");
> +   }
> +
> +   const unsigned vertical_align = 8; /* 'j' in the docs */
> +   const unsigned H0 = z_height;
> +   const unsigned h0 = ALIGN(H0, vertical_align);
> +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> +   const unsigned Z0 = z_depth;
> +
> +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> +   hz_width = ALIGN(z_width, 16);
> +
> +   if (mt->target == GL_TEXTURE_3D) {
> +  unsigned H_i = H0;
> +  unsigned Z_i = Z0;
> +  hz_height = 0;
> +  for (int level = mt->first_level; level <= mt->last_level; ++level) {
> + unsigned h_i = ALIGN(H_i, vertical_align);
> + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> + hz_height += h_i * Z_i;
> + H_i = minify(H_i, 1);
> + Z_i = minify(Z_i, 1);
> +  }
> +  /* HZ_Height =
> +   *(1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
> +   */
> +  hz_height = CEILING(hz_height, 2);
> +   } else {
> +  qpitch = h0 + h1 + (12 * vertical_align);

I wonder if we are reading the same spec. I'm still of the opinion that this
is the formula for the normal (non-hiz) miptrees. In the table in section
3D-Media-GPGPU Engine > 3D Pipeline Stages > Pixel >
Early Depth/Stencil Processing > Hierarchical Depth Buffer there is a special
iterative formula for 1D/2D hiz just as there is special case for 3D hiz.

For 1D/2D: HZ_QPitch = h_0 + max(h1, sum(i=2 to m: h_i))

> +  /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
> +  hz_height = (ALIGN(qpitch, 8) / 2) * Z0;
> +  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +  mt->target == GL_TEXTURE_CUBE_MAP) {
> + hz_height *= 6;
> +  }
> +   }
> +
> +   unsigned long pitch;
> +   uint32_t tiling = I915_TILING_Y;
> +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> +  hz_width, hz_height, 1,
> +  &tiling, &pitch,
> +  BO_ALLOC_FOR_RENDER);
> +   buf->pitch = pitch;
> +
> +   return buf;
> +}
> +
> +
>  static struct intel_miptree_aux_buffer *
>  intel_hiz_miptree_buf_create(struct brw_context *brw,
>   struct intel_mipmap_tree *mt)
> @@ -1412,7 +1498,12 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>   struct intel_mipmap_tree *mt)
>  {
> assert(mt->hiz_buf == NULL);
> -   mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
> +
> +   if (brw->gen == 7) {
> +  mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
> +   } else {
> +  mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);

Re: [Mesa-dev] [PATCH 04/14] i965/gen7: Don't allocate hiz miptree structure

2014-07-18 Thread Pohjolainen, Topi
On Fri, Jul 18, 2014 at 11:02:56AM +0300, Pohjolainen, Topi wrote:
> On Tue, Jul 15, 2014 at 06:32:12PM -0700, Jordan Justen wrote:
> > We now skip allocating a hiz miptree for gen7. Instead, we calculate
> > the required hiz buffer parameters and allocate a bo directly.
> > 
> > Signed-off-by: Jordan Justen 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 
> > ++-
> >  1 file changed, 93 insertions(+), 2 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 8719c29..7e8bec8 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
> >drm_intel_bo_unreference((*mt)->bo);
> >intel_miptree_release(&(*mt)->stencil_mt);
> >if ((*mt)->hiz_buf) {
> > - intel_miptree_release(&(*mt)->hiz_buf->mt);
> > + if ((*mt)->hiz_buf->mt)
> > +intel_miptree_release(&(*mt)->hiz_buf->mt);
> > + else
> > +drm_intel_bo_unreference((*mt)->hiz_buf->bo);
> >   free((*mt)->hiz_buf);
> >}
> >intel_miptree_release(&(*mt)->mcs_mt);
> > @@ -1374,6 +1377,89 @@ intel_miptree_level_enable_hiz(struct brw_context 
> > *brw,
> >  }
> >  
> >  
> > +/**
> > + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> > + * buffer dimensions and allocates a bo for the hiz buffer.
> > + */
> > +static struct intel_miptree_aux_buffer *
> > +intel_gen7_hiz_buf_create(struct brw_context *brw,
> > +  struct intel_mipmap_tree *mt)
> > +{
> > +   unsigned z_width = mt->logical_width0;
> > +   unsigned z_height = mt->logical_height0;
> > +   const unsigned z_depth = mt->logical_depth0;
> > +   unsigned hz_width, hz_height, qpitch;
> > +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> > +
> > +   if (!buf)
> > +  return NULL;
> > +
> > +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" 
> > documents
> > +* adjustments required for Z_Height and Z_Width based on multisampling.
> > +*/
> > +   switch(mt->num_samples) {
> > +   case 0:
> > +   case 1:
> > +  break;
> > +   case 2:
> > +   case 4:
> > +  z_width *= 2;
> > +  z_height *= 2;
> > +  break;
> > +   case 8:
> > +  z_width *= 4;
> > +  z_height *= 2;
> > +  break;
> > +   default:
> > +  assert(!"Unsupported sample count!");
> > +   }
> > +
> > +   const unsigned vertical_align = 8; /* 'j' in the docs */
> > +   const unsigned H0 = z_height;
> > +   const unsigned h0 = ALIGN(H0, vertical_align);
> > +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> > +   const unsigned Z0 = z_depth;
> > +
> > +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> > +   hz_width = ALIGN(z_width, 16);
> > +
> > +   if (mt->target == GL_TEXTURE_3D) {
> > +  unsigned H_i = H0;
> > +  unsigned Z_i = Z0;
> > +  hz_height = 0;
> > +  for (int level = mt->first_level; level <= mt->last_level; ++level) {
> > + unsigned h_i = ALIGN(H_i, vertical_align);
> > + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> > + hz_height += h_i * Z_i;
> > + H_i = minify(H_i, 1);
> > + Z_i = minify(Z_i, 1);
> > +  }
> > +  /* HZ_Height =
> > +   *(1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
> > +   */
> > +  hz_height = CEILING(hz_height, 2);
> > +   } else {
> > +  qpitch = h0 + h1 + (12 * vertical_align);
> 
> I wonder if we are reading the same spec. I'm still in the opinion that this
> is the formula for the normal (non-hiz) miptrees. In the table in section
> 3D-Media-GPGPU Engine > 3D Pipeline Stages > Pixel >
> Early Depth/Stencil Processing > Hierarchical Depth Buffer there is a special
> iterative formula for 1D/2D hiz just as there is special case for 3D hiz.
> 
> For 1D/2D: HZ_QPitch = h_0 + max(h1, sum(i=2 to m: h_i))

Oh, you had this in the next patch for gen8. Sorry, my mistake, I should have
checked the specs for the older hw.

> 
> > +  /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
> > +  hz_height = (ALIGN(qpitch, 8) / 2) * Z0;
> > +  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> > +  mt->target == GL_TEXTURE_CUBE_MAP) {
> > + hz_height *= 6;
> > +  }
> > +   }
> > +
> > +   unsigned long pitch;
> > +   uint32_t tiling = I915_TILING_Y;
> > +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> > +  hz_width, hz_height, 1,
> > +  &tiling, &pitch,
> > +  BO_ALLOC_FOR_RENDER);
> > +   buf->pitch = pitch;
> > +
> > +   return buf;
> > +}
> > +
> > +
> >  static struct intel_miptree_aux_buffer *
> >  intel_hiz_miptree_buf_create(struct brw_context *brw,
> >  

Re: [Mesa-dev] [PATCH 04/14] i965/gen7: Don't allocate hiz miptree structure

2014-07-18 Thread Pohjolainen, Topi
On Tue, Jul 15, 2014 at 06:32:12PM -0700, Jordan Justen wrote:
> We now skip allocating a hiz miptree for gen7. Instead, we calculate
> the required hiz buffer parameters and allocate a bo directly.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 
> ++-
>  1 file changed, 93 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 8719c29..7e8bec8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
>drm_intel_bo_unreference((*mt)->bo);
>intel_miptree_release(&(*mt)->stencil_mt);
>if ((*mt)->hiz_buf) {
> - intel_miptree_release(&(*mt)->hiz_buf->mt);
> + if ((*mt)->hiz_buf->mt)
> +intel_miptree_release(&(*mt)->hiz_buf->mt);
> + else
> +drm_intel_bo_unreference((*mt)->hiz_buf->bo);
>   free((*mt)->hiz_buf);
>}
>intel_miptree_release(&(*mt)->mcs_mt);
> @@ -1374,6 +1377,89 @@ intel_miptree_level_enable_hiz(struct brw_context *brw,
>  }
>  
>  
> +/**
> + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> + * buffer dimensions and allocates a bo for the hiz buffer.
> + */
> +static struct intel_miptree_aux_buffer *
> +intel_gen7_hiz_buf_create(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt)
> +{
> +   unsigned z_width = mt->logical_width0;
> +   unsigned z_height = mt->logical_height0;
> +   const unsigned z_depth = mt->logical_depth0;
> +   unsigned hz_width, hz_height, qpitch;

Minor nit, qpitch could be called hz_qpitch for clarity as it is a result of
hiz-specific rules just as hz_width and hz_height. Simple matter of taste and
you choose the way that you feel the best.

> +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +
> +   if (!buf)
> +  return NULL;
> +
> +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
> +* adjustments required for Z_Height and Z_Width based on multisampling.
> +*/
> +   switch(mt->num_samples) {
> +   case 0:
> +   case 1:
> +  break;
> +   case 2:
> +   case 4:
> +  z_width *= 2;
> +  z_height *= 2;
> +  break;
> +   case 8:
> +  z_width *= 4;
> +  z_height *= 2;
> +  break;
> +   default:
> +  assert(!"Unsupported sample count!");
> +   }
> +
> +   const unsigned vertical_align = 8; /* 'j' in the docs */
> +   const unsigned H0 = z_height;
> +   const unsigned h0 = ALIGN(H0, vertical_align);
> +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> +   const unsigned Z0 = z_depth;
> +
> +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> +   hz_width = ALIGN(z_width, 16);
> +
> +   if (mt->target == GL_TEXTURE_3D) {
> +  unsigned H_i = H0;
> +  unsigned Z_i = Z0;
> +  hz_height = 0;
> +  for (int level = mt->first_level; level <= mt->last_level; ++level) {
> + unsigned h_i = ALIGN(H_i, vertical_align);
> + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */

I had to think for a second if you had a typo here (2**i) but then realized
you used it to mean power-of-two. I've also seen people using 2^i, would that
make sense to you?

> + hz_height += h_i * Z_i;
> + H_i = minify(H_i, 1);
> + Z_i = minify(Z_i, 1);
> +  }
> +  /* HZ_Height =
> +   *(1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
> +   */
> +  hz_height = CEILING(hz_height, 2);
> +   } else {
> +  qpitch = h0 + h1 + (12 * vertical_align);
> +  /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
> +  hz_height = (ALIGN(qpitch, 8) / 2) * Z0;

Here the ALIGN is a no-op - qpitch is a sum of three already aligned numbers,
and hence it is aligned itself. The final result in turn is not always aligned
(although it should be). For example, say

  qpitch = ALIGN(16, 8) + ALIGN(minify(16, 1), 8) + 12 * 8 = 15 * 8
  Z0 = z_depth = 1

  => hz_height = (15 * 8 / 2) * 1 = 60

This particular case would probably be fine as there is only one layer and
still a lot of extra. But that may not be the case with higher odd layer
numbers anymore.

I would change this into:

 hz_height = ALIGN(qpitch * Z0 / 2, vertical_align);

> +  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +  mt->target == GL_TEXTURE_CUBE_MAP) {
> + hz_height *= 6;
> +  }
> +   }
> +
> +   unsigned long pitch;
> +   uint32_t tiling = I915_TILING_Y;
> +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> +  hz_width, hz_height, 1,
> +  &tiling, &pitch,
> +  BO_ALLOC_FOR_RENDER);
> +   buf->pitch = pitch;
> +
> +   return buf;
> +}
> +
> +
>  static struct int

Re: [Mesa-dev] [PATCH 04/14] i965/gen7: Don't allocate hiz miptree structure

2014-07-18 Thread Pohjolainen, Topi
On Fri, Jul 18, 2014 at 11:04:55AM +0300, Pohjolainen, Topi wrote:
> On Fri, Jul 18, 2014 at 11:02:56AM +0300, Pohjolainen, Topi wrote:
> > On Tue, Jul 15, 2014 at 06:32:12PM -0700, Jordan Justen wrote:
> > > We now skip allocating a hiz miptree for gen7. Instead, we calculate
> > > the required hiz buffer parameters and allocate a bo directly.
> > > 
> > > Signed-off-by: Jordan Justen 
> > > ---
> > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 95 
> > > ++-
> > >  1 file changed, 93 insertions(+), 2 deletions(-)
> > > 
> > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > index 8719c29..7e8bec8 100644
> > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > @@ -823,7 +823,10 @@ intel_miptree_release(struct intel_mipmap_tree **mt)
> > >drm_intel_bo_unreference((*mt)->bo);
> > >intel_miptree_release(&(*mt)->stencil_mt);
> > >if ((*mt)->hiz_buf) {
> > > - intel_miptree_release(&(*mt)->hiz_buf->mt);
> > > + if ((*mt)->hiz_buf->mt)
> > > +intel_miptree_release(&(*mt)->hiz_buf->mt);
> > > + else
> > > +drm_intel_bo_unreference((*mt)->hiz_buf->bo);
> > >   free((*mt)->hiz_buf);
> > >}
> > >intel_miptree_release(&(*mt)->mcs_mt);
> > > @@ -1374,6 +1377,89 @@ intel_miptree_level_enable_hiz(struct brw_context 
> > > *brw,
> > >  }
> > >  
> > >  
> > > +/**
> > > + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> > > + * buffer dimensions and allocates a bo for the hiz buffer.
> > > + */
> > > +static struct intel_miptree_aux_buffer *
> > > +intel_gen7_hiz_buf_create(struct brw_context *brw,
> > > +  struct intel_mipmap_tree *mt)
> > > +{
> > > +   unsigned z_width = mt->logical_width0;
> > > +   unsigned z_height = mt->logical_height0;
> > > +   const unsigned z_depth = mt->logical_depth0;
> > > +   unsigned hz_width, hz_height, qpitch;
> > > +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> > > +
> > > +   if (!buf)
> > > +  return NULL;
> > > +
> > > +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" 
> > > documents
> > > +* adjustments required for Z_Height and Z_Width based on 
> > > multisampling.
> > > +*/
> > > +   switch(mt->num_samples) {
> > > +   case 0:
> > > +   case 1:
> > > +  break;
> > > +   case 2:
> > > +   case 4:
> > > +  z_width *= 2;
> > > +  z_height *= 2;
> > > +  break;
> > > +   case 8:
> > > +  z_width *= 4;
> > > +  z_height *= 2;
> > > +  break;
> > > +   default:
> > > +  assert(!"Unsupported sample count!");
> > > +   }
> > > +
> > > +   const unsigned vertical_align = 8; /* 'j' in the docs */
> > > +   const unsigned H0 = z_height;
> > > +   const unsigned h0 = ALIGN(H0, vertical_align);
> > > +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> > > +   const unsigned Z0 = z_depth;
> > > +
> > > +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> > > +   hz_width = ALIGN(z_width, 16);
> > > +
> > > +   if (mt->target == GL_TEXTURE_3D) {
> > > +  unsigned H_i = H0;
> > > +  unsigned Z_i = Z0;
> > > +  hz_height = 0;
> > > +  for (int level = mt->first_level; level <= mt->last_level; 
> > > ++level) {
> > > + unsigned h_i = ALIGN(H_i, vertical_align);
> > > + /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> > > + hz_height += h_i * Z_i;
> > > + H_i = minify(H_i, 1);
> > > + Z_i = minify(Z_i, 1);
> > > +  }
> > > +  /* HZ_Height =
> > > +   *(1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i)))
> > > +   */
> > > +  hz_height = CEILING(hz_height, 2);
> > > +   } else {
> > > +  qpitch = h0 + h1 + (12 * vertical_align);
> > 
> > I wonder if we are reading the same spec. I'm still in the opinion that this
> > is the formula for the normal (non-hiz) miptrees. In the table in section
> > 3D-Media-GPGPU Engine > 3D Pipeline Stages > Pixel >
> > Early Depth/Stencil Processing > Hierarchical Depth Buffer there is a 
> > special
> > iterative formula for 1D/2D hiz just as there is special case for 3D hiz.
> > 
> > For 1D/2D: HZ_QPitch = h_0 + max(h1, sum(i=2 to m: h_i))
> 
> Oh, you had this in the next patch for gen8. Sorry, my mistake, I should have
> checked the specs for the older hw.

I think the PRM for gen7 is not entirely clear either (Gen7 PRM Volume 2,
Part 1, 11.5.3 "Hierarchical Depth Buffer"). On the one hand it says that the
qpitch is calculated the same way as for other miptrees:

  "...where, Qpitch is computed using vertical alignment j=8, please refer to
   the GPU overview volume for Qpitch definition."

But it also introduces the same formula as gen8 bspec does for 1D/2D HZ_Qpitch,
only this is not used for anything in the PRM. Almost as if there is the bit
missing in the table tel

Re: [Mesa-dev] [PATCH 05/14] i965/gen8: Don't allocate hiz miptree structure

2014-07-18 Thread Pohjolainen, Topi
On Tue, Jul 15, 2014 at 06:32:13PM -0700, Jordan Justen wrote:
> We now skip allocating a hiz miptree for gen8. Instead, we calculate
> the required hiz buffer parameters and allocate a bo directly.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 91 
> +++
>  1 file changed, 91 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 7e8bec8..08a4ebe 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1460,6 +1460,95 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
>  }
>  
>  
> +/**
> + * Helper for intel_miptree_alloc_hiz() that determines the required hiz
> + * buffer dimensions and allocates a bo for the hiz buffer.
> + */
> +static struct intel_miptree_aux_buffer *
> +intel_gen8_hiz_buf_create(struct brw_context *brw,
> +  struct intel_mipmap_tree *mt)
> +{
> +   unsigned z_width = mt->logical_width0;
> +   unsigned z_height = mt->logical_height0;
> +   const unsigned z_depth = mt->logical_depth0;
> +   unsigned hz_width, hz_height;
> +   struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 1);
> +
> +   if (!buf)
> +  return NULL;
> +
> +   /* Gen7 PRM Volume 2, Part 1, 11.5.3 "Hierarchical Depth Buffer" documents
> +* adjustments required for Z_Height and Z_Width based on multisampling.
> +*/
> +   switch(mt->num_samples) {
> +   case 0:
> +   case 1:
> +  break;
> +   case 2:
> +   case 4:
> +  z_width *= 2;
> +  z_height *= 2;
> +  break;
> +   case 8:
> +  z_width *= 4;
> +  z_height *= 2;
> +  break;
> +   default:
> +  assert(!"Unsupported sample count!");
> +   }
> +
> +   const unsigned vertical_align = 8; /* 'j' in the docs */
> +   const unsigned H0 = z_height;
> +   const unsigned h0 = ALIGN(H0, vertical_align);
> +   const unsigned h1 = ALIGN(minify(H0, 1), vertical_align);
> +   const unsigned Z0 = z_depth;
> +
> +   /* HZ_Width (bytes) = ceiling(Z_Width / 16) * 16 */
> +   hz_width = ALIGN(z_width, 16);
> +
> +   unsigned H_i = H0;
> +   unsigned Z_i = Z0;
> +   unsigned sum_h_i = 0;
> +   unsigned hz_height_3d_sum = 0;
> +   for (int level = mt->first_level; level <= mt->last_level; ++level) {
> +  unsigned i = level - mt->first_level;
> +  unsigned h_i = ALIGN(H_i, vertical_align);
> +  /* sum(i=2 to m; h_i) */
> +  if (i >= 2) {
> + sum_h_i += h_i;
> +  }
> +  /* sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> +  hz_height_3d_sum += h_i * Z_i;
> +  H_i = minify(H_i, 1);
> +  Z_i = minify(Z_i, 1);
> +   }
> +   /* HZ_QPitch = h0 + max(h1, sum(i=2 to m; h_i)) */
> +   buf->qpitch = h0 + MAX2(h1, sum_h_i);
> +
> +   if (mt->target == GL_TEXTURE_3D) {
> +  /* (1/2) * sum(i=0 to m; h_i * max(1, floor(Z_Depth/2**i))) */
> +  hz_height = CEILING(hz_height_3d_sum, 2);
> +   } else {
> +  /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
> +  hz_height = (ALIGN(buf->qpitch, 8) / 2) * Z0;

Similarly as in the previous patch, I think this should be instead:

 hz_height = ALIGN(buf->qpitch * Z0 / 2, vertical_align);

> +  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
> +  mt->target == GL_TEXTURE_CUBE_MAP) {
> + hz_height *= 6;
> +  }
> +   }
> +
> +   unsigned long pitch;
> +   uint32_t tiling = I915_TILING_Y;
> +   buf->bo = drm_intel_bo_alloc_tiled(brw->bufmgr, "hiz",
> +  hz_width, hz_height, 1,
> +  &tiling, &pitch,
> +  BO_ALLOC_FOR_RENDER);
> +   buf->pitch = pitch;
> +
> +   return buf;
> +}
> +
> +
>  static struct intel_miptree_aux_buffer *
>  intel_hiz_miptree_buf_create(struct brw_context *brw,
>   struct intel_mipmap_tree *mt)
> @@ -1501,6 +1590,8 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>  
> if (brw->gen == 7) {
>mt->hiz_buf = intel_gen7_hiz_buf_create(brw, mt);
> +   } else if (brw->gen >= 8) {
> +  mt->hiz_buf = intel_gen8_hiz_buf_create(brw, mt);
> } else {
>mt->hiz_buf = intel_hiz_miptree_buf_create(brw, mt);
> }
> -- 
> 2.0.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/14] i965/gen8: Use aux buf qpitch for Auxiliary Buffer (MCS)

2014-07-18 Thread Pohjolainen, Topi
On Tue, Jul 15, 2014 at 06:32:17PM -0700, Jordan Justen wrote:
> For hiz, the qpitch may be different than the main miptree.

s/hiz/aux/ ?

> 
> In "i965: Wrap MCS miptree in intel_miptree_aux_buffer" we set
> aux_buf->qpitch to mt->qpitch, so for MCS, this should be a no-op.
> 
> Signed-off-by: Jordan Justen 
> ---
>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index 87f0d49..3d97232 100644
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> @@ -203,7 +203,7 @@ gen8_update_texture_surface(struct gl_context *ctx,
>   (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */
>  
> if (aux_buf) {
> -  surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
> +  surf[6] = SET_FIELD(aux_buf->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>  SET_FIELD((aux_buf->pitch / 128) - 1, 
> GEN8_SURFACE_AUX_PITCH) |
>  aux_mode;
> } else {
> @@ -394,7 +394,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
> surf[5] = irb->mt_level - irb->mt->first_level;
>  
> if (aux_buf) {
> -  surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
> +  surf[6] = SET_FIELD(aux_buf->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>  SET_FIELD((aux_buf->pitch / 128) - 1, 
> GEN8_SURFACE_AUX_PITCH) |
>  aux_mode;
> } else {
> -- 
> 2.0.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 12/14] i965/gen8: Initialize aux_mode to GEN8_SURFACE_AUX_MODE_NONE

2014-07-18 Thread Pohjolainen, Topi
On Tue, Jul 15, 2014 at 06:32:20PM -0700, Jordan Justen wrote:
> GEN8_SURFACE_AUX_MODE_NONE is 0, so this is a no-op.
> 
> Yet, this also makes it clear that we can compare aux_mode to the
> other GEN8_SURFACE_AUX_MODE_ values. We will want to compare to
> GEN8_SURFACE_AUX_MODE_HIZ.
> 
> Signed-off-by: Jordan Justen 

Patches 7-12 are:

Reviewed-by: Topi Pohjolainen 

> ---
>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> index 3d97232..4818fca 100644
> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
> @@ -133,7 +133,7 @@ gen8_update_texture_surface(struct gl_context *ctx,
> struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
> struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
> struct intel_miptree_aux_buffer *aux_buf = NULL;
> -   uint32_t aux_mode = 0;
> +   uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
> mesa_format format = intelObj->_Format;
>  
> if (tObj->Target == GL_TEXTURE_BUFFER) {
> @@ -309,7 +309,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
> struct intel_renderbuffer *irb = intel_renderbuffer(rb);
> struct intel_mipmap_tree *mt = irb->mt;
> struct intel_miptree_aux_buffer *aux_buf = NULL;
> -   uint32_t aux_mode = 0;
> +   uint32_t aux_mode = GEN8_SURFACE_AUX_MODE_NONE;
> unsigned width = mt->logical_width0;
> unsigned height = mt->logical_height0;
> unsigned pitch = mt->pitch;
> -- 
> 2.0.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] common-code patches before vc4 driver

2014-07-18 Thread Emil Velikov
On 18/07/14 00:56, Eric Anholt wrote:
> Here are the patches I have for common code in my vc4 driver tree.  I
> think they should be obvious enough.
> 
> I'm curious what people feel about merging vc4.  I've got a series at this
> point that's clean enough in my opinion (copyrights fixed up, and I think
> a tolerable mix of "giant code dump for starting out the project" and
> "mostly sensible incremental work from there".)  Should I just go ahead
> whenever?  Does anybody want to engage in a review process with me, or
> should I just go ahead on my own, like Rob's doing with freedreno?
> 
> Of course, the driver code I've written so far isn't using an actual
> stable kernel ABI -- I still need to handle little things like validating
> shaders and uniforms for security (and to relocate sampler configuration
> parameters), and asynchronous execution of command lists.  So anything I
> land would be getting ABI-breaking reworks later on.
> 
I would not mind helping out with the build system side of things :)

FWIW this series is
Reviewed-by: Emil Velikov 

-Emil

> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.

2014-07-18 Thread Emil Velikov
Pavel, can you use up to 80 columns of width for the commit message? It is
somewhat of an unwritten rule, plus it makes things a bit easier to read :)

Cheers,
Emil

On 17/07/14 16:21, Pavel Popov wrote:
> According to spec (OpenGL 4.0 specification, pages 254-255) we have a 
> different bits set
> for one buffer and for multiple buffers. For glDrawBuffer we may have up to 
> four bits set
> but for glDrawBuffers we can only have one bit set.
> 
> The _mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL 
> arguments when
> _mesa_update_framebuffer or _mesa_update_draw_buffers is called. In this 
> situation realization
> for glDrawBuffers is used for any number of buffers. Even for one. But 
> glDrawBuffer have to be
> used for one buffer instead of glDrawBuffers.
> 
> Piglit test 'gl30basic' fails with assert with debug Mesa and pass with 
> release
> 'main/buffers.c:520: _mesa_drawbuffers: Assertion 
> `__builtin_popcount(destMask[buf]) == 1' failed.'
> Probably some other tests also can be affected.
> 
> Signed-off-by: Pavel Popov 
> ---
>  src/mesa/main/buffers.c | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
> index b13a7af..a640360 100644
> --- a/src/mesa/main/buffers.c
> +++ b/src/mesa/main/buffers.c
> @@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
> GLenum *buffers,
> struct gl_framebuffer *fb = ctx->DrawBuffer;
> GLbitfield mask[MAX_DRAW_BUFFERS];
> GLuint buf;
> +   GLuint m = n;
>  
> if (!destMask) {
>/* compute destMask values now */
> @@ -489,15 +490,17 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, 
> const GLenum *buffers,
>   mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
>   ASSERT(mask[output] != BAD_MASK);
>   mask[output] &= supportedMask;
> + if (mask[output] == 0)
> +m--;
>}
>destMask = mask;
> }
>  
> /*
> -* If n==1, destMask[0] may have up to four bits set.
> +* If m==1, destMask[0] may have up to four bits set.
>  * Otherwise, destMask[x] can only have one bit set.
>  */
> -   if (n == 1) {
> +   if (m == 1) {
>GLuint count = 0, destMask0 = destMask[0];
>while (destMask0) {
>   GLint bufIndex = ffs(destMask0) - 1;
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-18 Thread Michel Dänzer
On 18.07.2014 12:58, Dieter Nützel wrote:
> Am 18.07.2014 05:07, schrieb Michel Dänzer:
>> On 17.07.2014 19:09, Christian König wrote:
>>> Am 17.07.2014 12:01, schrieb Michel Dänzer:
 [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI
>>>
>>> I'm still not very keen with this change since I still don't understand
>>> the reason why it's faster than with GTT. Definitely needs more testing
>>> on a wider range of systems.
>>
>> Sure. If anyone wants to give this patch a spin and see if they can
>> measure any performance difference, good or bad, that would be
>> interesting.
>>
>>> Maybe limit it to APUs for now?
>>
>> But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even
>> bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
>> system. I suspect it may depend on the bandwidth available for PCIe vs.
>> system memory though.
> 
> Michel,
> 
> please, please do NOT change anything on this!;-)
> You all know that I currently can only run this on my poor Duron 1800
> with RV730 (AGP), but...
> 
> With this all 'objview' demos (mesa-demos) run at 60 fps (vsync),
> even with chip set/CPU power management enabled (athcool on).
> 
> If I set vblank_mode=0
> the slowest GreatLakesBiplaneHP.obj
> run at ~100 fps (~16 fps before) => 6x speedup.
> (Even 5 planes run at 30 fps) - Wow!!!

That's great, but note that the disputed change above only has an effect
with SI or newer GPUs, i.e. none with yours.

I suspect that speedup is because the app ends up using effectively
static vertex/index buffers, which are now in VRAM instead of in GTT due
to my Mesa changes.


> Overall X/Kwin eXperience is much better.
> Let me know which benchmarks you need.

I'm not looking for anything in particular, basically anything where you
care about performance. E.g. the usual suspects in PTS.


> BTW Does anyone know how I can override BIOS GTT settings?
> I can only set 256 MB max. - BIOS patching?

Your AGP bridge hardware might not support more.

> With agpmode=-1 I can run with 1024 MB GTT

What effect does that have on performance? I'm not sure if AGP provides
any benefit for GPUs with PCIe GART.


-- 
Earthling Michel Dänzer|  http://www.amd.com
Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] i965/cfg: Add a foreach_block_and_inst macro.

2014-07-18 Thread Pohjolainen, Topi

Subject of patch number four sort of hints that this would be used in patch
number three also. I didn't find any occurrences though, did you mean to use
it there already?

Anyway, patches 1-4 are:

Reviewed-by: Topi Pohjolainen 

On Thu, Jul 17, 2014 at 03:26:02PM -0700, Matt Turner wrote:
> Will let us abstract how the instructions are stored.
> ---
>  src/mesa/drivers/dri/i965/brw_cfg.h | 4 
>  1 file changed, 4 insertions(+)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_cfg.h 
> b/src/mesa/drivers/dri/i965/brw_cfg.h
> index cdbadde..01fcc1b 100644
> --- a/src/mesa/drivers/dri/i965/brw_cfg.h
> +++ b/src/mesa/drivers/dri/i965/brw_cfg.h
> @@ -103,6 +103,10 @@ struct cfg_t {
> int num_blocks;
>  };
>  
> +#define foreach_block_and_inst(__block, __type, __inst, __cfg) \
> +   foreach_block (__block, __cfg)  \
> +  foreach_inst_in_block (__type, __inst, __block)
> +
>  #define foreach_inst_in_block(__type, __inst, __block) \
> for (__type *__inst = (__type *)__block->start; \
>  __inst != __block->end->next;  \
> -- 
> 1.8.5.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-18 Thread Marek Olšák
If the requirements of GL_MAP_COHERENT_BIT are satisfied, then the
patch is okay.

Marek


On Fri, Jul 18, 2014 at 5:19 AM, Michel Dänzer  wrote:
> On 17.07.2014 21:00, Marek Olšák wrote:
>> On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
>>> From: Michel Dänzer 
>>>
>>> This is hopefully safe: The kernel makes sure writes to these mappings
>>> finish before the GPU might start reading from them, and the GPU caches
>>> are invalidated at the start of a command stream.
>>>
>> The resource flags actually tell you what you can do. If the COHERENT
>> flag is set, the mapping must be cached.
>
> Why is that required? As I explain above, we should satisfy the
> requirements of the ARB_buffer_storage extension AFAICT.
>
>
> As pointed out by you and Grigori in other posts, I should probably just
> drop the special treatment of persistent mappings though, so the
> placement and flags are derived from the buffer usage.
>
>
> --
> Earthling Michel Dänzer|  http://www.amd.com
> Libre software enthusiast  |Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: remove unnecessary check

2014-07-18 Thread Timothy Arceri
This code does nothing useful as the next recursive call on the array element
will override any null values if the element is a record anyway. The code is
also not doing what the comment says, as it's trying to set the record type
pointer for only the first element of the array, not the first leaf field of the
record.

Signed-off-by: Timothy Arceri 
---

Resending this patch. It's a required cleanup for arrays of arrays.

 src/glsl/link_uniforms.cpp | 8 
 1 file changed, 8 deletions(-)

diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
index ba66053..9566124 100644
--- a/src/glsl/link_uniforms.cpp
+++ b/src/glsl/link_uniforms.cpp
@@ -187,9 +187,6 @@ program_resource_visitor::recursion(const glsl_type *t, 
char **name,
   }
} else if (t->is_array() && (t->fields.array->is_record()
 || t->fields.array->is_interface())) {
-  if (record_type == NULL && t->fields.array->is_record())
- record_type = t->fields.array;
-
   for (unsigned i = 0; i < t->length; i++) {
 size_t new_length = name_length;
 
@@ -198,11 +195,6 @@ program_resource_visitor::recursion(const glsl_type *t, 
char **name,
 
  recursion(t->fields.array, name, new_length, row_major,
record_type);
-
- /* Only the first leaf-field of the record gets called with the
-  * record type pointer.
-  */
- record_type = NULL;
   }
} else {
   this->visit_field(t, *name, row_major, record_type);
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/5] nv50/ir: keep track of whether the program uses fp64

2014-07-18 Thread Ilia Mirkin
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h   | 1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp | 8 ++--
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
index f829aac..3a89a29 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
@@ -183,6 +183,7 @@ struct nv50_ir_prog_info
   boolean sampleInterp;  /* perform sample interp on all fp inputs */
   uint8_t backFaceColor[2];  /* input/output indices of back face colour */
   uint8_t globalAccess;  /* 1 for read, 2 for wr, 3 for rw */
+  boolean fp64;  /* program uses fp64 math */
   boolean nv50styleSurfaces; /* generate gX[] access for raw buffers */
   uint8_t resInfoCBSlot; /* cX[] used for tex handles, surface info */
   uint16_t texBindBase;  /* base address for tex handles (nve4) */
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
index 0397bdc..7992f53 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
@@ -379,9 +379,13 @@ Program::emitBinary(struct nv50_ir_prog_info *info)
 
   assert(emit->getCodeSize() == fn->binPos);
 
-  for (int b = 0; b < fn->bbCount; ++b)
- for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next)
+  for (int b = 0; b < fn->bbCount; ++b) {
+ for (Instruction *i = fn->bbArray[b]->getEntry(); i; i = i->next) {
 emit->emitInstruction(i);
+if (i->sType == TYPE_F64 || i->dType == TYPE_F64)
+   info->io.fp64 = true;
+ }
+  }
}
info->bin.relocData = emit->getRelocInfo();
 
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/5] nvc0: mark shader header if fp64 is used

2014-07-18 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin 
---
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index c624e21..ce0207a 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -640,6 +640,8 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t 
chipset)
*/
if (info->io.globalAccess)
   prog->hdr[0] |= 1 << 16;
+   if (info->io.fp64)
+  prog->hdr[0] |= 1 << 27;
 
if (prog->pipe.stream_output.num_outputs)
   prog->tfb = nvc0_program_create_tfb_state(info,
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/5] nv50/ir: fix hard-coded TYPE_U32 sized register

2014-07-18 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin 
---

I noticed this in a review of the code trying to figure out why the next
problem was happening. This doesn't actually fix anything, but there's no
reason why phi nodes must be restricted to 32-bit registers. (Although they
are, for now.)

 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 7 ---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index e4f56b1..117da94 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -389,11 +389,12 @@ RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
  pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
 
   for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
- mov = new_Instruction(func, OP_MOV, TYPE_U32);
+ LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue());
+ mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
 
  mov->setSrc(0, phi->getSrc(j));
- mov->setDef(0, new_LValue(func, phi->getDef(0)->asLValue()));
- phi->setSrc(j, mov->getDef(0));
+ mov->setDef(0, tmp);
+ phi->setSrc(j, tmp);
 
  pb->insertBefore(pb->getExit(), mov);
   }
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 0/5] nvc0: fp64 preparation

2014-07-18 Thread Ilia Mirkin
Most of codegen is already FP64-ready. There are a few edge-cases that I ran
into, many of which can apply even to non-fp64-enabled programs (although the
double-wide registers are not very common without fp64).

I've yet to give this a full piglit run, but wanted to send these out in case
someone wanted to comment. They do not depend on the preliminary core fp64
work.

Ilia Mirkin (5):
  nvc0: make sure that the local memory allocation is aligned to 0x10
  nv50/ir: keep track of whether the program uses fp64
  nvc0: mark shader header if fp64 is used
  nv50/ir: fix hard-coded TYPE_U32 sized register
  nv50/ir: fix phi/union sources when their def has been merged

 src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h   |  1 +
 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 15 ---
 src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp |  8 ++--
 src/gallium/drivers/nouveau/nvc0/nvc0_program.c|  4 +++-
 4 files changed, 22 insertions(+), 6 deletions(-)

-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/5] nvc0: make sure that the local memory allocation is aligned to 0x10

2014-07-18 Thread Ilia Mirkin
Signed-off-by: Ilia Mirkin 
Cc: 
---

Was getting weird shader errors in dmat4*dmat4 which spilled one double-wide
register (i.e. size 8). envytools docs apparently list this as having to be
aligned to 0x10, and this indeed fixes it.

 src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
index 1c82a9a..c624e21 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -626,7 +626,7 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t 
chipset)
if (info->bin.tlsSpace) {
   assert(info->bin.tlsSpace < (1 << 24));
   prog->hdr[0] |= 1 << 26;
-  prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
+  prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */
   prog->need_tls = TRUE;
}
/* TODO: factor 2 only needed where joinat/precont is used,
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/5] nv50/ir: fix phi/union sources when their def has been merged

2014-07-18 Thread Ilia Mirkin
In a situation where double-register values are used, the phi nodes can
still end up being u32 values. They all get merged into one RA node
though. When fixing up the merge (which comes after the phi node), the
phi node's def would get fixed, but not its sources which would remain
at the low register value.

This maintains the invariant that a phi node's defs and sources are
allocated the same register.

Signed-off-by: Ilia Mirkin 
---

I _think_ that the split case might also need this, in case there's a split
that feeds into phi nodes, and those phi nodes are never merged. But this
fixes a real issue, and this stuff is pretty finicky... rather not poke the
bear too hard.

 src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index 117da94..21d7fd0 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1702,6 +1702,14 @@ GCRA::resolveSplitsAndMerges()
  Value *v = merge->getSrc(s);
  v->reg.data.id = regs.bytesToId(v, reg);
  v->join = v;
+ // If the value is defined by a phi/union node, we also need to
+ // perform the same fixup on that node's sources, since after RA
+ // their registers should be identical.
+ if (v->getInsn()->op == OP_PHI || v->getInsn()->op == OP_UNION) {
+Instruction *phi = v->getInsn();
+for (int phis = 0; phi->srcExists(phis); ++phis)
+   phi->getSrc(phis)->join = v;
+ }
  reg += v->reg.size;
   }
}
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 68296] Using old viewport value after a window resize (content is clipped)

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=68296

--- Comment #12 from U. Artie Eoff  ---
Created attachment 103044
  --> https://bugs.freedesktop.org/attachment.cgi?id=103044&action=edit
clear color does not fill new viewport size

The problem is that the GL color buffer does not fill the GL viewport after
it's been resized.  That is, after calling glViewport(...) the background color
goes black, and in the more extreme case the original clear color remains in
the portion of the previous viewport size giving the illusion of being clipped
(as seen in my attached image).

If you modify the glClearColor(...) in the sample test code to be something
other than black, then the problem becomes obvious.  If then you add a
glClear(GL_COLOR_BUFFER_BIT) after the call to glViewport(...), then the
problem goes away.

The question is, is it the responsibility of the GL client program to make a
call to glClear after executing glViewport, or is Mesa supposed to handle this
for you?

The EFL developers indicate that this is only a problem on Intel platforms.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 68296] Using old viewport value after a window resize (content is clipped)

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=68296

U. Artie Eoff  changed:

   What|Removed |Added

 Status|RESOLVED|REOPENED
 Resolution|WORKSFORME  |---

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] mesa: Correctly use glDrawBuffers for multiple buffers and glDrawBuffer for one buffer.

2014-07-18 Thread Brian Paul

On 07/17/2014 09:21 AM, Pavel Popov wrote:

According to the spec (OpenGL 4.0 specification, pages 254-255) we have
different bits set
for one buffer and for multiple buffers. For glDrawBuffer we may have up to
four bits set,
but for glDrawBuffers we can only have one bit set.

_mesa_drawbuffers is called with ctx->Const.MaxDrawBuffers and NULL
arguments when
_mesa_update_framebuffer or _mesa_update_draw_buffers is called. In this
situation the implementation
for glDrawBuffers is used for any number of buffers, even for one. But the
glDrawBuffer implementation has to be
used for one buffer instead of the glDrawBuffers one.

Piglit test 'gl30basic' fails with an assertion on debug Mesa and passes on release:
 'main/buffers.c:520: _mesa_drawbuffers: Assertion 
`__builtin_popcount(destMask[buf]) == 1' failed.'
Probably some other tests also can be affected.

Signed-off-by: Pavel Popov 
---
  src/mesa/main/buffers.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/main/buffers.c b/src/mesa/main/buffers.c
index b13a7af..a640360 100644
--- a/src/mesa/main/buffers.c
+++ b/src/mesa/main/buffers.c
@@ -480,6 +480,7 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
GLenum *buffers,
 struct gl_framebuffer *fb = ctx->DrawBuffer;
 GLbitfield mask[MAX_DRAW_BUFFERS];
 GLuint buf;
+   GLuint m = n;

 if (!destMask) {
/* compute destMask values now */
@@ -489,15 +490,17 @@ _mesa_drawbuffers(struct gl_context *ctx, GLuint n, const 
GLenum *buffers,
   mask[output] = draw_buffer_enum_to_bitmask(ctx, buffers[output]);
   ASSERT(mask[output] != BAD_MASK);
   mask[output] &= supportedMask;
+ if (mask[output] == 0)
+m--;
}
destMask = mask;
 }

 /*
-* If n==1, destMask[0] may have up to four bits set.
+* If m==1, destMask[0] may have up to four bits set.
  * Otherwise, destMask[x] can only have one bit set.
  */
-   if (n == 1) {
+   if (m == 1) {
GLuint count = 0, destMask0 = destMask[0];
while (destMask0) {
   GLint bufIndex = ffs(destMask0) - 1;



Weird.  I remembered seeing this assertion a few days ago, but now I 
can't reproduce it.  I don't know what might have changed.


In any case, I don't think the patch is quite right.  If the array of 
buffers was something like {GL_NONE, GL_COLOR_ATTACHMENT1, GL_NONE, 
GL_NONE} and n=4 you'd wind up with m=1, but you'd be executing the 
wrong part of the if block.


Instead, I think you'd need to test if (m == 1 && destMask[0]) since 
it's the 0th entry that requires special validation.


And as someone else pointed out, please word-wrap the commit msg to 
70-75 chars.


-Brian

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] gbm: Replace GBM_DRIVERS_PATH with LIBGL_DRIVERS_PATH

2014-07-18 Thread Dylan Baker
Currently mesa searches for two different environment variables,
LIBGL_DRIVERS_PATH and GBM_DRIVERS_PATH. The first is used to search
for DRI drivers in every case except GBM, and the latter is used
exclusively for setting GBM drivers. This patch simplifies things by
having just one variable to set.

Signed-off-by: Dylan Baker 
---
 src/gbm/backends/dri/gbm_dri.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
index 347bc99..9d9d1c4 100644
--- a/src/gbm/backends/dri/gbm_dri.c
+++ b/src/gbm/backends/dri/gbm_dri.c
@@ -212,8 +212,8 @@ dri_load_driver(struct gbm_dri_device *dri)
 
search_paths = NULL;
if (geteuid() == getuid()) {
-  /* don't allow setuid apps to use GBM_DRIVERS_PATH */
-  search_paths = getenv("GBM_DRIVERS_PATH");
+  /* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
+  search_paths = getenv("LIBGL_DRIVERS_PATH");
}
if (search_paths == NULL)
   search_paths = DEFAULT_DRIVER_DIR;
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/12] main: Gather some common format conversion functions into a single format_utils file

2014-07-18 Thread Brian Paul
Can you possibly shorten the subject/1st line of the commit message to 
be 70-75 chars?


On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

This initial commit puts all of the RGB <-> sRGB conversion functions in
one place.

Signed-off-by: Jason Ekstrand 
---
  src/mesa/Makefile.sources|   1 +
  src/mesa/main/format_pack.c  | 102 ++-
  src/mesa/main/format_unpack.c|  69 --
  src/mesa/main/format_unpack.h|   3 --
  src/mesa/main/format_utils.c |  56 +
  src/mesa/main/format_utils.h |  68 ++
  src/mesa/main/texcompress_etc.c  |  20 
  src/mesa/main/texcompress_s3tc.c |  26 +-
  8 files changed, 204 insertions(+), 141 deletions(-)
  create mode 100644 src/mesa/main/format_utils.c
  create mode 100644 src/mesa/main/format_utils.h

diff --git a/src/mesa/Makefile.sources b/src/mesa/Makefile.sources
index f4904fb..a261113 100644
--- a/src/mesa/Makefile.sources
+++ b/src/mesa/Makefile.sources
@@ -48,6 +48,7 @@ MAIN_FILES = \
$(SRCDIR)main/formats.c \
$(SRCDIR)main/format_pack.c \
$(SRCDIR)main/format_unpack.c \
+   $(SRCDIR)main/format_utils.c \
$(SRCDIR)main/framebuffer.c \
$(SRCDIR)main/get.c \
$(SRCDIR)main/genmipmap.c \


src/mesa/SConscript also needs to be updated with format_utils.c


Looks good otherwise.

Reviewed-by: Brian Paul 


diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
index 6b28592..fb3feb5 100644
--- a/src/mesa/main/format_pack.c
+++ b/src/mesa/main/format_pack.c
@@ -38,6 +38,7 @@

  #include "colormac.h"
  #include "format_pack.h"
+#include "format_utils.h"
  #include "macros.h"
  #include "../../gallium/auxiliary/util/u_format_rgb9e5.h"
  #include "../../gallium/auxiliary/util/u_format_r11g11b10f.h"
@@ -58,39 +59,6 @@ typedef void (*pack_float_rgba_row_func)(GLuint n,
   const GLfloat src[][4], void *dst);


-
-static inline GLfloat
-linear_to_srgb(GLfloat cl)
-{
-   if (cl < 0.0f)
-  return 0.0f;
-   else if (cl < 0.0031308f)
-  return 12.92f * cl;
-   else if (cl < 1.0f)
-  return 1.055f * powf(cl, 0.41666f) - 0.055f;
-   else
-  return 1.0f;
-}
-
-
-static inline GLubyte
-linear_float_to_srgb_ubyte(GLfloat cl)
-{
-   GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl));
-   return res;
-}
-
-
-static inline GLubyte
-linear_ubyte_to_srgb_ubyte(GLubyte cl)
-{
-   GLubyte res = FLOAT_TO_UBYTE(linear_to_srgb(cl / 255.0f));
-   return res;
-}
-
-
-
-
  /*
   * MESA_FORMAT_A8B8G8R8_UNORM
   */
@@ -1043,18 +1011,18 @@ static void
  pack_ubyte_BGR_SRGB8(const GLubyte src[4], void *dst)
  {
 GLubyte *d = ((GLubyte *) dst);
-   d[2] = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   d[1] = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   d[0] = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   d[2] = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   d[1] = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]);
+   d[0] = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]);
  }

  static void
  pack_float_BGR_SRGB8(const GLfloat src[4], void *dst)
  {
 GLubyte *d = ((GLubyte *) dst);
-   d[2] = linear_float_to_srgb_ubyte(src[RCOMP]);
-   d[1] = linear_float_to_srgb_ubyte(src[GCOMP]);
-   d[0] = linear_float_to_srgb_ubyte(src[BCOMP]);
+   d[2] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP]));
+   d[1] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP]));
+   d[0] = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP]));
  }


@@ -1064,9 +1032,9 @@ static void
  pack_ubyte_A8B8G8R8_SRGB(const GLubyte src[4], void *dst)
  {
 GLuint *d = ((GLuint *) dst);
-   GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   GLubyte g = _mesa_linear_ubyte_to_srgb_ubyte(src[GCOMP]);
+   GLubyte b = _mesa_linear_ubyte_to_srgb_ubyte(src[BCOMP]);
 *d = PACK_COLOR_(r, g, b, src[ACOMP]);
  }

@@ -1075,9 +1043,9 @@ pack_float_A8B8G8R8_SRGB(const GLfloat src[4], void *dst)
  {
 GLuint *d = ((GLuint *) dst);
 GLubyte r, g, b, a;
-   r = linear_float_to_srgb_ubyte(src[RCOMP]);
-   g = linear_float_to_srgb_ubyte(src[GCOMP]);
-   b = linear_float_to_srgb_ubyte(src[BCOMP]);
+   r = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[RCOMP]));
+   g = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[GCOMP]));
+   b = FLOAT_TO_UBYTE(_mesa_linear_to_srgb(src[BCOMP]));
 UNCLAMPED_FLOAT_TO_UBYTE(a, src[ACOMP]);
 *d = PACK_COLOR_(r, g, b, a);
  }
@@ -1089,9 +1057,9 @@ static void
  pack_ubyte_B8G8R8A8_SRGB(const GLubyte src[4], void *dst)
  {
 GLuint *d = ((GLuint *) dst);
-   GLubyte r = linear_ubyte_to_srgb_ubyte(src[RCOMP]);
-   GLubyte g = linear_ubyte_to_srgb_ubyte(src[GCOMP]);
-   GLubyte b = linear_ubyte_to_srgb_ubyte(src[BCOMP]);
+   GLubyte r = _mesa_linear_ubyte_to_srgb_ubyte(src[RCOMP]);
+   GLubyte

Re: [Mesa-dev] [PATCH 08/12] main/texstore: Split texture storage into three functions: texstore_depth_stencil, texstore_compressed, and texstore_rgba

2014-07-18 Thread Brian Paul

The first line of the commit msg should be shorter.

Why do you want to do this split?  The commit message doesn't say.

-Brian


On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/texstore.c | 171 +++
  1 file changed, 100 insertions(+), 71 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index d363f9f..e1f2284 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -3540,35 +3540,95 @@ _mesa_texstore_abgr2101010(TEXSTORE_PARAMS)
 return GL_TRUE;
  }

+
  static GLboolean
-_mesa_texstore_null(TEXSTORE_PARAMS)
+texstore_depth_stencil(TEXSTORE_PARAMS)
  {
-   (void) ctx; (void) dims;
-   (void) baseInternalFormat;
-   (void) dstFormat;
-   (void) dstRowStride; (void) dstSlices,
-   (void) srcWidth; (void) srcHeight; (void) srcDepth;
-   (void) srcFormat; (void) srcType;
-   (void) srcAddr;
-   (void) srcPacking;
-
-   /* should never happen */
-   _mesa_problem(NULL, "_mesa_texstore_null() is called");
-   return GL_FALSE;
+   static StoreTexImageFunc table[MESA_FORMAT_COUNT];
+   static GLboolean initialized = GL_FALSE;
+
+   if (!initialized) {
+  memset(table, 0, sizeof table);
+
+  table[MESA_FORMAT_S8_UINT_Z24_UNORM] = _mesa_texstore_z24_s8;
+  table[MESA_FORMAT_Z24_UNORM_S8_UINT] = _mesa_texstore_s8_z24;
+  table[MESA_FORMAT_Z_UNORM16] = _mesa_texstore_z16;
+  table[MESA_FORMAT_Z24_UNORM_X8_UINT] = _mesa_texstore_x8_z24;
+  table[MESA_FORMAT_X8_UINT_Z24_UNORM] = _mesa_texstore_z24_x8;
+  table[MESA_FORMAT_Z_UNORM32] = _mesa_texstore_z32;
+  table[MESA_FORMAT_S_UINT8] = _mesa_texstore_s8;
+  table[MESA_FORMAT_Z_FLOAT32] = _mesa_texstore_z32;
+  table[MESA_FORMAT_Z32_FLOAT_S8X24_UINT] = _mesa_texstore_z32f_x24s8;
+
+  initialized = GL_TRUE;
+   }
+
+   ASSERT(table[dstFormat]);
+   return table[dstFormat](ctx, dims, baseInternalFormat,
+   dstFormat, dstRowStride, dstSlices,
+   srcWidth, srcHeight, srcDepth,
+   srcFormat, srcType, srcAddr, srcPacking);
  }

+static GLboolean
+texstore_compressed(TEXSTORE_PARAMS)
+{
+   static StoreTexImageFunc table[MESA_FORMAT_COUNT];
+   static GLboolean initialized = GL_FALSE;
+
+   if (!initialized) {
+  memset(table, 0, sizeof table);

-/**
- * Return the StoreTexImageFunc pointer to store an image in the given format.
- */
-static StoreTexImageFunc
-_mesa_get_texstore_func(mesa_format format)
+  table[MESA_FORMAT_SRGB_DXT1] = _mesa_texstore_rgb_dxt1;
+  table[MESA_FORMAT_SRGBA_DXT1] = _mesa_texstore_rgba_dxt1;
+  table[MESA_FORMAT_SRGBA_DXT3] = _mesa_texstore_rgba_dxt3;
+  table[MESA_FORMAT_SRGBA_DXT5] = _mesa_texstore_rgba_dxt5;
+  table[MESA_FORMAT_RGB_FXT1] = _mesa_texstore_rgb_fxt1;
+  table[MESA_FORMAT_RGBA_FXT1] = _mesa_texstore_rgba_fxt1;
+  table[MESA_FORMAT_RGB_DXT1] = _mesa_texstore_rgb_dxt1;
+  table[MESA_FORMAT_RGBA_DXT1] = _mesa_texstore_rgba_dxt1;
+  table[MESA_FORMAT_RGBA_DXT3] = _mesa_texstore_rgba_dxt3;
+  table[MESA_FORMAT_RGBA_DXT5] = _mesa_texstore_rgba_dxt5;
+  table[MESA_FORMAT_R_RGTC1_UNORM] = _mesa_texstore_red_rgtc1;
+  table[MESA_FORMAT_R_RGTC1_SNORM] = _mesa_texstore_signed_red_rgtc1;
+  table[MESA_FORMAT_RG_RGTC2_UNORM] = _mesa_texstore_rg_rgtc2;
+  table[MESA_FORMAT_RG_RGTC2_SNORM] = _mesa_texstore_signed_rg_rgtc2;
+  table[MESA_FORMAT_L_LATC1_UNORM] = _mesa_texstore_red_rgtc1;
+  table[MESA_FORMAT_L_LATC1_SNORM] = _mesa_texstore_signed_red_rgtc1;
+  table[MESA_FORMAT_LA_LATC2_UNORM] = _mesa_texstore_rg_rgtc2;
+  table[MESA_FORMAT_LA_LATC2_SNORM] = _mesa_texstore_signed_rg_rgtc2;
+  table[MESA_FORMAT_ETC1_RGB8] = _mesa_texstore_etc1_rgb8;
+  table[MESA_FORMAT_ETC2_RGB8] = _mesa_texstore_etc2_rgb8;
+  table[MESA_FORMAT_ETC2_SRGB8] = _mesa_texstore_etc2_srgb8;
+  table[MESA_FORMAT_ETC2_RGBA8_EAC] = _mesa_texstore_etc2_rgba8_eac;
+  table[MESA_FORMAT_ETC2_SRGB8_ALPHA8_EAC] = 
_mesa_texstore_etc2_srgb8_alpha8_eac;
+  table[MESA_FORMAT_ETC2_R11_EAC] = _mesa_texstore_etc2_r11_eac;
+  table[MESA_FORMAT_ETC2_RG11_EAC] = _mesa_texstore_etc2_rg11_eac;
+  table[MESA_FORMAT_ETC2_SIGNED_R11_EAC] = 
_mesa_texstore_etc2_signed_r11_eac;
+  table[MESA_FORMAT_ETC2_SIGNED_RG11_EAC] = 
_mesa_texstore_etc2_signed_rg11_eac;
+  table[MESA_FORMAT_ETC2_RGB8_PUNCHTHROUGH_ALPHA1] =
+ _mesa_texstore_etc2_rgb8_punchthrough_alpha1;
+  table[MESA_FORMAT_ETC2_SRGB8_PUNCHTHROUGH_ALPHA1] =
+ _mesa_texstore_etc2_srgb8_punchthrough_alpha1;
+
+  initialized = GL_TRUE;
+   }
+
+   ASSERT(table[dstFormat]);
+   return table[dstFormat](ctx, dims, baseInternalFormat,
+   dstFormat, dstRowStride, dstSlices,
+   srcWidth, srcHeight, srcDepth,
+   srcFormat, srcType, srcAddr, srcPacking);
+}
+
+static GLboolean
+tex

Re: [Mesa-dev] [PATCH 10/12] main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM

2014-07-18 Thread Brian Paul

On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

Before it was only storing one of the color components due to truncation.
With this patch it now properly stores all of them.

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/format_pack.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
index fb3feb5..4b52405 100644
--- a/src/mesa/main/format_pack.c
+++ b/src/mesa/main/format_pack.c
@@ -856,7 +856,7 @@ pack_float_R_UNORM8(const GLfloat src[4], void *dst)
  static void
  pack_ubyte_R8G8_UNORM(const GLubyte src[4], void *dst)
  {
-   GLubyte *d = ((GLubyte *) dst);
+   GLushort *d = ((GLushort *) dst);
 *d = PACK_COLOR_88(src[GCOMP], src[RCOMP]);
  }




Reviewed-by: Brian Paul 

Should probably be tagged for the stable branch too.


___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/12] main/format_utils: Add a general format conversion function

2014-07-18 Thread Brian Paul

On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

Most format conversion operations required by GL can be performed by
converting one channel at a time, shuffling the channels around, and
optionally filling missing channels with zeros and ones.  This adds a
function to do just that in a general, yet efficient, way.

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/format_utils.c | 566 +++
  src/mesa/main/format_utils.h |  18 ++
  2 files changed, 584 insertions(+)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 241c158..0cb3eae 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -54,3 +54,569 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl)

 return lut[cl];
  }
+
+static bool
+swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
+   const void *src, GLenum src_type, int 
num_src_channels,
+   const uint8_t swizzle[4], bool normalized, int 
count)


Please add a comment on this function describing the parameters and what 
the return value means.




+{
+   int i;
+
+   if (src_type != dst_type)
+  return false;
+   if (num_src_channels != num_dst_channels)
+  return false;
+
+   for (i = 0; i < num_dst_channels; ++i)
+  if (swizzle[i] != i && swizzle[i] != MESA_FORMAT_SWIZZLE_NONE)
+ return false;
+
+   memcpy(dst, src, count * num_src_channels * _mesa_sizeof_type(src_type));
+
+   return true;
+}
+
+/* Note: This loop is carefully crafted for performance.  Be careful when
+ * changing it and run some benchmarks to ensure no performance regressions
+ * if you do.
+ */


Comments for the macro's parameters might be nice.  And a comment saying 
what the macro actually does.




+#define SWIZZLE_CONVERT_LOOP(DST_TYPE, SRC_TYPE, CONV)   \
+   do {  \
+  const SRC_TYPE *typed_src = void_src;  \
+  DST_TYPE *typed_dst = void_dst;\
+  DST_TYPE tmp[7];   \
+  tmp[4] = 0;\
+  tmp[5] = one;  \
+  for (s = 0; s < count; ++s) {  \
+ for (j = 0; j < num_src_channels; ++j) {\
+SRC_TYPE src = typed_src[j]; \
+tmp[j] = CONV;   \
+ }   \
+ \
+ typed_dst[0] = tmp[swizzle_x];  \
+ if (num_dst_channels > 1) { \
+typed_dst[1] = tmp[swizzle_y];   \
+if (num_dst_channels > 2) {  \
+   typed_dst[2] = tmp[swizzle_z];\
+   if (num_dst_channels > 3) {   \
+  typed_dst[3] = tmp[swizzle_w]; \
+   } \
+}\
+ }   \


In other places in Mesa we do that sort of thing with a switch statement 
with fall-throughs.  That might be even more efficient.  In the common 
case, there's 4 channels so you're always doing 3 ifs.  An optimized 
switch could be one computed jump.




+ typed_src += num_src_channels;  \
+ typed_dst += num_dst_channels;  \
+  }  \
+   } while (0);
+
+/**
+ * Convert between array-based color formats.
+ *
+ * Most format conversion operations required by GL can be performed by
+ * converting one channel at a time, shuffling the channels around, and
+ * optionally filling missing channels with zeros and ones.  This function
+ * does just that in a general, yet efficient, way.
+ *
+ * Most of the parameters are self-explanitory.  The swizzle parameter is


explanatory



+ * an array of 4 numbers (see _mesa_get_format_swizzle) that describes
+ * where each channel in the destination should come from in the source.
+ *
+ * Under most circumstances, the source and destination images must be
+ * different as no care is taken not to clobber one with the other.
+ * However, if they have the same number of bits per pixel, it is safe to
+ * do an in-place conversion.


Please document the function parameters too.



+ */
+void
+_mesa_swizzle_and_convert(void *void_dst, GLenum dst_type, int 
num_dst_channels,
+  const void *void_src, GLenum src_type, int 
num_src_channels,
+  const uint8_t swizzle[4], bool normalized, int count)
+{
+   int s, j;
+   register uint8_t swizzle_x, swizzle_y, swizzle_z, swizzle_w;
+
+   if (swizzle_convert_try_memcpy(void_dst, dst_type, num_dst_channels,
+  void_src, src_type, num_src_channel

Re: [Mesa-dev] [PATCH 07/12] main/format_utils: Add a function for determining if a format is actually an array format and computing the array format parameters

2014-07-18 Thread Brian Paul

Again, shorten the 1st line, please.


On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

This is a direct helper function for using _mesa_swizzle_and_convert

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/format_utils.c | 93 
  src/mesa/main/format_utils.h |  4 ++
  2 files changed, 97 insertions(+)

diff --git a/src/mesa/main/format_utils.c b/src/mesa/main/format_utils.c
index 0cb3eae..b9c7a54 100644
--- a/src/mesa/main/format_utils.c
+++ b/src/mesa/main/format_utils.c
@@ -55,6 +55,99 @@ _mesa_srgb_ubyte_to_linear_float(uint8_t cl)
 return lut[cl];
  }

+static const uint8_t map_identity[7] = { 0, 1, 2, 3, 4, 5, 6 };
+static const uint8_t map_3210[7] = { 3, 2, 1, 0, 4, 5, 6 };
+static const uint8_t map_1032[7] = { 1, 0, 3, 2, 4, 5, 6 };
+
+/**
+ * A helper function for figuring out if a (possibly packed) format is
+ * actually an array format and how to work with it.  If the format can not
+ * be used as an array format, this function returns false.


Please also document the returned parameters.



+ */
+bool
+_mesa_format_to_array(mesa_format format, GLenum *type, int *num_components,
+  uint8_t swizzle[4], bool *normalized)
+{
+   int i;
+   GLuint format_components;
+   uint8_t packed_swizzle[4];
+   const uint8_t *endian;
+
+   if (_mesa_is_format_compressed(format))
+  return false;
+
+   *normalized = !_mesa_is_format_integer(format);
+
+   _mesa_format_to_type_and_comps(format, type, &format_components);


Maybe insert a blank line here.



+   switch (_mesa_get_format_layout(format)) {
+   case MESA_FORMAT_LAYOUT_ARRAY:
+  *num_components = format_components;
+  _mesa_get_format_swizzle(format, swizzle);
+  return true;
+   case MESA_FORMAT_LAYOUT_PACKED:
+  switch (*type) {
+  case GL_UNSIGNED_BYTE:
+  case GL_BYTE:
+ if (_mesa_get_format_max_bits(format) != 8)
+return false;
+ *num_components = _mesa_get_format_bytes(format);
+ switch (*num_components) {
+ case 1:
+endian = map_identity;
+break;
+ case 2:
+endian = _mesa_little_endian() ? map_identity : map_1032;
+break;
+ case 4:
+endian = _mesa_little_endian() ? map_identity : map_3210;
+break;
+ default:
+assert(!"Invalid number of components");
+ }
+ break;
+  case GL_UNSIGNED_SHORT:
+  case GL_SHORT:
+  case GL_HALF_FLOAT:
+ if (_mesa_get_format_max_bits(format) != 16)
+return false;
+ *num_components = _mesa_get_format_bytes(format) / 2;
+ switch (*num_components) {
+ case 1:
+endian = map_identity;
+break;
+ case 2:
+endian = _mesa_little_endian() ? map_identity : map_1032;
+break;
+ default:
+assert(!"Invalid number of components");
+ }
+ break;
+  case GL_UNSIGNED_INT:
+  case GL_INT:
+  case GL_FLOAT:
+ /* This isn't packed.  At least not really. */
+ assert(format_components == 1);
+ if (_mesa_get_format_max_bits(format) != 32)
+return false;
+ *num_components = format_components;
+ endian = map_identity;
+ break;
+  default:
+ return false;
+  }
+
+  _mesa_get_format_swizzle(format, packed_swizzle);
+
+  for (i = 0; i < 4; ++i)
+ swizzle[i] = endian[packed_swizzle[i]];
+
+  return true;
+   case MESA_FORMAT_LAYOUT_OTHER:
+   default:
+  return false;
+   }
+}
+
  static bool
  swizzle_convert_try_memcpy(void *dst, GLenum dst_type, int num_dst_channels,
 const void *src, GLenum src_type, int 
num_src_channels,
diff --git a/src/mesa/main/format_utils.h b/src/mesa/main/format_utils.h
index c5dab7b..990c3f2 100644
--- a/src/mesa/main/format_utils.h
+++ b/src/mesa/main/format_utils.h
@@ -78,6 +78,10 @@ _mesa_srgb_to_linear(float cs)

  float _mesa_srgb_ubyte_to_linear_float(uint8_t cl);

+bool
+_mesa_format_to_array(mesa_format, GLenum *type, int *num_components,
+  uint8_t swizzle[4], bool *normalized);
+
  void
  _mesa_swizzle_and_convert(void *dst, GLenum dst_type, int num_dst_channels,
const void *src, GLenum src_type, int 
num_src_channels,



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/12] main/texstore: Use _mesa_swizzle_and_convert when possible

2014-07-18 Thread Brian Paul

On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

This should be both faster and more accurate than our general slow-path of
converting everything to float.

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/texstore.c | 179 +++
  1 file changed, 164 insertions(+), 15 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index e1f2284..13fb3a8 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -55,6 +55,7 @@
  #include "bufferobj.h"
  #include "colormac.h"
  #include "format_pack.h"
+#include "format_utils.h"
  #include "image.h"
  #include "macros.h"
  #include "mipmap.h"
@@ -233,21 +234,44 @@ static int
  get_map_idx(GLenum value)
  {
 switch (value) {
-   case GL_LUMINANCE: return IDX_LUMINANCE;
-   case GL_ALPHA: return IDX_ALPHA;
-   case GL_INTENSITY: return IDX_INTENSITY;
-   case GL_LUMINANCE_ALPHA: return IDX_LUMINANCE_ALPHA;
-   case GL_RGB: return IDX_RGB;
-   case GL_RGBA: return IDX_RGBA;
-   case GL_RED: return IDX_RED;
-   case GL_GREEN: return IDX_GREEN;
-   case GL_BLUE: return IDX_BLUE;
-   case GL_BGR: return IDX_BGR;
-   case GL_BGRA: return IDX_BGRA;
-   case GL_ABGR_EXT: return IDX_ABGR;
-   case GL_RG: return IDX_RG;
+   case GL_LUMINANCE:
+   case GL_LUMINANCE_INTEGER_EXT:
+  return IDX_LUMINANCE;
+   case GL_ALPHA:
+   case GL_ALPHA_INTEGER:
+  return IDX_ALPHA;
+   case GL_INTENSITY:
+  return IDX_INTENSITY;
+   case GL_LUMINANCE_ALPHA:
+   case GL_LUMINANCE_ALPHA_INTEGER_EXT:
+  return IDX_LUMINANCE_ALPHA;
+   case GL_RGB:
+   case GL_RGB_INTEGER:
+  return IDX_RGB;
+   case GL_RGBA:
+   case GL_RGBA_INTEGER:
+  return IDX_RGBA;
+   case GL_RED:
+   case GL_RED_INTEGER:
+  return IDX_RED;
+   case GL_GREEN:
+  return IDX_GREEN;
+   case GL_BLUE:
+  return IDX_BLUE;
+   case GL_BGR:
+   case GL_BGR_INTEGER:
+  return IDX_BGR;
+   case GL_BGRA:
+   case GL_BGRA_INTEGER:
+  return IDX_BGRA;
+   case GL_ABGR_EXT:
+  return IDX_ABGR;
+   case GL_RG:
+   case GL_RG_INTEGER:
+  return IDX_RG;
 default:
-  _mesa_problem(NULL, "Unexpected inFormat");
+  _mesa_problem(NULL, "Unexpected inFormat %s",
+_mesa_lookup_enum_by_nr(value));
return 0;
 }
  }
@@ -789,6 +813,7 @@ swizzle_copy(GLubyte *dst, GLuint dstComponents, const 
GLubyte *src,

  static const GLubyte map_identity[6] = { 0, 1, 2, 3, ZERO, ONE };
  static const GLubyte map_3210[6] = { 3, 2, 1, 0, ZERO, ONE };
+static const GLubyte map_1032[6] = { 1, 0, 3, 2, ZERO, ONE };


  /**
@@ -826,6 +851,12 @@ byteswap_mapping( GLboolean swapBytes,
 switch (srcType) {
 case GL_BYTE:
 case GL_UNSIGNED_BYTE:
+   case GL_SHORT:
+   case GL_UNSIGNED_SHORT:
+   case GL_INT:
+   case GL_UNSIGNED_INT:
+   case GL_FLOAT:
+   case GL_HALF_FLOAT:
return map_identity;
 case GL_UNSIGNED_INT_8_8_8_8:
 case GL_UNSIGNED_INT_8_8_8_8_REV:
@@ -3621,6 +3652,117 @@ texstore_compressed(TEXSTORE_PARAMS)
 srcFormat, srcType, srcAddr, srcPacking);
  }

+static void
+invert_swizzle(uint8_t dst[4], const uint8_t src[4])


Please put a comment on this function to explain what it does.  Maybe 
use GLubyte for dst, src since that's the type of the arguments below.





+{
+   int i, j;
+
+   dst[0] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[1] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[2] = MESA_FORMAT_SWIZZLE_NONE;
+   dst[3] = MESA_FORMAT_SWIZZLE_NONE;
+
+   for (i = 0; i < 4; ++i)
+  for (j = 0; j < 4; ++j)
+ if (src[j] == i && dst[i] == MESA_FORMAT_SWIZZLE_NONE)
+dst[i] = j;
+}
+
+static GLboolean
+texstore_swizzle(TEXSTORE_PARAMS)


Comment on this function too, please.



+{
+   const GLint srcRowStride = _mesa_image_row_stride(srcPacking, srcWidth,
+ srcFormat, srcType);
+   const GLint srcImageStride = _mesa_image_image_stride(srcPacking,
+  srcWidth, srcHeight, srcFormat, srcType);
+   const GLubyte *srcImage = (const GLubyte *) _mesa_image_address(dims,
+srcPacking, srcAddr, srcWidth, srcHeight, srcFormat, srcType, 0, 0, 0);
+   const int src_components = _mesa_components_in_format(srcFormat);
+
+   GLubyte swizzle[4], rgba2base[6], base2src[6], rgba2dst[4], dst2rgba[4];
+   const GLubyte *swap;
+   GLenum dst_type;
+   int dst_components;
+   bool is_array, normalized, need_swap;
+   GLint i, img, row;
+   const GLubyte *src_row;
+   GLubyte *dst_row;
+
+   is_array = _mesa_format_to_array(dstFormat, &dst_type, &dst_components,
+rgba2dst, &normalized);
+
+   if (!is_array)
+  return GL_FALSE;
+
+   switch (srcType) {
+   case GL_FLOAT:
+   case GL_UNSIGNED_BYTE:
+   case GL_BYTE:
+   case GL_UNSIGNED_SHORT:
+   case GL_SHORT:
+   case GL_UNSIGNED_INT:
+   case GL_INT:
+  /* If wa have to swap bytes in a multi-byte datatype, that means


"we"



+   * we're not doing 

Re: [Mesa-dev] [PATCH 12/12] mesa/texstore: Add a generic rgba integer texture upload path

2014-07-18 Thread Brian Paul

On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

Again, we delete a lot of functions that aren't really doing anything
interesting anymore.

Signed-off-by: Jason Ekstrand 
---
  src/mesa/main/texstore.c | 545 ++-
  1 file changed, 66 insertions(+), 479 deletions(-)

diff --git a/src/mesa/main/texstore.c b/src/mesa/main/texstore.c
index 31317cb..ae6b286 100644
--- a/src/mesa/main/texstore.c
+++ b/src/mesa/main/texstore.c
@@ -1194,423 +1194,6 @@ _mesa_texstore_s8(TEXSTORE_PARAMS)
  }


-/* non-normalized, signed int8 */
-static GLboolean
-_mesa_texstore_rgba_int8(TEXSTORE_PARAMS)
-{
-   GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-   GLint components = _mesa_components_in_format(baseFormat);
-
-   /* this forces alpha to 1 in make_temp_uint_image */
-   if (dstFormat == MESA_FORMAT_RGBX_SINT8) {
-  baseFormat = GL_RGBA;
-  components = 4;
-   }
-
-   ASSERT(dstFormat == MESA_FORMAT_R_SINT8 ||
-  dstFormat == MESA_FORMAT_RG_SINT8 ||
-  dstFormat == MESA_FORMAT_RGB_SINT8 ||
-  dstFormat == MESA_FORMAT_RGBA_SINT8 ||
-  dstFormat == MESA_FORMAT_A_SINT8 ||
-  dstFormat == MESA_FORMAT_I_SINT8 ||
-  dstFormat == MESA_FORMAT_L_SINT8 ||
-  dstFormat == MESA_FORMAT_LA_SINT8 ||
-  dstFormat == MESA_FORMAT_RGBX_SINT8);
-   ASSERT(baseInternalFormat == GL_RGBA ||
-  baseInternalFormat == GL_RGB ||
-  baseInternalFormat == GL_RG ||
-  baseInternalFormat == GL_RED ||
-  baseInternalFormat == GL_ALPHA ||
-  baseInternalFormat == GL_LUMINANCE ||
-  baseInternalFormat == GL_LUMINANCE_ALPHA ||
-  baseInternalFormat == GL_INTENSITY);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLbyte));
-
-   {
-  /* general path */
-  const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-baseInternalFormat,
-baseFormat,
-srcWidth, srcHeight, 
srcDepth,
-srcFormat, srcType,
-srcAddr,
-srcPacking);
-  const GLuint *src = tempImage;
-  GLint img, row;
-  GLboolean is_unsigned = _mesa_is_type_unsigned(srcType);
-  if (!tempImage)
- return GL_FALSE;
-  for (img = 0; img < srcDepth; img++) {
- GLubyte *dstRow = dstSlices[img];
- for (row = 0; row < srcHeight; row++) {
-GLbyte *dstTexel = (GLbyte *) dstRow;
-GLint i;
-if (is_unsigned) {
-   for (i = 0; i < srcWidth * components; i++) {
-  dstTexel[i] = (GLbyte) MIN2(src[i], 0x7f);
-   }
-} else {
-   for (i = 0; i < srcWidth * components; i++) {
-  dstTexel[i] = (GLbyte) CLAMP((GLint) src[i], -0x80, 0x7f);
-   }
-}
-dstRow += dstRowStride;
-src += srcWidth * components;
- }
-  }
-
-  free((void *) tempImage);
-   }
-   return GL_TRUE;
-}
-
-
-/* non-normalized, signed int16 */
-static GLboolean
-_mesa_texstore_rgba_int16(TEXSTORE_PARAMS)
-{
-   GLenum baseFormat = _mesa_get_format_base_format(dstFormat);
-   GLint components = _mesa_components_in_format(baseFormat);
-
-   /* this forces alpha to 1 in make_temp_uint_image */
-   if (dstFormat == MESA_FORMAT_RGBX_SINT16) {
-  baseFormat = GL_RGBA;
-  components = 4;
-   }
-
-   ASSERT(dstFormat == MESA_FORMAT_R_SINT16 ||
-  dstFormat == MESA_FORMAT_RG_SINT16 ||
-  dstFormat == MESA_FORMAT_RGB_SINT16 ||
-  dstFormat == MESA_FORMAT_RGBA_SINT16 ||
-  dstFormat == MESA_FORMAT_A_SINT16 ||
-  dstFormat == MESA_FORMAT_L_SINT16 ||
-  dstFormat == MESA_FORMAT_I_SINT16 ||
-  dstFormat == MESA_FORMAT_LA_SINT16 ||
-  dstFormat == MESA_FORMAT_RGBX_SINT16);
-   ASSERT(baseInternalFormat == GL_RGBA ||
-  baseInternalFormat == GL_RGB ||
-  baseInternalFormat == GL_RG ||
-  baseInternalFormat == GL_RED ||
-  baseInternalFormat == GL_ALPHA ||
-  baseInternalFormat == GL_LUMINANCE ||
-  baseInternalFormat == GL_LUMINANCE_ALPHA ||
-  baseInternalFormat == GL_INTENSITY);
-   ASSERT(_mesa_get_format_bytes(dstFormat) == components * sizeof(GLshort));
-
-   {
-  /* general path */
-  const GLuint *tempImage = make_temp_uint_image(ctx, dims,
-baseInternalFormat,
-baseFormat,
-srcWidth, srcHeight, 
srcDepth,
-srcFormat, srcType,
-srcAddr,
-   

[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #1 from Barto  ---
Created attachment 103046
  --> https://bugs.freedesktop.org/attachment.cgi?id=103046&action=edit
wrong color for the c172p plane, it's green instead of grey

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

Kai  changed:

   What|Removed |Added

 Attachment #103045|text/plain  |image/png
  mime type||

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #2 from Barto  ---
Created attachment 103047
  --> https://bugs.freedesktop.org/attachment.cgi?id=103047&action=edit
glxinfo for ati radeon hd4650 pcie

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/12] Rework texture upload code

2014-07-18 Thread Brian Paul

On 07/17/2014 12:04 PM, Jason Ekstrand wrote:

This is the first installment of some work I've been doing over the past
couple of weeks to refactor mesa's texture conversion/storage code.  There
is more to be done and more that I have done but have not included in this
series.  This is the first mailing-list-ready fruits of my efforts.  The
important bits here include:

  1) Using a human-readable CSV file to describe texture formats similar to
 the way it is currently done in gallium.  This is much easier to
 read/edit than the structure in formats.c.  The guts of formats.c is
 then autogenerated from this CSV file.


I'm kind of on the fence about this.  Some of us have been hoping that 
we'd eventually consolidate some of the Mesa and gallium code so we 
wouldn't have duplicated/parallel code.  The format code is a good 
example.  In theory, the MESA_FORMAT_ stuff could be replaced by the 
gallium PIPE_FORMAT_ code.


I kind of like the hand-written format code since it's simple to 
understand and update.  Now you've got 600 lines of (uncommented) Python 
code to figure out and maintain.  When something breaks it's important 
that the code is easily maintainable if you're not around.





  2) Adding a very generic yet efficient _mesa_swizzle_and_convert function
 that is capable of performing the vast majority of texture format
 conversions in one function.  It has also been fairly carefully tuned
 to be even faster than the _mesa_swizzle_ubyte_image special-case that
 we had before for ubyte textures only it also works on the other
 datatypes and can even do type conversions as it swizzles.

  3) Refactoring of texstore.c including the use of the above
 _mesa_swizzle_and_convert function along with the already-existing
 packing functions to remove a lot of hand-written special-case code.


This part generally looks good.  Comments already posted.



Thanks to the format CSV file, there's even more that we can now do.
Things I hope to accomplish in the future include:

  1) Autogenerate the bulk of main/format_pack.c, main/format_unpack.c, and
 main/pack.c from CSV files.  There's some refactoring that will be
 required first, but it shouldn't be that hard and I already have the
 python code to do the generation; it's just not part of this patch
 series.


Again, this kind of thing was already done for gallium.  It's kind of 
sad to effectively duplicate it.


I'm interested in hearing other opinions.

-Brian



  2) Find a general way to do depth-stencil formats.  I'm a bit dubious as
 to whether or not this will turn out to be practical, but I haven't had
 a chance to look into it too much yet.
  3) Do similar refactors for GetTexImage, ReadPixels, and DrawPixels.

Happy Reviewing!
--Jason Ekstrand




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #3 from Kai  ---
(In reply to comment #0)
> I can do an apitrace if you want it,

I'd guess doing a git bisect would be more helpful, since this sounds like a
regression.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] New: wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

  Priority: medium
Bug ID: 81500
  Assignee: mesa-dev@lists.freedesktop.org
   Summary: wrong color in flightgear for the c172p if
"Atmospheric light scattering" is used
  Severity: normal
Classification: Unclassified
OS: All
  Reporter: mister.free...@laposte.net
  Hardware: Other
Status: NEW
   Version: 10.2
 Component: Mesa core
   Product: Mesa

Created attachment 103045
  --> https://bugs.freedesktop.org/attachment.cgi?id=103045&action=edit
wrong color for the c172p plane, it's green instead of grey

I use mesa 10.2.3 with archlinux 64 bits,

I have an ati radeon HD4650 Pcie graphic card ( with the open source driver
radeon ), 4 Gb ram,

I notice a bug if I use flightgear 3.0.0 and mesa 10.2.3 :

- if I select the "cessna c172p" aircraft and If I use the "Atmospheric light
scattering" option ( in render options from flightgear ) the color of the c172p
is wrong, it's green instead of grey,

you can see this with the screenshot,

I can do an apitrace if you want it,

If I use an old version of mesa ( like 10.0.3 ) the bug is gone

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-18 Thread Christian König

Am 18.07.2014 05:07, schrieb Michel Dänzer:

[PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI

I'm still not very keen with this change since I still don't understand
the reason why it's faster than with GTT. Definitely needs more testing
on a wider range of systems.

Sure. If anyone wants to give this patch a spin and see if they can
measure any performance difference, good or bad, that would be interesting.


Maybe limit it to APUs for now?

But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even
bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
system. I suspect it may depend on the bandwidth available for PCIe vs.
system memory though.


I've made a few tests today with the kernel part of the patches running 
Xonotic on Ultra in 1920 x 1080.


Without any patches I get around ~47.0fps on average with my dedicated 
HD7870.


Adding only "drm/radeon: Use write-combined CPU mappings of rings and 
IBs on >= SI" and that goes down to ~45.3fps.


Adding on top of that "drm/radeon: Use VRAM for indirect buffers on >= 
SI" and the frame rate goes down to ~27.74fps.


So enabling this unconditionally is definitely not a good idea. What I 
don't understand yet is why using USWC reduces the fps on SI as well. It 
looks like the reads from the IB buffer for command stream validation on 
SI affect that more than thought.


Christian.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/12] main/format_pack: Fix a wrong datatype in pack_ubyte_R8G8_UNORM

2014-07-18 Thread Jason Ekstrand
On Fri, Jul 18, 2014 at 7:59 AM, Brian Paul  wrote:

> On 07/17/2014 12:04 PM, Jason Ekstrand wrote:
>
>> Before it was only storing one of the color components due to truncation.
>> With this patch it now properly stores all of them.
>>
>> Signed-off-by: Jason Ekstrand 
>> ---
>>   src/mesa/main/format_pack.c | 2 +-
>>   1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/src/mesa/main/format_pack.c b/src/mesa/main/format_pack.c
>> index fb3feb5..4b52405 100644
>> --- a/src/mesa/main/format_pack.c
>> +++ b/src/mesa/main/format_pack.c
>> @@ -856,7 +856,7 @@ pack_float_R_UNORM8(const GLfloat src[4], void *dst)
>>   static void
>>   pack_ubyte_R8G8_UNORM(const GLubyte src[4], void *dst)
>>   {
>> -   GLubyte *d = ((GLubyte *) dst);
>> +   GLushort *d = ((GLushort *) dst);
>>  *d = PACK_COLOR_88(src[GCOMP], src[RCOMP]);
>>   }
>>
>>
>>
> Reviewed-by: Brian Paul 
>
> Should probably be tagged for the stable branch too.
>

Yeah, I've already tweaked the commit message locally to CC mesa-stable.


>
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #4 from Barto  ---
I did an apitrace, I will post the link to download the file,

I will do a bisect also,

after tests I notice that it's probably a bug with the code related to the r600
driver in mesa,

because if I run flightgear ( and the trace generated by apitrace ) without
hardware acceleration ( "export LIBGL_ALWAYS_SOFTWARE=1" ) the bug is gone,

so it seems that the r600 driver in mesa 10.2.3 has a problem, maybe a
regression

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-18 Thread Dieter Nützel

Am 18.07.2014 05:07, schrieb Michel Dänzer:

On 17.07.2014 19:09, Christian König wrote:

Am 17.07.2014 12:01, schrieb Michel Dänzer:

In order to try and improve X(Shm)PutImage performance with glamor, I
implemented support for write-combined CPU mappings of BOs in GTT.

This did provide a nice speedup, but to my surprise, using VRAM 
instead

of write-combined GTT turned out to be even faster in general on my
Kaveri machine, both for the internal GPU and for discrete GPUs.

However, I've kept the changes from GTT to VRAM separated, in case 
this

turns out to be a loss on other setups.

Kernel patches:

[PATCH 1/5] drm/radeon: Remove radeon_gart_restore()
[PATCH 2/5] drm/radeon: Pass GART page flags to
[PATCH 3/5] drm/radeon: Allow write-combined CPU mappings of BOs in
[PATCH 4/5] drm/radeon: Use write-combined CPU mappings of rings and


Those four are Reviewed-by: Christian König 


Thanks!



[PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI


I'm still not very keen with this change since I still don't 
understand
the reason why it's faster than with GTT. Definitely needs more 
testing

on a wider range of systems.


Sure. If anyone wants to give this patch a spin and see if they can
measure any performance difference, good or bad, that would be 
interesting.



Maybe limit it to APUs for now?


But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an 
even

bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
system. I suspect it may depend on the bandwidth available for PCIe vs.
system memory though.


Michel,

please, please do NOT change anything on this!;-)
You all know that I currently can only run this on my poor Duron 1800 
with RV730 (AGP), but...


With this all 'objview' demos (mesa-demos) run at 60 fps (vsync),
even with chip set/CPU power management enabled (athcool on).

If I set vblank_mode=0
the slowest GreatLakesBiplaneHP.obj
run at ~100 fps (~16 fps before) => 6x speedup.
(Even 5 planes run at 30 fps) - Wow!!!

'buddha' went from ~40 fps up to ~175 fps
'bunny' went from ~60 fps up to ~215 fps
'bobcat' show not such a big improvement 'only' 70 fps more

R600_HYPERZ=1
help somewhat, too but not in all cases.

Overall X/Kwin eXperience is much better.
Let me know which benchmarks you need.

Cheers,
  Dieter

BTW, does anyone know how I can override BIOS GTT settings?
I can only set 256 MB max. - BIOS patching?
With agpmode=-1 I can run with 1024 MB GTT
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/14] i965/gen8: Use aux buf qpitch for Auxiliary Buffer (MCS)

2014-07-18 Thread Jordan Justen
On Fri, Jul 18, 2014 at 2:24 AM, Pohjolainen, Topi
 wrote:
> On Tue, Jul 15, 2014 at 06:32:17PM -0700, Jordan Justen wrote:
>> For hiz, the qpitch may be different than the main miptree.
>
> s/hiz/aux/ ?

The reason the change is needed is hiz. I could reword it like this,
which might be better:

"For some auxiliary buffers the qpitch may be different than the main
miptree. (for example, hiz)"

Is that wording better?

-Jordan

>>
>> In "i965: Wrap MCS miptree in intel_miptree_aux_buffer" we set
>> aux_buf->qpitch to mt->qpitch, so for MCS, this should be a no-op.
>>
>> Signed-off-by: Jordan Justen 
>> ---
>>  src/mesa/drivers/dri/i965/gen8_surface_state.c | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c 
>> b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> index 87f0d49..3d97232 100644
>> --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c
>> @@ -203,7 +203,7 @@ gen8_update_texture_surface(struct gl_context *ctx,
>>   (intelObj->_MaxLevel - tObj->BaseLevel); /* mip count */
>>
>> if (aux_buf) {
>> -  surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>> +  surf[6] = SET_FIELD(aux_buf->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>>  SET_FIELD((aux_buf->pitch / 128) - 1, 
>> GEN8_SURFACE_AUX_PITCH) |
>>  aux_mode;
>> } else {
>> @@ -394,7 +394,7 @@ gen8_update_renderbuffer_surface(struct brw_context *brw,
>> surf[5] = irb->mt_level - irb->mt->first_level;
>>
>> if (aux_buf) {
>> -  surf[6] = SET_FIELD(mt->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>> +  surf[6] = SET_FIELD(aux_buf->qpitch / 4, GEN8_SURFACE_AUX_QPITCH) |
>>  SET_FIELD((aux_buf->pitch / 128) - 1, 
>> GEN8_SURFACE_AUX_PITCH) |
>>  aux_mode;
>> } else {
>> --
>> 2.0.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/12] Rework texture upload code

2014-07-18 Thread Jason Ekstrand
On Fri, Jul 18, 2014 at 8:32 AM, Brian Paul  wrote:

> On 07/17/2014 12:04 PM, Jason Ekstrand wrote:
>
>> This is the first installment of some work I've been doing over the past
>> couple of weeks to refactor mesa's texture conversion/storage code.  There
>> is more to be done and more that I have done but have not included in this
>> series.  This is the first mailing-list-ready fruits of my efforts.  The
>> important bits here include:
>>
>>   1) Using a human-readable CSV file to describe texture formats similar
>> to
>>  the way it is currently done in gallium.  This is much easier to
>>  read/edit than the structure in formats.c.  The guts of formats.c is
>>  then autogenerated from this CSV file.
>>
>
> I'm kind of on the fence about this.  Some of us have been hoping that
> we'd eventually consolidate some of the Mesa and gallium code so we
> wouldn't have duplicated/parallel code.  The format code is a good example.
>  In theory, the MESA_FORMAT_ stuff could be replaced by the gallium
> PIPE_FORMAT_ code.
>

Or the other way round...  I intentionally kept the format of the CSV file
almost identical.  Hopefully, this will eventually be merged into, at the
very least, one file format and parser.  The only significant difference in
the CSV format is that I replaced gallium's "plain" format layout with
"packed" and "array" layouts.  Mesa makes a distinction here that gallium
does not.  In Gallium, the format layout rules are basically the same as
for C structure bitfields and whether a format is a packed format or an
array format is determined based on the size of the channels.  In mesa, the
difference is explicit and packed formats are specified in lsb-to-msb
order always.   We could bikeshed all day about which of these is better
than the other.

I did make some substantial changes to the format_parser.py code from the
original gallium version.  Some of this was removing the difference between
big and little-endian.  Some was simply doing what I thought were cleanups
such as adding an actual Swizzle class that allows you to compose and
pseudo-invert swizzles (useful for writing conversion code).


>
> I kind of like the hand-written format code since it's simple to
> understand and update.  Now you've got 600 lines of (uncommented) Python
> code to figure out and maintain.  When something breaks it's important that
> the code is easily maintainable if you're not around.


The problem with the hand-written format_info code is that, while adding a
format is fairly easy, adding a type of information is not.  The three
patches that followed to remove indexBits and add packing and swizzling
information would have been a nightmare.  Having it laid out in a table
makes this much easier.  Also, if we want any sort of auto-generation, we
need the table anyway.  Might as well get rid of one more place where
things can get out-of-sync.

I'll try to better comment the python code.


>
>
>
>>   2) Adding a very generic yet efficient _mesa_swizzle_and_convert
>> function
>>  that is capable of performing the vast majority of texture format
>>  conversions in one function.  It has also been fairly carefully tuned
>>  to be even faster than the _mesa_swizzle_ubyte_image special-case
>> that
>>  we had before for ubyte textures, only it also works on the other
>>  datatypes and can even do type conversions as it swizzles.
>>
>>   3) Refactoring of texstore.c including the use of the above
>>  _mesa_swizzle_and_convert function along with the already-existing
>>  packing functions to remove a lot of hand-written special-case code.
>>
>
> This part generally looks good.  Comments already posted.
>
>
>
>> Thanks to the format CSV file, there's even more that we can now do.
>> Things I hope to accomplish in the future include:
>>
>>   1) Autogenerate the bulk of main/format_pack.c, main/format_unpack.c,
>> and
>>  main/pack.c from CSV files.  There's some refactoring that will be
>>  required first, but it shouldn't be that hard and I already have the
>>  python code to do the generation; it's just not part of this patch
>>  series.
>>
>
> Again, this kind of thing was already done for gallium.  It's kind of sad
> to effectively duplicate it.
>
> I'm interested in hearing other opinions.
>

Agreed.  However, short of replacing all of mesa's formats and
format-handling code with gallium formats and handling code, we are always
going to have some duplication.


>
> -Brian
>
>
>
>>   2) Find a general way to do depth-stencil formats.  I'm a bit dubious as
>>  to whether or not this will turn out to be practical, but I haven't
>> had
>>  a chance to look into it too much yet.
>>   3) Do similar refactors for GetTexImage, ReadPixels, and DrawPixels.
>>
>> Happy Reviewing!
>> --Jason Ekstrand
>>
>
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: ensure that frexp returns a 0 exponent for zero values

2014-07-18 Thread Ilia Mirkin
The current code appears to work in simple tests, however this will
guarantee that the returned exponent is 0 for a 0 value.

Signed-off-by: Ilia Mirkin 
---

I couldn't make a simple test-case that would cause the current logic to
fail. However when I did the same thing with doubles, I ran into trouble. It
seems safer to move the csel outside of the add in case the value actually has
a non-0 exponent despite a 0 significand.

 src/glsl/builtin_functions.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
index e01742c..5755de9 100644
--- a/src/glsl/builtin_functions.cpp
+++ b/src/glsl/builtin_functions.cpp
@@ -4229,8 +4229,8 @@ builtin_builder::_frexp(const glsl_type *x_type, const 
glsl_type *exp_type)
 * to unsigned integers to ensure that 1 bits aren't shifted in.
 */
body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
-   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
- imm(0, vec_elem);
+   body.emit(assign(exponent, csel(is_not_zero, add(exponent, exponent_bias),
+   imm(0, vec_elem;
 
ir_variable *bits = body.make_temp(uvec, "bits");
body.emit(assign(bits, bitcast_f2u(x)));
-- 
1.8.5.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #5 from Barto  ---
here is the apitrace :

http://demo.ovh.eu/download/365b2f85df30b16ec7539d7b9622542a/fgfs.tar.bz2

at start you will see a green aircraft if your graphic card is affected by the
bug ( r600 driver for example ), 

then after a few seconds in the apitrace I uncheck the "atmospheric light
scattering" option in flightgear, the aircraft will immediately become "grey"
after this modification,

if your graphic card is not affected by the bug ( or if you use an old version
of mesa, like 10.0.3 ) the cessna c172p will be always in grey in the apitrace
( the good color for this plane ) even if "atmospheric light scattering" option
is checked in flightgear options,

now I will do a bisect in order to find the guilty commit

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] gbm: Replace GBM_DRIVERS_PATH with LIBGL_DRIVERS_PATH

2014-07-18 Thread Kenneth Graunke
On Friday, July 18, 2014 07:41:57 AM Dylan Baker wrote:
> Currently mesa searches for two different environment variables,
> LIBGL_DRIVERS_PATH and GBM_DRIVERS_PATH. The first is used to search
> for DRI drivers in every case except GBM, and the latter is used
> exclusively for setting GBM drivers. This patch simplifies things by
> having just one variable to set.
> 
> Signed-off-by: Dylan Baker 
> ---
>  src/gbm/backends/dri/gbm_dri.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gbm/backends/dri/gbm_dri.c b/src/gbm/backends/dri/gbm_dri.c
> index 347bc99..9d9d1c4 100644
> --- a/src/gbm/backends/dri/gbm_dri.c
> +++ b/src/gbm/backends/dri/gbm_dri.c
> @@ -212,8 +212,8 @@ dri_load_driver(struct gbm_dri_device *dri)
>  
> search_paths = NULL;
> if (geteuid() == getuid()) {
> -  /* don't allow setuid apps to use GBM_DRIVERS_PATH */
> -  search_paths = getenv("GBM_DRIVERS_PATH");
> +  /* don't allow setuid apps to use LIBGL_DRIVERS_PATH */
> +  search_paths = getenv("LIBGL_DRIVERS_PATH");
> }
> if (search_paths == NULL)
>search_paths = DEFAULT_DRIVER_DIR;
> 

I'm definitely a fan of moving to LIBGL_DRIVERS_PATH for everything - 
GBM_DRIVERS_PATH is just another environment variable to forget to set properly.

As is, this is:
Reviewed-by: Kenneth Graunke 

Are people okay with just moving to LIBGL_DRIVERS_PATH completely like this?  
Or do people want it to check GBM_DRIVERS_PATH then fall back to 
LIBGL_DRIVERS_PATH?  Or use $GBM_DRIVERS_PATH:$LIBGL_DRIVERS_PATH?


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] r600g: Use hardware sqrt instruction

2014-07-18 Thread Alex Deucher
On Fri, Jul 18, 2014 at 3:54 AM, Glenn Kennard  wrote:
> Piglit quick tests including sqrt pass, no other regressions,
> tested on radeon 6670.
> ---
> Should be slightly more precise than the invsqrt/recip/mul combination
> used previously, I reckon up to about 2 bits of mantissa, and saves
> two instructions per sqrt emitted.
>
> It would be good if someone could test this on Cayman since it uses
> a slightly different codepath.

Reviewed-by: Alex Deucher 

>
>  src/gallium/drivers/r600/r600_pipe.c   | 2 +-
>  src/gallium/drivers/r600/r600_shader.c | 9 +++--
>  2 files changed, 4 insertions(+), 7 deletions(-)
>
> diff --git a/src/gallium/drivers/r600/r600_pipe.c 
> b/src/gallium/drivers/r600/r600_pipe.c
> index 5bf9c00..ee6a416 100644
> --- a/src/gallium/drivers/r600/r600_pipe.c
> +++ b/src/gallium/drivers/r600/r600_pipe.c
> @@ -428,7 +428,7 @@ static int r600_get_shader_param(struct pipe_screen* 
> pscreen, unsigned shader, e
> case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
> return 1;
> case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
> -   return 0;
> +   return 1;
> case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
> case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
> case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
> diff --git a/src/gallium/drivers/r600/r600_shader.c 
> b/src/gallium/drivers/r600/r600_shader.c
> index db928f3..907547d 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -6498,8 +6498,7 @@ static struct r600_shader_tgsi_instruction 
> r600_shader_tgsi_instruction[] = {
> {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
> {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
> {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
> -   /* gap */
> -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
> +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
> tgsi_trans_srcx_replicate},
> {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
> /* gap */
> {22,0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -6693,8 +6692,7 @@ static struct r600_shader_tgsi_instruction 
> eg_shader_tgsi_instruction[] = {
> {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
> {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
> {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
> -   /* gap */
> -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
> +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
> tgsi_trans_srcx_replicate},
> {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
> /* gap */
> {22,0, ALU_OP0_NOP, tgsi_unsupported},
> @@ -6888,8 +6886,7 @@ static struct r600_shader_tgsi_instruction 
> cm_shader_tgsi_instruction[] = {
> {TGSI_OPCODE_SUB,   0, ALU_OP2_ADD, tgsi_op2},
> {TGSI_OPCODE_LRP,   0, ALU_OP0_NOP, tgsi_lrp},
> {TGSI_OPCODE_CND,   0, ALU_OP0_NOP, tgsi_unsupported},
> -   /* gap */
> -   {20,0, ALU_OP0_NOP, tgsi_unsupported},
> +   {TGSI_OPCODE_SQRT,  0, ALU_OP1_SQRT_IEEE, 
> cayman_emit_float_instr},
> {TGSI_OPCODE_DP2A,  0, ALU_OP0_NOP, tgsi_unsupported},
> /* gap */
> {22,0, ALU_OP0_NOP, tgsi_unsupported},
> --
> 1.8.3.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-18 Thread Marek Olšák
On Fri, Jul 18, 2014 at 5:47 PM, Christian König
 wrote:
> Am 18.07.2014 05:07, schrieb Michel Dänzer:

>>>> [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI
>>>
>>> I'm still not very keen with this change since I still don't understand
>>> the reason why it's faster than with GTT. Definitely needs more testing
>>> on a wider range of systems.
>>
>> Sure. If anyone wants to give this patch a spin and see if they can
>> measure any performance difference, good or bad, that would be
>> interesting.
>>
>>> Maybe limit it to APUs for now?
>>
>> But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even
>> bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
>> system. I suspect it may depend on the bandwidth available for PCIe vs.
>> system memory though.
>
>
> I've made a few tests today with the kernel part of the patches running
> Xonotic on Ultra in 1920 x 1080.
>
> Without any patches I get around ~47.0fps on average with my dedicated
> HD7870.
>
> Adding only "drm/radeon: Use write-combined CPU mappings of rings and IBs on
> >= SI" and that goes down to ~45.3fps.
>
> Adding on top of that "drm/radeon: Use VRAM for indirect buffers on >= SI"
> and the frame rate goes down to ~27.74fps.
>
> So enabling this unconditionally is definitely not a good idea. What I don't
> understand yet is why using USWC reduces the fps on SI as well. It looks
> like the reads from the IB buffer for command stream validation on SI affect
> that more than thought.

Yes, there is a CS parser with SI, but shouldn't the parser read from
the CPU copy that came with the ioctl instead? Anyway, I recommend
only using VRAM for IBs which are not parsed and patched by the CPU
(which reduces it down to CIK graphics and DMA IBs, right?)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: remove unnecessary check

2014-07-18 Thread Marek Olšák
Please, every line of the commit message should be at most 80 characters long.

Marek

On Fri, Jul 18, 2014 at 1:47 PM, Timothy Arceri  wrote:
> This code does nothing useful as the next recursive call on the array element 
> will override any null values if the element is a record anyway. The code is 
> also not doing what the comment says as it's trying to set the record type 
> pointer for only the first element of the array not the first leaf field of 
> the record.
>
> Signed-off-by: Timothy Arceri 
> ---
>
> Resending this patch. It's a required cleanup for arrays of arrays.
>
>  src/glsl/link_uniforms.cpp | 8 
>  1 file changed, 8 deletions(-)
>
> diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp
> index ba66053..9566124 100644
> --- a/src/glsl/link_uniforms.cpp
> +++ b/src/glsl/link_uniforms.cpp
> @@ -187,9 +187,6 @@ program_resource_visitor::recursion(const glsl_type *t, 
> char **name,
>}
> } else if (t->is_array() && (t->fields.array->is_record()
>  || t->fields.array->is_interface())) {
> -  if (record_type == NULL && t->fields.array->is_record())
> - record_type = t->fields.array;
> -
>for (unsigned i = 0; i < t->length; i++) {
>  size_t new_length = name_length;
>
> @@ -198,11 +195,6 @@ program_resource_visitor::recursion(const glsl_type *t, 
> char **name,
>
>   recursion(t->fields.array, name, new_length, row_major,
> record_type);
> -
> - /* Only the first leaf-field of the record gets called with the
> -  * record type pointer.
> -  */
> - record_type = NULL;
>}
> } else {
>this->visit_field(t, *name, row_major, record_type);
> --
> 1.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] configure.ac: Add LLVM patch version to error message.

2014-07-18 Thread Vinson Lee
Signed-off-by: Vinson Lee 
---
 configure.ac | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/configure.ac b/configure.ac
index bdcc989..744e55c 100644
--- a/configure.ac
+++ b/configure.ac
@@ -1884,7 +1884,7 @@ radeon_llvm_check() {
 LLVM_REQUIRED_VERSION_MINOR="4"
 LLVM_REQUIRED_VERSION_PATCH="2"
 if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt 
"${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}${LLVM_REQUIRED_VERSION_PATCH}";
 then
-AC_MSG_ERROR([LLVM 
$LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is required 
for $1])
+AC_MSG_ERROR([LLVM 
$LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR.$LLVM_REQUIRED_VERSION_PATCH
 or newer is required for $1])
 fi
 if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
 AC_MSG_ERROR([LLVM R600 Target not enabled.  You can enable it when 
building the LLVM
-- 
1.9.3

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] configure.ac: Add LLVM patch version to error message.

2014-07-18 Thread Marek Olšák
Reviewed-by: Marek Olšák 

Marek

On Fri, Jul 18, 2014 at 9:15 PM, Vinson Lee  wrote:
> Signed-off-by: Vinson Lee 
> ---
>  configure.ac | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/configure.ac b/configure.ac
> index bdcc989..744e55c 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -1884,7 +1884,7 @@ radeon_llvm_check() {
>  LLVM_REQUIRED_VERSION_MINOR="4"
>  LLVM_REQUIRED_VERSION_PATCH="2"
>  if test "${LLVM_VERSION_INT}${LLVM_VERSION_PATCH}" -lt 
> "${LLVM_REQUIRED_VERSION_MAJOR}0${LLVM_REQUIRED_VERSION_MINOR}${LLVM_REQUIRED_VERSION_PATCH}";
>  then
> -AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR or newer is 
> required for $1])
> +AC_MSG_ERROR([LLVM 
> $LLVM_REQUIRED_VERSION_MAJOR.$LLVM_REQUIRED_VERSION_MINOR.$LLVM_REQUIRED_VERSION_PATCH
>  or newer is required for $1])
>  fi
>  if test true && $LLVM_CONFIG --targets-built | grep -qvw 'R600' ; then
>  AC_MSG_ERROR([LLVM R600 Target not enabled.  You can enable it when 
> building the LLVM
> --
> 1.9.3
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-18 Thread Grigori Goronzy
On 18.07.2014 13:45, Marek Olšák wrote:
> If the requirements of GL_MAP_COHERENT_BIT are satisfied, then the
> patch is okay.
>

Apart from correctness, I still wonder how this will affect performance,
most notably CPU reads. This change unconditionally uses write-combined,
uncached memory for MAP_COHERENT buffers. Unless I am missing something,
CPU reads will be slow, even if the buffer storage flags indicate that
the buffer will be read by the CPU. Maybe it's a good idea to avoid
write combined memory if the buffer storage flags include MAP_READ_BIT?

Grigori

> Marek
> 
> 
> On Fri, Jul 18, 2014 at 5:19 AM, Michel Dänzer  wrote:
>> On 17.07.2014 21:00, Marek Olšák wrote:
>>> On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
>>>> From: Michel Dänzer 
>>>>
>>>> This is hopefully safe: The kernel makes sure writes to these mappings
>>>> finish before the GPU might start reading from them, and the GPU caches
>>>> are invalidated at the start of a command stream.

>>> The resource flags actually tell you what you can do. If the COHERENT
>>> flag is set, the mapping must be cached.
>>
>> Why is that required? As I explain above, we should satisfy the
>> requirements of the ARB_buffer_storage extension AFAICT.
>>
>>
>> As pointed out by you and Grigori in other posts, I should probably just
>> drop the special treatment of persistent mappings though, so the
>> placement and flags are derived from the buffer usage.
>>
>>
>> --
>> Earthling Michel Dänzer|  http://www.amd.com
>> Libre software enthusiast  |Mesa and X developer
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
> 




signature.asc
Description: OpenPGP digital signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi/compute: Bump number of user sgprs for LLVM 3.5

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/radeonsi/si_compute.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index 3a9f00f..a7d61e7 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -32,6 +32,11 @@
 #include "radeon_llvm_util.h"
 
 #define MAX_GLOBAL_BUFFERS 20
+#if HAVE_LLVM < 0x0305
+#define NUM_USER_SGPRS 2
+#else
+#define NUM_USER_SGPRS 4
+#endif
 
 struct si_pipe_compute {
struct si_context *ctx;
@@ -132,7 +137,7 @@ static void si_launch_grid(
uint32_t *kernel_args;
uint64_t kernel_args_va;
uint64_t shader_va;
-   unsigned arg_user_sgpr_count = 2;
+   unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = &program->kernels[pc];
unsigned lds_blocks;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] winsys/radeon: Query the kernel for the number of SEs and SHs per SE

2014-07-18 Thread Tom Stellard
---
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 6 ++
 src/gallium/winsys/radeon/drm/radeon_winsys.h | 2 ++
 2 files changed, 8 insertions(+)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 576fea5..7cda70a 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -389,6 +389,12 @@ static boolean do_winsys_init(struct radeon_drm_winsys *ws)
 radeon_get_drm_value(ws->fd, RADEON_INFO_ACTIVE_CU_COUNT, NULL,
  &ws->info.max_compute_units);
 
+radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SE, NULL,
+ &ws->info.max_se);
+
+radeon_get_drm_value(ws->fd, RADEON_INFO_MAX_SH_PER_SE, NULL,
+ &ws->info.max_sh_per_se);
+
 if (radeon_get_drm_value(ws->fd, RADEON_INFO_SI_TILE_MODE_ARRAY, NULL,
  ws->info.si_tile_mode_array)) {
 ws->info.si_tile_mode_array_valid = TRUE;
diff --git a/src/gallium/winsys/radeon/drm/radeon_winsys.h 
b/src/gallium/winsys/radeon/drm/radeon_winsys.h
index 6df1987..a63a50b 100644
--- a/src/gallium/winsys/radeon/drm/radeon_winsys.h
+++ b/src/gallium/winsys/radeon/drm/radeon_winsys.h
@@ -199,6 +199,8 @@ struct radeon_info {
 uint32_tvram_size;
 uint32_tmax_sclk;
 uint32_tmax_compute_units;
+uint32_tmax_se;
+uint32_tmax_sh_per_se;
 
 uint32_tdrm_major; /* version */
 uint32_tdrm_minor;
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/3] radeonsi/compute: Add support scratch buffer support

2014-07-18 Thread Tom Stellard
The scratch buffer will be used for private memory and also register
spilling.
---
 src/gallium/drivers/radeonsi/si_compute.c | 85 ++-
 src/gallium/drivers/radeonsi/si_shader.c  |  5 ++
 src/gallium/drivers/radeonsi/si_shader.h  |  2 +
 3 files changed, 90 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index a7d61e7..d6cbbf4 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -122,6 +122,43 @@ static void si_set_global_binding(
}
 }
 
+/**
+ * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES
+ * /p block_layout is the number of threads in each work group.
+ * /p grid layout is the number of work groups.
+ */
+static unsigned compute_num_waves_for_scratch(
+   const struct radeon_info *info;
+   const uint *block_layout,
+   const uint *grid_layout)
+{
+   unsigned num_sh = MAX2(info->max_sh_per_se, 1);
+   unsigned num_se = MAX2(info->max_se, 1);
+   unsigned num_blocks = 1;
+   unsigned threads_per_block = 1;
+   unsigned waves_per_block;
+   unsigned waves_per_sh;
+   unsigned waves;
+   unsigned scratch_waves;
+   unsigned i;
+
+   for (i = 0; i < 3; i++) {
+   threads_per_block *= block_layout[i];
+   num_blocks *= grid_layout[i];
+   }
+
+   waves_per_block = align(threads_per_block, 64) / 64;
+   waves = waves_per_block * num_blocks;
+   waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se);
+   scratch_waves = waves_per_sh * num_sh * num_se;
+
+   if (waves_per_block > waves_per_sh) {
+   scratch_waves = waves_per_block * num_sh * num_se;
+   }
+
+   return scratch_waves;
+}
+
 static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -134,13 +171,16 @@ static void si_launch_grid(
unsigned kernel_args_size;
unsigned num_work_size_bytes = 36;
uint32_t kernel_args_offset = 0;
+   uint32_t scratch_offset = 0;
uint32_t *kernel_args;
uint64_t kernel_args_va;
+   uint64_t scratch_buffer_va = 0;
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = &program->kernels[pc];
unsigned lds_blocks;
+   unsigned num_waves_for_scratch;
 
pm4->compute_pkt = true;
si_cmd_context_control(pm4);
@@ -158,7 +198,9 @@ static void si_launch_grid(
/* Upload the kernel arguments */
 
/* The extra num_work_size_bytes are for work group / work item size 
information */
-   kernel_args_size = program->input_size + num_work_size_bytes;
+   kernel_args_size = program->input_size + num_work_size_bytes + 8 /* For 
scratch va */;
+   scratch_offset = program->input_size + num_work_size_bytes;
+
kernel_args = MALLOC(kernel_args_size);
for (i = 0; i < 3; i++) {
kernel_args[i] = grid_layout[i];
@@ -166,8 +208,34 @@ static void si_launch_grid(
kernel_args[i + 6] = block_layout[i];
}
 
+   num_waves_for_scratch = compute_num_waves_for_scratch(
+   &stcx->screen.info, block_layout, grid_layout);
+
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program->input_size);
 
+   if (shader->scratch_bytes_per_wave > 0) {
+   float *ptr;
+
+   COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
bytes; "
+   "Total Scratch: %u bytes\n", num_waves_for_scratch,
+   shader->scratch_bytes_per_wave, info.width0);
+   if (!shader->scratch_bo) {
+   shader->scratch_bo = (struct r600_resource*)
+   si_resource_create_custom(sctx->b.b.screen,
+   PIPE_USAGE_DEFAULT, info.width0);
+   }
+   ptr = sctx->b.ws->buffer_map(shader->scratch_bo->cs_buf, 
sctx->b.rings.gfx.cs,
+   PIPE_TRANSFER_READ);
+   scratch_buffer_va = r600_resource_va(ctx->screen,
+   (struct pipe_resource*)shader->scratch_bo);
+   si_pm4_add_bo(pm4, shader->scratch_bo,
+   RADEON_USAGE_READWRITE,
+   RADEON_PRIO_SHADER_RESOURCE_RW);
+
+   }
+   memcpy(kernel_args + (scratch_offset / 4), &scratch_buffer_va,
+  sizeof(scratch_buffer_va));
+
for (i = 0; i < (kernel_args_size / 4); i++) {
COMPUTE_DBG(sctx->screen, "input %u : %u\n", i,
kernel_args[i]);
@@ -183,6 +251,10 @@ static void si_launch_grid(
 
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set

[Mesa-dev] [PATCH 3/3] radeonsi: Read rodata from ELF and append it to the end of shaders

2014-07-18 Thread Tom Stellard
This is used for programs that have arrays of constants that
are accessed using dynamic indices.  The shader will compute
the base address of the constants and then access them using
SMRD instructions.
---
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +
 src/gallium/drivers/radeon/radeon_elf_util.c  |  5 +
 src/gallium/drivers/radeonsi/si_shader.c  | 16 +---
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index d82adf5..8f1a0a5 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -108,6 +108,11 @@ struct radeon_shader_binary {
unsigned char *config;
unsigned config_size;
 
+   /** Constant data accessed by the shader.  This will be uploaded
+* into a constant buffer. */
+   unsigned char *rodata;
+   unsigned rodata_size;
+
/** Set to 1 if the disassembly for this binary has been dumped to
 *  stderr. */
int disassembled;
diff --git a/src/gallium/drivers/radeon/radeon_elf_util.c 
b/src/gallium/drivers/radeon/radeon_elf_util.c
index 7d92962..7c5f93e 100644
--- a/src/gallium/drivers/radeon/radeon_elf_util.c
+++ b/src/gallium/drivers/radeon/radeon_elf_util.c
@@ -80,6 +80,11 @@ void radeon_elf_read(const char *elf_data, unsigned elf_size,
fprintf(stderr, "\nShader Disassembly:\n\n");
fprintf(stderr, "%.*s\n", (int)section_data->d_size,
  (char *)section_data->d_buf);
+   } else if (!strncmp(name, ".rodata", 7)) {
+   section_data = elf_getdata(section, section_data);
+   binary->rodata_size = section_data->d_size;
+   binary->rodata = MALLOC(binary->rodata_size * 
sizeof(unsigned char));
+   memcpy(binary->rodata, section_data->d_buf, 
binary->rodata_size);
}
}
 
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 8593bca..641e563 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2499,11 +2499,12 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
 {
unsigned r; /* llvm_compile result */
unsigned i;
-   uint32_t *ptr;
+   unsigned char *ptr;
struct radeon_shader_binary binary;
bool dump = r600_can_dump_shader(&sctx->screen->b,
shader->selector ? shader->selector->tokens : NULL);
const char * gpu_family = 
r600_get_llvm_processor_name(sctx->screen->b.family);
+   unsigned code_size;
 
/* Use LLVM to compile shader */
memset(&binary, 0, sizeof(binary));
@@ -2551,19 +2552,28 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
/* copy new shader */
+   code_size = binary.code_size + binary.rodata_size;
r600_resource_reference(&shader->bo, NULL);
shader->bo = si_resource_create_custom(sctx->b.b.screen, 
PIPE_USAGE_IMMUTABLE,
-  binary.code_size);
+  code_size);
if (shader->bo == NULL) {
return -ENOMEM;
}
 
-   ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, 
sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
+   ptr = sctx->b.ws->buffer_map(shader->bo->cs_buf, sctx->b.rings.gfx.cs,
+   PIPE_TRANSFER_WRITE);
util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
+   /* Copy read only data if any. */
+   if (binary.rodata_size > 0) {
+   ptr += binary.code_size;
+   util_memcpy_cpu_to_le32(ptr, binary.rodata, binary.rodata_size);
+   }
+
sctx->b.ws->buffer_unmap(shader->bo->cs_buf);
 
free(binary.code);
free(binary.config);
+   free(binary.rodata);
 
return r;
 }
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/3] util: Add util_memcpy_cpu_to_le32() v2

2014-07-18 Thread Tom Stellard
v2:
  - Preserve word boundaries.
---
 src/gallium/auxiliary/util/u_math.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..5de181a 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -812,6 +812,23 @@ util_bswap16(uint16_t n)
   (n << 8);
 }
 
+static INLINE void*
+util_memcpy_cpu_to_le32(void *dest, void *src, size_t n)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   size_t i, e;
+   asset(n % 4 == 0);
+
+   for (i = 0, e = n / 4; i < e; i++) {
+   uint32_t *d = (uint32_t*)dest;
+   uint32_t *s = (uint32_t*)src;
+   d[i] = util_bswap32(s[i]);
+   }
+   return dest;
+#else
+   return memcpy(dest, src, n);
+#endif
+}
 
 /**
  * Clamp X to [MIN, MAX].
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/3] radeonsi: Use util_memcpy_cpu_to_le32()

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/radeonsi/si_descriptors.c | 4 +---
 src/gallium/drivers/radeonsi/si_shader.c  | 8 +---
 2 files changed, 2 insertions(+), 10 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c 
b/src/gallium/drivers/radeonsi/si_descriptors.c
index 38ad077..41c1b67 100644
--- a/src/gallium/drivers/radeonsi/si_descriptors.c
+++ b/src/gallium/drivers/radeonsi/si_descriptors.c
@@ -451,9 +451,7 @@ void si_upload_const_buffer(struct si_context *sctx, struct 
r600_resource **rbuf
return;
}
 
-   for (i = 0; i < size / 4; ++i) {
-   tmpPtr[i] = util_cpu_to_le32(((uint32_t *)ptr)[i]);
-   }
+   util_memcpy_cpu_to_le32(tmpPtr, ptr, size);
 
u_upload_data(sctx->b.uploader, 0, size, tmpPtr, const_offset,
(struct pipe_resource**)rbuffer);
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index f0650f4..8593bca 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -2559,13 +2559,7 @@ int si_compile_llvm(struct si_context *sctx, struct 
si_pipe_shader *shader,
}
 
ptr = (uint32_t*)sctx->b.ws->buffer_map(shader->bo->cs_buf, 
sctx->b.rings.gfx.cs, PIPE_TRANSFER_WRITE);
-   if (SI_BIG_ENDIAN) {
-   for (i = 0; i < binary.code_size / 4; ++i) {
-   ptr[i] = util_cpu_to_le32((*(uint32_t*)(binary.code + 
i*4)));
-   }
-   } else {
-   memcpy(ptr, binary.code, binary.code_size);
-   }
+   util_memcpy_cpu_to_le32(ptr, binary.code, binary.code_size);
sctx->b.ws->buffer_unmap(shader->bo->cs_buf);
 
free(binary.code);
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/compute: Share COMPUTE_DBG macro with r600g

2014-07-18 Thread Tom Stellard
---
 src/gallium/drivers/r600/evergreen_compute.h  | 13 -
 src/gallium/drivers/radeon/r600_pipe_common.h |  5 +
 src/gallium/drivers/radeonsi/si_compute.c |  5 +
 3 files changed, 10 insertions(+), 13 deletions(-)

diff --git a/src/gallium/drivers/r600/evergreen_compute.h 
b/src/gallium/drivers/r600/evergreen_compute.h
index e760790..4fb53a1 100644
--- a/src/gallium/drivers/r600/evergreen_compute.h
+++ b/src/gallium/drivers/r600/evergreen_compute.h
@@ -59,17 +59,4 @@ void r600_compute_global_transfer_flush_region( struct 
pipe_context *, struct pi
 void r600_compute_global_transfer_inline_write( struct pipe_context *, struct 
pipe_resource *, unsigned level,
 unsigned usage, const struct 
pipe_box *, const void *data, unsigned stride, unsigned layer_stride);
 
-
-static inline void COMPUTE_DBG(struct r600_screen *rscreen, const char *fmt, 
...)
-{
-   if (!(rscreen->b.debug_flags & DBG_COMPUTE)) {
-   return;
-   }
-
-   va_list ap;
-   va_start(ap, fmt);
-   _debug_vprintf(fmt, ap);
-   va_end(ap);
-}
-
 #endif
diff --git a/src/gallium/drivers/radeon/r600_pipe_common.h 
b/src/gallium/drivers/radeon/r600_pipe_common.h
index 8f1a0a5..cee9622 100644
--- a/src/gallium/drivers/radeon/r600_pipe_common.h
+++ b/src/gallium/drivers/radeon/r600_pipe_common.h
@@ -515,6 +515,11 @@ static inline unsigned r600_tex_aniso_filter(unsigned 
filter)
 /* else */return 4;
 }
 
+#define COMPUTE_DBG(rscreen, fmt, args...) \
+   do { \
+   if ((rscreen->b.debug_flags & DBG_COMPUTE)) fprintf(stderr, 
fmt, ##args); \
+   } while (0);
+
 #define R600_ERR(fmt, args...) \
fprintf(stderr, "EE %s:%d %s - "fmt, __FILE__, __LINE__, __func__, 
##args)
 
diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index c0637f6..3a9f00f 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -163,6 +163,11 @@ static void si_launch_grid(
 
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program->input_size);
 
+   for (i = 0; i < (kernel_args_size / 4); i++) {
+   COMPUTE_DBG(sctx->screen, "input %u : %u\n", i,
+   kernel_args[i]);
+   }
+
si_upload_const_buffer(sctx, &kernel_args_buffer, (uint8_t*)kernel_args,
kernel_args_size, &kernel_args_offset);
kernel_args_va = r600_resource_va(ctx->screen,
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/7] i965/cfg: Add a foreach_block_and_inst macro.

2014-07-18 Thread Matt Turner
On Fri, Jul 18, 2014 at 2:57 AM, Pohjolainen, Topi
 wrote:
>
> Subject of patch number four sort of hints that this would be used in patch
> number three also. I didn't find any occurrences though, did you mean to use
> it there already?
>
> Anyway, patches 1-4 are:
>
> Reviewed-by: Topi Pohjolainen 

Yeah, looks like I use this macro in patches after this series. I'll
hold off on committing it until it's used.

Thanks for the review!
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965/fs: Fix gl_SampleID for 2x MSAA and SIMD16 mode.

2014-07-18 Thread Kenneth Graunke
We might be able to do this without an extra program key field, but this
is non-invasive and fixes the bug, for now.

This fixes the following Piglit tests on Broadwell:
- ARB_sample_shading/builtin-gl-sample-id 2
- ARB_sample_shading/builtin-gl-sample-position 2
- EXT_framebuffer_multisample/multisample-blit 2 color
- EXT_framebuffer_multisample/multisample-blit 2 color linear
- EXT_framebuffer_multisample/multisample-blit 2 depth
- EXT_framebuffer_multisample/no-color 2 depth combined
- EXT_framebuffer_multisample/no-color 2 depth separate
- EXT_framebuffer_multisample/no-color 2 depth single
- EXT_framebuffer_multisample/no-color 2 depth-computed combined
- EXT_framebuffer_multisample/no-color 2 depth-computed separate
- EXT_framebuffer_multisample/no-color 2 depth-computed single
- EXT_framebuffer_multisample/unaligned-blit 2 color msaa
- EXT_framebuffer_multisample/unaligned-blit 2 depth msaa

Signed-off-by: Kenneth Graunke 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80991
Cc: "10.2" 
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 7 ++-
 src/mesa/drivers/dri/i965/brw_wm.c   | 4 
 src/mesa/drivers/dri/i965/brw_wm.h   | 1 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index 1a5d987..a749e89 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -1304,6 +1304,11 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
* populating a temporary variable with the sequence (0, 1, 2, 3),
* and then reading from it using vstride=1, width=4, hstride=0.
* These computations hold good for 4x multisampling as well.
+   *
+   * For 2x MSAA and SIMD16, we want to use the sequence (0, 1, 0, 1):
+   * the first four slots are sample 0 of subspan 0; the next four
+   * are sample 1 of subspan 0; the third group is sample 0 of
+   * subspan 1, and finally sample 1 of subspan 1.
*/
   fs_inst *inst;
   inst = emit(BRW_OPCODE_AND, t1,
@@ -1313,7 +1318,7 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
   inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
   inst->force_writemask_all = true;
   /* This works for both SIMD8 and SIMD16 */
-  inst = emit(MOV(t2, brw_imm_v(0x3210)));
+  inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));
   inst->force_writemask_all = true;
   /* This special instruction takes care of setting vstride=1,
* width=4, hstride=0 of t2 during an ADD instruction.
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index d5a28dc..15fcc1f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -277,6 +277,8 @@ brw_wm_debug_recompile(struct brw_context *brw,
   old_key->flat_shade, key->flat_shade);
found |= key_debug(brw, "per-sample shading",
   old_key->persample_shading, key->persample_shading);
+   found |= key_debug(brw, "per-sample shading and 2x MSAA",
+  old_key->persample_2x, key->persample_2x);
found |= key_debug(brw, "number of color buffers",
   old_key->nr_color_regions, key->nr_color_regions);
found |= key_debug(brw, "MRT alpha test or alpha-to-coverage",
@@ -522,6 +524,8 @@ static void brw_wm_populate_key( struct brw_context *brw,
/* Ignore sample qualifier while computing this flag. */
key->persample_shading =
   _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
+   if (key->persample_shading)
+  key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
 
key->compute_pos_offset =
   _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
diff --git a/src/mesa/drivers/dri/i965/brw_wm.h 
b/src/mesa/drivers/dri/i965/brw_wm.h
index 7458301..77a3644 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.h
+++ b/src/mesa/drivers/dri/i965/brw_wm.h
@@ -62,6 +62,7 @@ struct brw_wm_prog_key {
GLuint stats_wm:1;
GLuint flat_shade:1;
GLuint persample_shading:1;
+   GLuint persample_2x:1;
GLuint nr_color_regions:5;
GLuint replicate_alpha:1;
GLuint render_to_fbo:1;
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965/fs: Fix gl_SampleMask handling for SIMD16 on Gen8+.

2014-07-18 Thread Kenneth Graunke
We actually want to use mov(16), not mov(8).

Fixes 7 Piglit tests: ARB_sample_shading/builtin-gl-sample-mask [2468]
and ARB_sample_shading/builtin-gl-sample-mask-simple [468].

Signed-off-by: Kenneth Graunke 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80991
Cc: "10.2" 
---
 src/mesa/drivers/dri/i965/gen8_fs_generator.cpp | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp 
b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
index 2924820..2d745fd 100644
--- a/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
+++ b/src/mesa/drivers/dri/i965/gen8_fs_generator.cpp
@@ -715,13 +715,8 @@ gen8_fs_generator::generate_set_omask(fs_inst *inst,
  mask.hstride == BRW_HORIZONTAL_STRIDE_0);
}
 
-   unsigned save_exec_size = default_state.exec_size;
-   default_state.exec_size = BRW_EXECUTE_8;
-
gen8_instruction *mov = MOV(dst, retype(mask, dst.type));
gen8_set_mask_control(mov, BRW_MASK_DISABLE);
-
-   default_state.exec_size = save_exec_size;
 }
 
 /**
-- 
2.0.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/3] util: Add util_memcpy_cpu_to_le32() v2

2014-07-18 Thread Patrick Baggett
On Fri, Jul 18, 2014 at 2:10 PM, Tom Stellard 
wrote:

> v2:
>   - Preserve word boundaries.
> ---
>  src/gallium/auxiliary/util/u_math.h | 17 +
>  1 file changed, 17 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_math.h
> b/src/gallium/auxiliary/util/u_math.h
> index b9ed197..5de181a 100644
> --- a/src/gallium/auxiliary/util/u_math.h
> +++ b/src/gallium/auxiliary/util/u_math.h
> @@ -812,6 +812,23 @@ util_bswap16(uint16_t n)
>(n << 8);
>  }
>
> +static INLINE void*
> +util_memcpy_cpu_to_le32(void *dest, void *src, size_t n)
>

I don't know where Mesa is with C99 standards, but if you are utilizing C99
keywords, I think "restrict" would help here to show that the two pointers
do not overlap. I'm not sure if you have to mark 'd' and 's' as restrict to get
the benefit if they are initialized by a typecast, but it probably wouldn't
be a bad idea.

This may be a no-go with C++ however.


> +{
> +#ifdef PIPE_ARCH_BIG_ENDIAN
> +   size_t i, e;
> +   asset(n % 4 == 0);
> +
> +   for (i = 0, e = n / 4; i < e; i++) {
> +   uint32_t *d = (uint32_t*)dest;
> +   uint32_t *s = (uint32_t*)src;
> +   d[i] = util_bswap32(s[i]);
> +   }
> +   return dest;
> +#else
> +   return memcpy(dest, src, n);
> +#endif
> +}
>
>  /**
>   * Clamp X to [MIN, MAX].
> --
> 1.8.1.5
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #6 from Barto  ---
I have started a bisect,

I use these settings :

git bisect start
git bisect bad a06c9791d1b7fcedfb56ecbdc601d42fab196916 
git bisect good 5f41cae633af3603ab369c139bfe2de6bbcc6369

but when git wants me to test this commit :

Bisecting: 19 revisions left to test after this (roughly 4 steps)
[0939d3d0974a579fa65b76ebc6074d61e11f03b0] sso: Add display list support for
ARB_separate_shader_objects new functions

I get an error during the compilation, it's impossible to build the commit
"0939d3d0974a579fa65b76ebc6074d61e11f03b0" :


drivers/common/meta.c: In function '_mesa_meta_begin':
drivers/common/meta.c:1202:1: error: invalid storage class for function
'invert_z'
 invert_z(GLfloat normZ)

I don't know how to fix this error, is it normal that commit
"0939d3d0974a579fa65b76ebc6074d61e11f03b0" fails to compile?

http://cgit.freedesktop.org/mesa/mesa/commit/?id=0939d3d0974a579fa65b76ebc6074d61e11f03b0

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #7 from Ilia Mirkin  ---
(In reply to comment #6)
> I get an error during the compilation, it's impossible to build the commit
> "0939d3d0974a579fa65b76ebc6074d61e11f03b0" :
> 
> 
> drivers/common/meta.c: In function '_mesa_meta_begin':
> drivers/common/meta.c:1202:1: error: invalid storage class for function
> 'invert_z'
>  invert_z(GLfloat normZ)
> 
> I don't know how to fix this error, is it normal that commit
> "0939d3d0974a579fa65b76ebc6074d61e11f03b0" fails to compile?

git bisect skip

Note that bugs are added and fixed all the time too, so if you're unsure
whether a commit is good or bad (e.g. the whole screen is black, or the plane
is missing, or whatever), just skip it and it'll build something else. Huge
regressions/build failures tend not to last too long in the tree, so it
shouldn't be too bad.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-18 Thread Marek Olšák
GL_MAP_READ_BIT is always unconditionally set for glBufferData, which
is the most-used function. However, st/mesa can look at the
immutability flag to distinguish between BufferData and BufferStorage
before pipe_buffer_create is called, and set the read flag if the
caller is BufferStorage and GL_MAP_READ_BIT is flagged, and not set
any flag otherwise.

Marek

On Fri, Jul 18, 2014 at 9:50 PM, Grigori Goronzy  wrote:
> On 18.07.2014 13:45, Marek Olšák wrote:
>> If the requirements of GL_MAP_COHERENT_BIT are satisfied, then the
>> patch is okay.
>>
>
> Apart from correctness, I still wonder how this will affect performance,
> most notably CPU reads. This change unconditionally uses write-combined,
> uncached memory for MAP_COHERENT buffers. Unless I am missing something,
> CPU reads will be slow, even if the buffer storage flags indicate that
> the buffer will be read by the CPU. Maybe it's a good idea to avoid
> write combined memory if the buffer storage flags include MAP_READ_BIT?
>
> Grigori
>
>> Marek
>>
>>
>> On Fri, Jul 18, 2014 at 5:19 AM, Michel Dänzer  wrote:
>>> On 17.07.2014 21:00, Marek Olšák wrote:
>>>> On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
>>>>> From: Michel Dänzer 
>>>>>
>>>>> This is hopefully safe: The kernel makes sure writes to these mappings
>>>>> finish before the GPU might start reading from them, and the GPU caches
>>>>> are invalidated at the start of a command stream.
>>>>>
>>>> The resource flags actually tell you what you can do. If the COHERENT
>>>> flag is set, the mapping must be cached.
>>>
>>> Why is that required? As I explain above, we should satisfy the
>>> requirements of the ARB_buffer_storage extension AFAICT.
>>>
>>>
>>> As pointed out by you and Grigori in other posts, I should probably just
>>> drop the special treatment of persistent mappings though, so the
>>> placement and flags are derived from the buffer usage.
>>>
>>>
>>> --
>>> Earthling Michel Dänzer|  http://www.amd.com
>>> Libre software enthusiast  |Mesa and X developer
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] util: Add util_memcpy_cpu_to_le32() v3

2014-07-18 Thread Tom Stellard
v2:
  - Preserve word boundaries.

v3:
  - Use const and restrict.
  - Fix indentation.
---
 src/gallium/auxiliary/util/u_math.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/gallium/auxiliary/util/u_math.h 
b/src/gallium/auxiliary/util/u_math.h
index b9ed197..f6dcb22 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -812,6 +812,23 @@ util_bswap16(uint16_t n)
   (n << 8);
 }
 
+static INLINE void*
+util_memcpy_cpu_to_le32(void * restrict dest, const void * restrict src, 
size_t n)
+{
+#ifdef PIPE_ARCH_BIG_ENDIAN
+   size_t i, e;
+   asset(n % 4 == 0);
+
+   for (i = 0, e = n / 4; i < e; i++) {
+  uint32_t * restrict d = (uint32_t* restrict)dest;
+  const uint32_t * restrict s = (const uint32_t* restrict)src;
+  d[i] = util_bswap32(s[i]);
+   }
+   return dest;
+#else
+   return memcpy(dest, src, n);
+#endif
+}
 
 /**
  * Clamp X to [MIN, MAX].
-- 
1.8.1.5

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 4/5] r600g, radeonsi: Use write-combined persistent GTT mappings

2014-07-18 Thread Marek Olšák
On Fri, Jul 18, 2014 at 5:19 AM, Michel Dänzer  wrote:
> On 17.07.2014 21:00, Marek Olšák wrote:
>> On Thu, Jul 17, 2014 at 12:01 PM, Michel Dänzer  wrote:
>>> From: Michel Dänzer 
>>>
>>> This is hopefully safe: The kernel makes sure writes to these mappings
>>> finish before the GPU might start reading from them, and the GPU caches
>>> are invalidated at the start of a command stream.
>>>
>> The resource flags actually tell you what you can do. If the COHERENT
>> flag is set, the mapping must be cached.
>
> Why is that required? As I explain above, we should satisfy the
> requirements of the ARB_buffer_storage extension AFAICT.
>
>
> As pointed out by you and Grigori in other posts, I should probably just
> drop the special treatment of persistent mappings though, so the
> placement and flags are derived from the buffer usage.

Yes, please drop the special treatment of persistent mappings. Thank you.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeonsi/compute: Add scratch buffer support v2

2014-07-18 Thread Tom Stellard
The scratch buffer will be used for private memory and also register
spilling.

v2:
  - Code cleanups
---

I had some uncommitted changes left in my tree when I generated v1 of this 
patch.

 src/gallium/drivers/radeonsi/si_compute.c | 80 ++-
 src/gallium/drivers/radeonsi/si_shader.c  |  5 ++
 src/gallium/drivers/radeonsi/si_shader.h  |  2 +
 3 files changed, 85 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_compute.c 
b/src/gallium/drivers/radeonsi/si_compute.c
index a7d61e7..42e4fec 100644
--- a/src/gallium/drivers/radeonsi/si_compute.c
+++ b/src/gallium/drivers/radeonsi/si_compute.c
@@ -122,6 +122,43 @@ static void si_set_global_binding(
}
 }
 
+/**
+ * This function computes the value for R_00B860_COMPUTE_TMPRING_SIZE.WAVES
+ * \p block_layout is the number of threads in each work group.
+ * \p grid_layout is the number of work groups.
+ */
+static unsigned compute_num_waves_for_scratch(
+   const struct radeon_info *info,
+   const uint *block_layout,
+   const uint *grid_layout)
+{
+   unsigned num_sh = MAX2(info->max_sh_per_se, 1);
+   unsigned num_se = MAX2(info->max_se, 1);
+   unsigned num_blocks = 1;
+   unsigned threads_per_block = 1;
+   unsigned waves_per_block;
+   unsigned waves_per_sh;
+   unsigned waves;
+   unsigned scratch_waves;
+   unsigned i;
+
+   for (i = 0; i < 3; i++) {
+   threads_per_block *= block_layout[i];
+   num_blocks *= grid_layout[i];
+   }
+
+   waves_per_block = align(threads_per_block, 64) / 64;
+   waves = waves_per_block * num_blocks;
+   waves_per_sh = align(waves, num_sh * num_se) / (num_sh * num_se);
+   scratch_waves = waves_per_sh * num_sh * num_se;
+
+   if (waves_per_block > waves_per_sh) {
+   scratch_waves = waves_per_block * num_sh * num_se;
+   }
+
+   return scratch_waves;
+}
+
 static void si_launch_grid(
struct pipe_context *ctx,
const uint *block_layout, const uint *grid_layout,
@@ -136,11 +173,13 @@ static void si_launch_grid(
uint32_t kernel_args_offset = 0;
uint32_t *kernel_args;
uint64_t kernel_args_va;
+   uint64_t scratch_buffer_va = 0;
uint64_t shader_va;
unsigned arg_user_sgpr_count = NUM_USER_SGPRS;
unsigned i;
struct si_pipe_shader *shader = &program->kernels[pc];
unsigned lds_blocks;
+   unsigned num_waves_for_scratch;
 
pm4->compute_pkt = true;
si_cmd_context_control(pm4);
@@ -158,7 +197,8 @@ static void si_launch_grid(
/* Upload the kernel arguments */
 
/* The extra num_work_size_bytes are for work group / work item size 
information */
-   kernel_args_size = program->input_size + num_work_size_bytes;
+   kernel_args_size = program->input_size + num_work_size_bytes + 8 /* For 
scratch va */;
+
kernel_args = MALLOC(kernel_args_size);
for (i = 0; i < 3; i++) {
kernel_args[i] = grid_layout[i];
@@ -166,8 +206,31 @@ static void si_launch_grid(
kernel_args[i + 6] = block_layout[i];
}
 
+   num_waves_for_scratch = compute_num_waves_for_scratch(
+   &sctx->screen->b.info, block_layout, grid_layout);
+
memcpy(kernel_args + (num_work_size_bytes / 4), input, 
program->input_size);
 
+   if (shader->scratch_bytes_per_wave > 0) {
+   unsigned scratch_bytes = shader->scratch_bytes_per_wave *
+   num_waves_for_scratch;
+
+   COMPUTE_DBG(sctx->screen, "Waves: %u; Scratch per wave: %u 
bytes; "
+   "Total Scratch: %u bytes\n", num_waves_for_scratch,
+   shader->scratch_bytes_per_wave, scratch_bytes);
+   if (!shader->scratch_bo) {
+   shader->scratch_bo = (struct r600_resource*)
+   si_resource_create_custom(sctx->b.b.screen,
+   PIPE_USAGE_DEFAULT, scratch_bytes);
+   }
+   scratch_buffer_va = r600_resource_va(ctx->screen,
+   (struct pipe_resource*)shader->scratch_bo);
+   si_pm4_add_bo(pm4, shader->scratch_bo,
+   RADEON_USAGE_READWRITE,
+   RADEON_PRIO_SHADER_RESOURCE_RW);
+
+   }
+
for (i = 0; i < (kernel_args_size / 4); i++) {
COMPUTE_DBG(sctx->screen, "input %u : %u\n", i,
kernel_args[i]);
@@ -183,6 +246,10 @@ static void si_launch_grid(
 
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0, kernel_args_va);
si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 4, 
S_008F04_BASE_ADDRESS_HI (kernel_args_va >> 32) | S_008F04_STRIDE(0));
+   si_pm4_set_reg(pm4, R_00B900_COMPUTE_USER_DATA_0 + 8, 
scratch_buffer_va);
+   si_pm4_set_reg(

Re: [Mesa-dev] [PATCH 04/12] main/formats: Add layout and swizzle information

2014-07-18 Thread Marek Olšák
On Thu, Jul 17, 2014 at 8:04 PM, Jason Ekstrand  wrote:
> Signed-off-by: Jason Ekstrand 
> ---
>  src/mesa/main/format_info.py | 11 +++
>  src/mesa/main/formats.c  | 46 
> 
>  src/mesa/main/formats.h  | 29 
>  3 files changed, 86 insertions(+)
>
> diff --git a/src/mesa/main/format_info.py b/src/mesa/main/format_info.py
> index b8956a5..448bd00 100644
> --- a/src/mesa/main/format_info.py
> +++ b/src/mesa/main/format_info.py
> @@ -96,6 +96,14 @@ def get_gl_data_type(fmat):
> else:
>assert False
>
> +def get_mesa_layout(fmat):
> +   if fmat.layout == 'array':
> +  return 'MESA_FORMAT_LAYOUT_ARRAY'
> +   elif fmat.layout == 'packed':
> +  return 'MESA_FORMAT_LAYOUT_PACKED'
> +   else:
> +  return 'MESA_FORMAT_LAYOUT_OTHER'
> +
>  def get_channel_bits(fmat, chan_name):
> if fmat.is_compressed():
># These values are pretty-much bogus, but OpenGL requires that we
> @@ -166,6 +174,7 @@ for fmat in formats:
> print '   {'
> print '  {0},'.format(fmat.name)
> print '  "{0}",'.format(fmat.name)
> +   print '  {0},'.format(get_mesa_layout(fmat))
> print '  {0},'.format(get_gl_base_format(fmat))
> print '  {0},'.format(get_gl_data_type(fmat))
>
> @@ -176,6 +185,8 @@ for fmat in formats:
>
> print '  {0}, {1}, {2},'.format(fmat.block_width, fmat.block_height,
> int(fmat.block_size() / 8))
> +
> +   print '  {{ {0} }},'.format(', '.join(map(str, fmat.swizzle)))
> print '   },'
>
>  print '};'
> diff --git a/src/mesa/main/formats.c b/src/mesa/main/formats.c
> index 39cc5f1..f03425e 100644
> --- a/src/mesa/main/formats.c
> +++ b/src/mesa/main/formats.c
> @@ -40,6 +40,8 @@ struct gl_format_info
> /** text name for debugging */
> const char *StrName;
>
> +   enum mesa_format_layout Layout;
> +
> /**
>  * Base format is one of GL_RED, GL_RG, GL_RGB, GL_RGBA, GL_ALPHA,
>  * GL_LUMINANCE, GL_LUMINANCE_ALPHA, GL_INTENSITY, GL_YCBCR_MESA,
> @@ -67,6 +69,8 @@ struct gl_format_info
>  */
> GLubyte BlockWidth, BlockHeight;
> GLubyte BytesPerBlock;
> +
> +   uint8_t Swizzle[4];
>  };
>
>  #include "format_info.c"
> @@ -178,6 +182,21 @@ _mesa_get_format_max_bits(mesa_format format)
>
>
>  /**
> + * Return the layout type of the given format.
> + * The return value will be one of:
> + *MESA_FORMAT_LAYOUT_ARRAY
> + *MESA_FORMAT_LAYOUT_PACKED
> + *MESA_FORMAT_LAYOUT_OTHER
> + */
> +extern enum mesa_format_layout
> +_mesa_get_format_layout(mesa_format format)
> +{
> +   const struct gl_format_info *info = _mesa_get_format_info(format);
> +   return info->Layout;
> +}
> +
> +
> +/**
>   * Return the data type (or more specifically, the data representation)
>   * for the given format.
>   * The return value will be one of:
> @@ -224,6 +243,33 @@ _mesa_get_format_block_size(mesa_format format, GLuint 
> *bw, GLuint *bh)
>  }
>
>
> +/**
> + * Returns an array of four numbers representing the transformation
> + * from the RGBA or SZ colorspace to the given format.  For array formats,
> + * the i'th RGBA component is given by:
> + *
> + * if (swizzle[i] <= MESA_FORMAT_SWIZZLE_W)
> + *comp = data[swizzle[i]];
> + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ZERO)
> + *comp = 0;
> + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_ONE)
> + *comp = 1;
> + * else if (swizzle[i] == MESA_FORMAT_SWIZZLE_NONE)
> + *// data does not contain a channel of this format
> + *
> + * For packed formats, the swizzle gives the number of components left of
> + * the least significant bit.
> + *
> + * Compressed formats have no swizzle.
> + */
> +void
> +_mesa_get_format_swizzle(mesa_format format, uint8_t swizzle_out[4])
> +{
> +   const struct gl_format_info *info = _mesa_get_format_info(format);
> +   memcpy(swizzle_out, info->Swizzle, sizeof(info->Swizzle));
> +}
> +
> +
>  /** Is the given format a compressed format? */
>  GLboolean
>  _mesa_is_format_compressed(mesa_format format)
> diff --git a/src/mesa/main/formats.h b/src/mesa/main/formats.h
> index dc50bc8..48aad44 100644
> --- a/src/mesa/main/formats.h
> +++ b/src/mesa/main/formats.h
> @@ -56,6 +56,15 @@ extern "C" {
>   */
>  #define MAX_PIXEL_BYTES 16
>
> +/**
> + * Specifies the layout of a pixel format.  See the MESA_FORMAT
> + * documentation below.
> + */
> +enum mesa_format_layout {
> +   MESA_FORMAT_LAYOUT_ARRAY,
> +   MESA_FORMAT_LAYOUT_PACKED,
> +   MESA_FORMAT_LAYOUT_OTHER,
> +};
>
>  /**
>   * Mesa texture/renderbuffer image formats.
> @@ -419,6 +428,9 @@ _mesa_get_format_bits(mesa_format format, GLenum pname);
>  extern GLuint
>  _mesa_get_format_max_bits(mesa_format format);
>
> +extern enum mesa_format_layout
> +_mesa_get_format_layout(mesa_format format);
> +
>  extern GLenum
>  _mesa_get_format_datatype(mesa_format format);
>
> @@ -428,6 +440,23 @@ _mesa_get_format_base_format(mesa_format format);
>  extern void

[Mesa-dev] [PATCH 01/17] i965: Split gen6 renderbuffer surface state from gen5 and older

2014-07-18 Thread Jordan Justen
We will program the gen6 renderbuffer surface state differently to
enable layered rendering on gen6.

Signed-off-by: Jordan Justen 
Reviewed-by: Topi Pohjolainen 
---
 src/mesa/drivers/dri/i965/Makefile.sources |   1 +
 src/mesa/drivers/dri/i965/brw_context.c|   4 +
 src/mesa/drivers/dri/i965/brw_state.h  |   3 +
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 152 +
 4 files changed, 160 insertions(+)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_surface_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index e235679..43e3378 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -130,6 +130,7 @@ i965_FILES = \
gen6_scissor_state.c \
gen6_sf_state.c \
 gen6_sol.c \
+   gen6_surface_state.c \
gen6_urb.c \
gen6_viewport_state.c \
gen6_vs_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index c47ad36..dbe68a8 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -645,6 +645,10 @@ brwCreateContext(gl_api api,
   gen7_init_vtable_surface_functions(brw);
   gen7_init_vtable_sampler_functions(brw);
   brw->vtbl.emit_depth_stencil_hiz = gen7_emit_depth_stencil_hiz;
+   } else if (brw->gen >= 6) {
+  gen6_init_vtable_surface_functions(brw);
+  gen4_init_vtable_sampler_functions(brw);
+  brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
} else {
   gen4_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 6f1db6c..8e176f3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -262,6 +262,9 @@ calculate_attr_overrides(const struct brw_context *brw,
  uint32_t *flat_enables,
  uint32_t *urb_entry_read_length);
 
+/* gen6_surface_state.c */
+void gen6_init_vtable_surface_functions(struct brw_context *brw);
+
 /* brw_vs_surface_state.c */
 void
 brw_upload_pull_constants(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
new file mode 100644
index 000..9fec372
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include "main/context.h"
+#include "main/blend.h"
+#include "main/mtypes.h"
+#include "main/samplerobj.h"
+#include "program/prog_parameter.h"
+
+#include "intel_mipmap_tree.h"
+#include "intel_batchbuffer.h"
+#include "intel_tex.h"
+#include "intel_fbo.h"
+#include "intel_buffer_objects.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+#include "brw_wm.h"
+
+/**
+ * Sets up a surface state structure to point at the given region.
+ * While it is only used for the front/back buffer currently, it should be
+ * usable for further buffers when doing ARB_draw_buffer support.
+ */
+static void
+gen6_update_renderbuffer_surface(struct brw_context *brw,
+ struct gl_renderbuffer *rb,
+ bool layered,
+ unsigned int unit)
+{
+   struct gl_context *ctx = &brw->ctx;
+   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+   struct intel_mipmap_tree *mt = irb->mt;
+   uint32_t *surf;
+   uint32_t tile_x, tile_y;
+   uint32_t format = 0;
+   /* _NEW_BUFFERS */
+   mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
+   uint32_t surf_index =
+  brw->wm.prog_data->binding_table.render_target_start + unit;
+
+   ass

[Mesa-dev] [PATCH 17/17] i965/gen6: Force non_mip_arrays for separate stencil/hiz

2014-07-18 Thread Jordan Justen
For gen6 we will use non-mipmapped array spacing, but with multiple
mip levels. This is needed because gen6 hiz and separate stencil only
support a single mip-level.

PRM Volume 1, Part 1, 7.18.3.7.2 For separate stencil buffer [DevILK]
to [DevSNB]:
 "The separate stencil buffer does not support mip mapping, thus the
  storage for LODs other than LOD 0 is not needed."

We still allocate storage for the other stencil mip-levels within a
single texture, but each mip-level will use non-mip-array spacing.

PRM Volume 2, Part 1, 7.5.3 Hierarchical Depth Buffer
 "[DevSNB]: The hierarchical depth buffer does not support the LOD
  field, it is assumed by hardware to be zero. A separate
  hierarchical depth buffer is required for each LOD used, and the
  corresponding buffer’s state delivered to hardware each time a new
  depth buffer state with modified LOD is delivered."

We allocate storage for the other hiz mip-levels within a single
texture, but each mip-level will use non-mip-array spacing.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 2a5afab..67cf515 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -331,8 +331,10 @@ intel_miptree_create_layout(struct brw_context *brw,
   }
}
 
-   /* non_mip_arrays is only used for non-IMS MSAA surfaces.  TODO: can we
-* use it elsewhere?
+   /* non_mip_arrays is only used for:
+* - non-IMS MSAA surfaces
+* - gen6 separate stencil
+* - gen6 hiz
 */
switch (mt->msaa_layout) {
case INTEL_MSAA_LAYOUT_NONE:
@@ -358,6 +360,7 @@ intel_miptree_create_layout(struct brw_context *brw,
_mesa_get_format_base_format(format) == GL_DEPTH_STENCIL &&
(brw->must_use_separate_stencil ||
(brw->has_separate_stencil && brw_is_hiz_depth_format(brw, format)))) {
+  bool force_non_mip_arrays = brw->gen == 6;
   mt->stencil_mt = intel_miptree_create(brw,
 mt->target,
 MESA_FORMAT_S_UINT8,
@@ -369,7 +372,7 @@ intel_miptree_create_layout(struct brw_context *brw,
 true,
 num_samples,
 INTEL_MIPTREE_TILING_ANY,
-false);
+force_non_mip_arrays);
   if (!mt->stencil_mt) {
 intel_miptree_release(&mt);
 return NULL;
@@ -1386,6 +1389,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
struct intel_mipmap_tree *mt)
 {
assert(mt->hiz_mt == NULL);
+   bool force_non_mip_arrays = brw->gen == 6;
mt->hiz_mt = intel_miptree_create(brw,
  mt->target,
  mt->format,
@@ -1397,7 +1401,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
  true,
  mt->num_samples,
  INTEL_MIPTREE_TILING_ANY,
- false);
+ force_non_mip_arrays);
 
if (!mt->hiz_mt)
   return false;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 05/17] i965/gen6 depth surface: calculate more specific surface type

2014-07-18 Thread Jordan Justen
(171e633 for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Note: Cube maps are treated as 2D arrays with 6 times as
many array elements as the cube map array would have.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 17 ++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 33 
 2 files changed, 50 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index eb865b9..3fc36aa 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -791,6 +791,23 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_x = params->depth.x_offset;
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
+   uint32_t surftype;
+   GLenum gl_target = params->depth.mt->target;
+
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+   * situation, but experiments show that gl_Layer doesn't work when we do
+   * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+   * equivalent.
+   */
+  surftype = BRW_SURFACE_2D;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
 
brw_get_depthstencil_tile_masks(params->depth.mt,
params->depth.level,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index d37aae8..0480142 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -30,6 +30,7 @@
 #include "brw_state.h"
 #include "brw_defines.h"
 
+#include "main/mtypes.h"
 #include "main/fbobject.h"
 #include "main/glformats.h"
 
@@ -43,6 +44,13 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 uint32_t width, uint32_t height,
 uint32_t tile_x, uint32_t tile_y)
 {
+   struct gl_context *ctx = &brw->ctx;
+   struct gl_framebuffer *fb = ctx->DrawBuffer;
+   uint32_t surftype;
+   GLenum gl_target = GL_TEXTURE_2D;
+   const struct intel_renderbuffer *irb = NULL;
+   const struct gl_renderbuffer *rb = NULL;
+
/* Enable the hiz bit if we're doing separate stencil, because it and the
 * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
 * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
@@ -63,6 +71,31 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   intel_emit_depth_stall_flushes(brw);
}
 
+   irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
+   if (!irb)
+  irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
+   rb = (struct gl_renderbuffer*) irb;
+
+   if (rb) {
+  if (rb->TexImage)
+ gl_target = rb->TexImage->TexObject->Target;
+   }
+
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+   * situation, but experiments show that gl_Layer doesn't work when we do
+   * this.  So we use BRW_SURFACE_2D, since for rendering purposes this is
+   * equivalent.
+   */
+  surftype = BRW_SURFACE_2D;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
+
unsigned int len;
if (brw->gen >= 6)
   len = 7;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 15/17] i965/gen6: Force tile alignment for each stencil/hiz LOD

2014-07-18 Thread Jordan Justen
Gen6 doesn't support multiple miplevels for hiz and stencil.

Therefore, we must point to the LOD directly during rendering.

However, we have also removed the tile offsets from normal depth surfaces,
so we need to align each LOD to a tile boundary for hiz and stencil.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 39 +++---
 1 file changed, 36 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 9d248cb..b6fed4d 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -35,6 +35,7 @@
 #include "intel_mipmap_tree.h"
 #include "brw_context.h"
 #include "main/macros.h"
+#include "main/glformats.h"
 
 #define FILE_DEBUG_FLAG DEBUG_MIPTREE
 
@@ -318,9 +319,41 @@ void
 brw_miptree_layout(struct brw_context *brw, struct intel_mipmap_tree *mt)
 {
bool multisampled = mt->num_samples > 1;
-   mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt->format);
-   mt->align_h =
-  intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
+   bool gen6_hiz_or_stencil = false;
+
+   if (brw->gen == 6 && mt->non_mip_arrays) {
+  GLenum base_format = _mesa_get_format_base_format(mt->format);
+  gen6_hiz_or_stencil = _mesa_is_depth_or_stencil_format(base_format);
+   }
+
+   if (gen6_hiz_or_stencil) {
+  /* On gen6, we use non_mip_arrays for stencil/hiz because the hardware
+   * doesn't support multiple mip levels on stencil/hiz.
+   *
+   * PRM Vol 2, Part 1, 7.5.3 Hierarchical Depth Buffer:
+   * "The hierarchical depth buffer does not support the LOD field"
+   *
+   * PRM Vol 2, Part 1, 7.5.4.1 Separate Stencil Buffer:
+   * "The stencil depth buffer does not support the LOD field"
+   */
+  if (mt->format == MESA_FORMAT_S_UINT8) {
+ /* Stencil uses W tiling, so we force W tiling alignment for the
+  * non_mip_arrays based mip-levels.
+  */
+ mt->align_w = 64;
+ mt->align_h = 64;
+  } else {
+ /* Depth uses Y tiling, so we force Y tiling alignment for the
+  * non_mip_arrays based mip-levels.
+  */
+ mt->align_w = 128 / mt->cpp;
+ mt->align_h = 32;
+  }
+   } else {
+  mt->align_w = intel_horizontal_texture_alignment_unit(brw, mt->format);
+  mt->align_h =
+ intel_vertical_texture_alignment_unit(brw, mt->format, multisampled);
+   }
 
switch (mt->target) {
case GL_TEXTURE_CUBE_MAP:
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 10/17] i965/gen6 fbo: make unmatched depth/stencil configs return unsupported

2014-07-18 Thread Jordan Justen
(f3c886b for gen6)

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/intel_fbo.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index e43e18b..22f707f 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -673,8 +673,8 @@ intel_validate_framebuffer(struct gl_context *ctx, struct 
gl_framebuffer *fb)
}
 
if (depth_mt && stencil_mt) {
-  if (brw->gen >= 7) {
- /* For gen >= 7, we are using the lod/minimum-array-element fields
+  if (brw->gen >= 6) {
+ /* For gen >= 6, we are using the lod/minimum-array-element fields
   * and supportting layered rendering. This means that we must restrict
   * the depth & stencil attachments to match in various more retrictive
   * ways. (width, height, depth, LOD and layer)
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 07/17] i965/gen6 depth surface: calculate LOD being rendered to

2014-07-18 Thread Jordan Justen
(08ef1dd for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 3 +++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 20ba31e..131c4aa 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -794,6 +794,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
GLenum gl_target = params->depth.mt->target;
+   unsigned int lod;
 
switch (gl_target) {
case GL_TEXTURE_CUBE_MAP_ARRAY:
@@ -817,6 +818,8 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
NULL,
&tile_mask_x, &tile_mask_y);
 
+   lod = params->depth.level - params->depth.mt->first_level;
+
/* 3DSTATE_DEPTH_BUFFER */
{
   uint32_t tile_x = draw_x & tile_mask_x;
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 8bc0073..8ee7c00 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -49,6 +49,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
uint32_t surftype;
unsigned int depth = 1;
GLenum gl_target = GL_TEXTURE_2D;
+   unsigned int lod;
const struct intel_renderbuffer *irb = NULL;
const struct gl_renderbuffer *rb = NULL;
 
@@ -99,6 +100,8 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   break;
}
 
+   lod = irb ? irb->mt_level - irb->mt->first_level : 0;
+
unsigned int len;
if (brw->gen >= 6)
   len = 7;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 02/17] i965/gen6: add support for layered renderbuffers

2014-07-18 Thread Jordan Justen
Rather than pointing the surface_state directly at a single
sub-image of the texture for rendering, we now point the
surface_state at the top level of the texture, and configure
the surface_state as needed based on this.

v2:
 * Use SET_FIELD as suggested by Topi
 * Simplify min_array_element assignment as suggested by Topi

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_defines.h|  4 ++
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 76 --
 2 files changed, 40 insertions(+), 40 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 8b73c5c..fa39e4e 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -548,6 +548,10 @@
 /* Surface state DW4 */
 #define BRW_SURFACE_MIN_LOD_SHIFT  28
 #define BRW_SURFACE_MIN_LOD_MASK   INTEL_MASK(31, 28)
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_SHIFT17
+#define BRW_SURFACE_MIN_ARRAY_ELEMENT_MASK INTEL_MASK(27, 17)
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_SHIFT8
+#define BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT_MASK INTEL_MASK(16, 8)
 #define BRW_SURFACE_MULTISAMPLECOUNT_1  (0 << 4)
 #define BRW_SURFACE_MULTISAMPLECOUNT_4  (2 << 4)
 #define GEN7_SURFACE_MULTISAMPLECOUNT_1 (0 << 3)
diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
index 9fec372..6fc8bdf 100644
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -26,6 +26,7 @@
 #include "main/blend.h"
 #include "main/mtypes.h"
 #include "main/samplerobj.h"
+#include "main/texformat.h"
 #include "program/prog_parameter.h"
 
 #include "intel_mipmap_tree.h"
@@ -54,30 +55,17 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
struct intel_renderbuffer *irb = intel_renderbuffer(rb);
struct intel_mipmap_tree *mt = irb->mt;
uint32_t *surf;
-   uint32_t tile_x, tile_y;
uint32_t format = 0;
/* _NEW_BUFFERS */
mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
+   uint32_t surftype;
+   int depth = MAX2(rb->Depth, 1);
+   GLenum gl_target = rb->TexImage ?
+ rb->TexImage->TexObject->Target : GL_TEXTURE_2D;
+
uint32_t surf_index =
   brw->wm.prog_data->binding_table.render_target_start + unit;
 
-   assert(!layered);
-
-   if (rb->TexImage && !brw->has_surface_tile_offset) {
-  intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
-
-  if (tile_x != 0 || tile_y != 0) {
-/* Original gen4 hardware couldn't draw to a non-tile-aligned
- * destination in a miptree unless you actually setup your renderbuffer
- * as a miptree and used the fragile lod/array_index/etc. controls to
- * select the image.  So, instead, we just make a new single-level
- * miptree and render into that.
- */
-intel_renderbuffer_move_to_temp(brw, irb, false);
-mt = irb->mt;
-  }
-   }
-
intel_miptree_used_for_rendering(irb->mt);
 
surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
@@ -89,30 +77,38 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
 __FUNCTION__, _mesa_get_format_name(rb_format));
}
 
-   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
- format << BRW_SURFACE_FORMAT_SHIFT);
+   switch (gl_target) {
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_CUBE_MAP:
+  surftype = BRW_SURFACE_2D;
+  depth *= 6;
+  break;
+   default:
+  surftype = translate_tex_target(gl_target);
+  break;
+   }
+
+   const int min_array_element = layered ? 0 : irb->mt_layer;
+
+   surf[0] = SET_FIELD(surftype, BRW_SURFACE_TYPE) |
+ SET_FIELD(format, BRW_SURFACE_FORMAT);
 
/* reloc */
-   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
- mt->bo->offset64);
-
-   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
- (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
-
-   surf[3] = (brw_get_surface_tiling_bits(mt->tiling) |
- (mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
-
-   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
-
-   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
-   /* Note that the low bits of these fields are missing, so
-* there's the possibility of getting in trouble.
-*/
-   assert(tile_x % 4 == 0);
-   assert(tile_y % 2 == 0);
-   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
- (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
- (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
+   surf[1] = mt->bo->offset64;
+
+   surf[2] = SET_FIELD(mt->logical_width0 - 1, BRW_SURFACE_WIDTH) |
+ SET_FIELD(mt->logical_height0 - 1, BRW_SURFACE_HEIGHT) |
+ SET_FIELD(irb->mt_level - irb->mt->first_level, BRW_SURFACE_LOD);
+
+   sur

[Mesa-dev] [PATCH 03/17] i965/gen6: Adjust render height in errata case for MSAA

2014-07-18 Thread Jordan Justen
In the gen6 PRM Volume 1 Part 1: Graphics Core, Section
7.18.3.7.1 (Surface Arrays For all surfaces other than separate
stencil buffer):

 "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the
  value calculated in the equation above , for every other odd Surface
  Height starting from 1 i.e. 1,5,9,13"

Since this Qpitch errata only impacts the sampler, we have to adjust
the input for the rendering surface to achieve the same qpitch. For
the affected heights, we increment the height by 1 for the rendering
surface.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_surface_state.c | 18 +-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c 
b/src/mesa/drivers/dri/i965/gen6_surface_state.c
index 6fc8bdf..29a4800 100644
--- a/src/mesa/drivers/dri/i965/gen6_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c
@@ -96,8 +96,24 @@ gen6_update_renderbuffer_surface(struct brw_context *brw,
/* reloc */
surf[1] = mt->bo->offset64;
 
+   /* In the gen6 PRM Volume 1 Part 1: Graphics Core, Section 7.18.3.7.1
+* (Surface Arrays For all surfaces other than separate stencil buffer):
+*
+* "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the value
+*  calculated in the equation above , for every other odd Surface Height
+*  starting from 1 i.e. 1,5,9,13"
+*
+* Since this Qpitch errata only impacts the sampler, we have to adjust the
+* input for the rendering surface to achieve the same qpitch. For the
+* affected heights, we increment the height by 1 for the rendering
+* surface.
+*/
+   int height0 = irb->mt->logical_height0;
+   if (brw->gen == 6 && irb->mt->num_samples > 1 && (height0 % 4) == 1)
+  height0++;
+
surf[2] = SET_FIELD(mt->logical_width0 - 1, BRW_SURFACE_WIDTH) |
- SET_FIELD(mt->logical_height0 - 1, BRW_SURFACE_HEIGHT) |
+ SET_FIELD(height0 - 1, BRW_SURFACE_HEIGHT) |
  SET_FIELD(irb->mt_level - irb->mt->first_level, BRW_SURFACE_LOD);
 
surf[3] = brw_get_surface_tiling_bits(mt->tiling) |
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 14/17] i965: Support non_mip_arrays for multiple miplevels

2014-07-18 Thread Jordan Justen
Previously array spacing lod0 was only used with a single mip level.
It indicated that no mip level spacing should be used between array
slices.

gen6 separate stencil & hiz only support LOD0, so we need to allocate
the miptree similar to array spacing lod0, except we also need
multiple mip levels.

So, the miptree is allocated with tightly packed array slice spacing,
but we still also pack the miplevels into the region similar to a
normal multi mip level packing.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_tex_layout.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 9e2720b..9d248cb 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -203,6 +203,11 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt)
   if (mt->compressed)
 img_height /= mt->align_h;
 
+  if (mt->non_mip_arrays) {
+ /* Compact arrays with separated miplevels */
+img_height *= depth;
+  }
+
   /* Because the images are packed better, the final offset
* might not be the maximal one:
*/
@@ -238,6 +243,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
 struct intel_mipmap_tree *mt)
 {
int h0, h1;
+   unsigned height = mt->physical_height0;
 
h0 = ALIGN(mt->physical_height0, mt->align_h);
h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
@@ -251,11 +257,22 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
brw_miptree_layout_2d(mt);
 
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
+  unsigned img_height;
+  img_height = ALIGN(height, mt->align_h);
+  if (mt->compressed)
+ img_height /= mt->align_h;
+
   for (int q = 0; q < mt->physical_depth0; q++) {
-intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
+ if (mt->non_mip_arrays) {
+intel_miptree_set_image_offset(mt, level, q, 0, q * img_height);
+ } else {
+intel_miptree_set_image_offset(mt, level, q, 0, q * physical_qpitch);
+ }
   }
+  height = minify(height, 1);
}
-   mt->total_height = physical_qpitch * mt->physical_depth0;
+   if (!mt->non_mip_arrays)
+  mt->total_height = physical_qpitch * mt->physical_depth0;
 
align_cube(mt);
 }
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 08/17] i965/gen6 depth surface: calculate minimum array element being rendered

2014-07-18 Thread Jordan Justen
(a23cfb8 for gen6)

In layered rendering this will be 0. Otherwise it will be the
selected slice.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp |  3 +++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 10 ++
 2 files changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 131c4aa..ff1732d 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -793,6 +793,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t tile_mask_x, tile_mask_y;
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
+   unsigned int min_array_element;
GLenum gl_target = params->depth.mt->target;
unsigned int lod;
 
@@ -818,6 +819,8 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
NULL,
&tile_mask_x, &tile_mask_y);
 
+   min_array_element = params->depth.layer;
+
lod = params->depth.level - params->depth.mt->first_level;
 
/* 3DSTATE_DEPTH_BUFFER */
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 8ee7c00..abb2124 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -48,6 +48,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
struct gl_framebuffer *fb = ctx->DrawBuffer;
uint32_t surftype;
unsigned int depth = 1;
+   unsigned int min_array_element;
GLenum gl_target = GL_TEXTURE_2D;
unsigned int lod;
const struct intel_renderbuffer *irb = NULL;
@@ -100,6 +101,15 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   break;
}
 
+   if (fb->MaxNumLayers > 0 || !irb) {
+  min_array_element = 0;
+   } else if (irb->mt->num_samples > 1) {
+  /* Convert physical layer to logical layer. */
+  min_array_element = irb->mt_layer / irb->mt->num_samples;
+   } else {
+  min_array_element = irb->mt_layer;
+   }
+
lod = irb ? irb->mt_level - irb->mt->first_level : 0;
 
unsigned int len;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 00/17] Gen6 render surface state changes

2014-07-18 Thread Jordan Justen
The goal for this series was to allow layered rendering to work with
gen6. On gen6, it also fixes 10 failing piglit tests, 54 crashing
piglit tests, and a performance regression bug
(https://bugs.freedesktop.org/show_bug.cgi?id=56127).

This series is available on my gen6-layered branch in
git://people.freedesktop.org/~jljusten/mesa

On gm45, the previous version of this series had no piglit changes, but I
did not test this version.

Jordan Justen (17):
  i965: Split gen6 renderbuffer surface state from gen5 and older
  i965/gen6: add support for layered renderbuffers
  i965/gen6: Adjust render height in errata case for MSAA
  i965: Split gen6 depth hiz state out from brw
  i965/gen6 depth surface: calculate more specific surface type
  i965/gen6 depth surface: calculate depth (array size) for depth
surface
  i965/gen6 depth surface: calculate LOD being rendered to
  i965/gen6 depth surface: calculate minimum array element being
rendered
  i965/gen6 blorp depth: calculate base surface width/height
  i965/gen6 fbo: make unmatched depth/stencil configs return unsupported
  i965/gen6 depth surface: program 3DSTATE_DEPTH_BUFFER to top of
surface
  i965: Rename array_spacing_lod0 to non_mip_arrays
  i965: Allow forcing non-mipmapped array spacing miptree layout
  i965: Support non_mip_arrays for multiple miplevels
  i965/gen6: Force tile alignment for each stencil/hiz LOD
  i965/gen6: Stencil/hiz needs an offset for LOD > 0
  i965/gen6: Force non_mip_arrays for separate stencil/hiz

 src/mesa/drivers/dri/i965/Makefile.sources|   2 +
 src/mesa/drivers/dri/i965/brw_blorp.cpp   |   2 +-
 src/mesa/drivers/dri/i965/brw_blorp.h |   2 +-
 src/mesa/drivers/dri/i965/brw_context.c   |   4 +
 src/mesa/drivers/dri/i965/brw_context.h   |  10 +
 src/mesa/drivers/dri/i965/brw_defines.h   |   4 +
 src/mesa/drivers/dri/i965/brw_misc_state.c|   4 +-
 src/mesa/drivers/dri/i965/brw_state.h |   3 +
 src/mesa/drivers/dri/i965/brw_tex_layout.c|  62 -
 src/mesa/drivers/dri/i965/gen6_blorp.cpp  | 115 +
 src/mesa/drivers/dri/i965/gen6_depth_state.c  | 273 ++
 src/mesa/drivers/dri/i965/gen6_surface_state.c| 164 +
 src/mesa/drivers/dri/i965/gen7_blorp.cpp  |   2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   6 +-
 src/mesa/drivers/dri/i965/intel_fbo.c |   7 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  43 ++--
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |   8 +-
 src/mesa/drivers/dri/i965/intel_tex.c |   3 +-
 src/mesa/drivers/dri/i965/intel_tex_image.c   |   3 +-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c|   3 +-
 src/mesa/drivers/dri/i965/intel_tex_validate.c|   3 +-
 21 files changed, 637 insertions(+), 86 deletions(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_depth_state.c
 create mode 100644 src/mesa/drivers/dri/i965/gen6_surface_state.c

-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 12/17] i965: Rename array_spacing_lod0 to non_mip_arrays

2014-07-18 Thread Jordan Justen
Generalize the name array_spacing_lod0 to non_mip_arrays. Previously
it was only used in certain cases where only a single mip-level was
used.

For gen6 we will use non-mipmapped array spacing, but with multiple
mip levels. This is needed because gen6 hiz and stencil only support a
single mip-level.

PRM Volume 1, Part 1, 7.18.3.7.2 For separate stencil buffer [DevILK]
to [DevSNB]:
 "The separate stencil buffer does not support mip mapping, thus the
  storage for LODs other than LOD 0 is not needed."

PRM Volume 2, Part 1, 7.5.3 Hierarchical Depth Buffer
 "[DevSNB]: The hierarchical depth buffer does not support the LOD
  field, it is assumed by hardware to be zero. A separate
  hierarchical depth buffer is required for each LOD used, and the
  corresponding buffer’s state delivered to hardware each time a new
  depth buffer state with modified LOD is delivered."

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/brw_blorp.cpp   | 2 +-
 src/mesa/drivers/dri/i965/brw_blorp.h | 2 +-
 src/mesa/drivers/dri/i965/brw_tex_layout.c| 2 +-
 src/mesa/drivers/dri/i965/gen7_blorp.cpp  | 2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c | 6 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 6 +++---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 2 +-
 7 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_blorp.cpp 
b/src/mesa/drivers/dri/i965/brw_blorp.cpp
index b57721c..c5ed84a 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/brw_blorp.cpp
@@ -82,7 +82,7 @@ brw_blorp_surface_info::set(struct brw_context *brw,
 {
brw_blorp_mip_info::set(mt, level, layer);
this->num_samples = mt->num_samples;
-   this->array_spacing_lod0 = mt->array_spacing_lod0;
+   this->non_mip_arrays = mt->non_mip_arrays;
this->map_stencil_as_y_tiled = false;
this->msaa_layout = mt->msaa_layout;
 
diff --git a/src/mesa/drivers/dri/i965/brw_blorp.h 
b/src/mesa/drivers/dri/i965/brw_blorp.h
index 683f09e..0b360c5 100644
--- a/src/mesa/drivers/dri/i965/brw_blorp.h
+++ b/src/mesa/drivers/dri/i965/brw_blorp.h
@@ -153,7 +153,7 @@ public:
/* Setting this flag indicates that the surface should be set up in
 * ARYSPC_LOD0 mode.  Ignored prior to Gen7.
 */
-   bool array_spacing_lod0;
+   bool non_mip_arrays;
 
/**
 * Format that should be used when setting up the surface state for this
diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c 
b/src/mesa/drivers/dri/i965/brw_tex_layout.c
index 76044b2..9e2720b 100644
--- a/src/mesa/drivers/dri/i965/brw_tex_layout.c
+++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c
@@ -241,7 +241,7 @@ brw_miptree_layout_texture_array(struct brw_context *brw,
 
h0 = ALIGN(mt->physical_height0, mt->align_h);
h1 = ALIGN(minify(mt->physical_height0, 1), mt->align_h);
-   if (mt->array_spacing_lod0)
+   if (mt->non_mip_arrays)
   mt->qpitch = h0;
else
   mt->qpitch = (h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h);
diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
index 0ad570b..c33cfeb 100644
--- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp
@@ -169,7 +169,7 @@ gen7_blorp_emit_surface_state(struct brw_context *brw,
if (surface->mt->align_w == 8)
   surf[0] |= GEN7_SURFACE_HALIGN_8;
 
-   if (surface->array_spacing_lod0)
+   if (surface->non_mip_arrays)
   surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
else
   surf[0] |= GEN7_SURFACE_ARYSPC_FULL;
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index 01120af..5d068d4 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -315,7 +315,7 @@ gen7_update_texture_surface(struct gl_context *ctx,
uint32_t effective_depth = (tObj->Immutable && tObj->Target != 
GL_TEXTURE_3D)
   ? tObj->NumLayers : mt->logical_depth0;
 
-   if (mt->array_spacing_lod0)
+   if (mt->non_mip_arrays)
   surf[0] |= GEN7_SURFACE_ARYSPC_LOD0;
 
surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
@@ -508,8 +508,8 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
 
surf[0] = surftype << BRW_SURFACE_TYPE_SHIFT |
  format << BRW_SURFACE_FORMAT_SHIFT |
- (irb->mt->array_spacing_lod0 ? GEN7_SURFACE_ARYSPC_LOD0
-  : GEN7_SURFACE_ARYSPC_FULL) |
+ (irb->mt->non_mip_arrays ? GEN7_SURFACE_ARYSPC_LOD0
+  : GEN7_SURFACE_ARYSPC_FULL) |
  gen7_surface_tiling_mode(mt->tiling);
 
if (irb->mt->align_h == 4)
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index b36ffc7..cc3b705 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i96

[Mesa-dev] [PATCH 16/17] i965/gen6: Stencil/hiz needs an offset for LOD > 0

2014-07-18 Thread Jordan Justen
Since gen6 separate stencil & hiz only supports LOD0, we need to
program an offset to the LOD when emitting the separate stencil/hiz.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 10 +++-
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 34 ++--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 5a56442..4dab569 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -871,13 +871,21 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
   struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_mt;
+  uint32_t offset = 0;
+
+  if (hiz_mt->non_mip_arrays) {
+ offset = intel_miptree_get_aligned_offset(hiz_mt,
+   hiz_mt->level[lod].level_x,
+   hiz_mt->level[lod].level_y,
+   false);
+  }
 
   BEGIN_BATCH(3);
   OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
   OUT_BATCH(hiz_mt->pitch - 1);
   OUT_RELOC(hiz_mt->bo,
 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-0);
+offset);
   ADVANCE_BATCH();
}
 
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index b58f970..fd37594 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -183,12 +183,22 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
   /* Emit hiz buffer. */
   if (hiz) {
  struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
+ uint32_t offset = 0;
+
+ if (hiz_mt->non_mip_arrays) {
+offset = intel_miptree_get_aligned_offset(
+hiz_mt,
+hiz_mt->level[lod].level_x,
+hiz_mt->level[lod].level_y,
+false);
+ }
+
 BEGIN_BATCH(3);
 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
 OUT_BATCH(hiz_mt->pitch - 1);
 OUT_RELOC(hiz_mt->bo,
   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-  0);
+  offset);
 ADVANCE_BATCH();
   } else {
 BEGIN_BATCH(3);
@@ -200,6 +210,26 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 
   /* Emit stencil buffer. */
   if (separate_stencil) {
+ uint32_t offset = 0;
+
+ if (stencil_mt->non_mip_arrays) {
+if (stencil_mt->format == MESA_FORMAT_S_UINT8) {
+   /* Note: we can't compute the stencil offset using
+* intel_region_get_aligned_offset(), because stencil_region
+* claims that the region is untiled even though it's W tiled.
+*/
+   offset =
+  stencil_mt->level[lod].level_y * stencil_mt->pitch +
+  stencil_mt->level[lod].level_x * 64;
+} else {
+   offset = intel_miptree_get_aligned_offset(
+   stencil_mt,
+   stencil_mt->level[lod].level_x,
+   stencil_mt->level[lod].level_y,
+   false);
+}
+ }
+
 BEGIN_BATCH(3);
 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
  /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
@@ -210,7 +240,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
 OUT_BATCH(2 * stencil_mt->pitch - 1);
 OUT_RELOC(stencil_mt->bo,
   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-  0);
+  offset);
 ADVANCE_BATCH();
   } else {
 BEGIN_BATCH(3);
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 13/17] i965: Allow forcing non-mipmapped array spacing miptree layout

2014-07-18 Thread Jordan Justen
gen6 does not support multiple miplevels with separate
stencil/hiz. Therefore we need to lay out its miptree with no mipmap
spacing between the slices of each miplevel.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/intel_fbo.c  |  3 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c  | 31 +-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h  |  6 +++--
 src/mesa/drivers/dri/i965/intel_tex.c  |  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_image.c|  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_subimage.c |  3 ++-
 src/mesa/drivers/dri/i965/intel_tex_validate.c |  3 ++-
 7 files changed, 35 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_fbo.c 
b/src/mesa/drivers/dri/i965/intel_fbo.c
index 22f707f..87abbf6 100644
--- a/src/mesa/drivers/dri/i965/intel_fbo.c
+++ b/src/mesa/drivers/dri/i965/intel_fbo.c
@@ -980,7 +980,8 @@ intel_renderbuffer_move_to_temp(struct brw_context *brw,
  width, height, depth,
  true,
  irb->mt->num_samples,
- INTEL_MIPTREE_TILING_ANY);
+ INTEL_MIPTREE_TILING_ANY,
+ false);
 
if (brw_is_hiz_depth_format(brw, new_mt->format)) {
   intel_miptree_alloc_hiz(brw, new_mt);
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index cc3b705..2a5afab 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -232,7 +232,8 @@ intel_miptree_create_layout(struct brw_context *brw,
 GLuint height0,
 GLuint depth0,
 bool for_bo,
-GLuint num_samples)
+GLuint num_samples,
+bool force_non_mip_arrays)
 {
struct intel_mipmap_tree *mt = calloc(sizeof(*mt), 1);
if (!mt)
@@ -367,7 +368,8 @@ intel_miptree_create_layout(struct brw_context *brw,
 mt->logical_depth0,
 true,
 num_samples,
-INTEL_MIPTREE_TILING_ANY);
+INTEL_MIPTREE_TILING_ANY,
+false);
   if (!mt->stencil_mt) {
 intel_miptree_release(&mt);
 return NULL;
@@ -385,6 +387,9 @@ intel_miptree_create_layout(struct brw_context *brw,
   }
}
 
+   if (force_non_mip_arrays)
+  mt->non_mip_arrays = true;
+
brw_miptree_layout(brw, mt);
 
return mt;
@@ -539,7 +544,8 @@ intel_miptree_create(struct brw_context *brw,
 GLuint depth0,
 bool expect_accelerated_upload,
  GLuint num_samples,
- enum intel_miptree_tiling_mode requested_tiling)
+ enum intel_miptree_tiling_mode requested_tiling,
+ bool force_non_mip_arrays)
 {
struct intel_mipmap_tree *mt;
mesa_format tex_format = format;
@@ -553,7 +559,8 @@ intel_miptree_create(struct brw_context *brw,
mt = intel_miptree_create_layout(brw, target, format,
  first_level, last_level, width0,
  height0, depth0,
- false, num_samples);
+false, num_samples,
+force_non_mip_arrays);
/*
 * pitch == 0 || height == 0  indicates the null texture
 */
@@ -664,7 +671,7 @@ intel_miptree_create_for_bo(struct brw_context *brw,
mt = intel_miptree_create_layout(brw, GL_TEXTURE_2D, format,
 0, 0,
 width, height, 1,
-true, 0 /* num_samples */);
+true, 0, false);
if (!mt) {
   free(mt);
   return mt;
@@ -773,7 +780,7 @@ intel_miptree_create_for_renderbuffer(struct brw_context 
*brw,
 
mt = intel_miptree_create(brw, target, format, 0, 0,
 width, height, depth, true, num_samples,
- INTEL_MIPTREE_TILING_ANY);
+ INTEL_MIPTREE_TILING_ANY, false);
if (!mt)
   goto fail;
 
@@ -1274,7 +1281,8 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
  mt->logical_depth0,
  true,
  0 /* num_samples */,
- INTEL_MIPTREE_TILING_Y);
+ INTEL_MIPTREE_TILING_Y,
+ false);
 
/* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
 *
@@

[Mesa-dev] [PATCH 11/17] i965/gen6 depth surface: program 3DSTATE_DEPTH_BUFFER to top of surface

2014-07-18 Thread Jordan Justen
(bf25ee2 for gen6)

Previously we would always find the 2D sub-surface of interest,
and then program the surface to this location. Now we always
program the 3DSTATE_DEPTH_BUFFER at the start of the surface.
To select the lod/slice, we utilize the lod & minimum array
element fields.

We also must disable brw_workaround_depthstencil_alignment for
gen >= 6. Now the hardware will handle alignment when rendering
to additional slices/LODs.

Signed-off-by: Jordan Justen 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=56127
---
 src/mesa/drivers/dri/i965/brw_misc_state.c   |  4 +-
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 71 +---
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 32 ++---
 3 files changed, 49 insertions(+), 58 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c 
b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 76e22bd..e3980fc 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -261,10 +261,10 @@ brw_workaround_depthstencil_alignment(struct brw_context 
*brw,
if (stencil_irb)
   brw->depthstencil.stencil_mt = get_stencil_miptree(stencil_irb);
 
-   /* Gen7+ doesn't require the workarounds, since we always program the
+   /* Gen6+ doesn't require the workarounds, since we always program the
 * surface state at the start of the whole surface.
 */
-   if (brw->gen >= 7)
+   if (brw->gen >= 6)
   return;
 
/* Check if depth buffer is in depth/stencil format.  If so, then it's only
diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index dca6bfc..5a56442 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -787,10 +787,6 @@ static void
 gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
  const brw_blorp_params *params)
 {
-   struct gl_context *ctx = &brw->ctx;
-   uint32_t draw_x = params->depth.x_offset;
-   uint32_t draw_y = params->depth.y_offset;
-   uint32_t tile_mask_x, tile_mask_y;
uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
@@ -814,12 +810,6 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
   break;
}
 
-   brw_get_depthstencil_tile_masks(params->depth.mt,
-   params->depth.level,
-   params->depth.layer,
-   NULL,
-   &tile_mask_x, &tile_mask_y);
-
min_array_element = params->depth.layer;
 
lod = params->depth.level - params->depth.mt->first_level;
@@ -838,55 +828,42 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
 
/* 3DSTATE_DEPTH_BUFFER */
{
-  uint32_t tile_x = draw_x & tile_mask_x;
-  uint32_t tile_y = draw_y & tile_mask_y;
-  uint32_t offset =
- intel_miptree_get_aligned_offset(params->depth.mt,
-  draw_x & ~tile_mask_x,
-  draw_y & ~tile_mask_y, false);
-
-  /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
-   * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
-   * Coordinate Offset X/Y":
-   *
-   *   "The 3 LSBs of both offsets must be zero to ensure correct
-   *   alignment"
-   *
-   * We have no guarantee that tile_x and tile_y are correctly aligned,
-   * since they are determined by the mipmap layout, which is only aligned
-   * to multiples of 4.
-   *
-   * So, to avoid hanging the GPU, just smash the low order 3 bits of
-   * tile_x and tile_y to 0.  This is a temporary workaround until we come
-   * up with a better solution.
-   */
-  WARN_ONCE((tile_x & 7) || (tile_y & 7),
-"Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
-"Truncating offset, bad rendering may occur.\n");
-  tile_x &= ~7;
-  tile_y &= ~7;
-
   intel_emit_post_sync_nonzero_flush(brw);
   intel_emit_depth_stall_flushes(brw);
 
   BEGIN_BATCH(7);
+  /* 3DSTATE_DEPTH_BUFFER dw0 */
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
+
+  /* 3DSTATE_DEPTH_BUFFER dw1 */
   OUT_BATCH((params->depth.mt->pitch - 1) |
 params->depth_format << 18 |
 1 << 21 | /* separate stencil enable */
 1 << 22 | /* hiz enable */
 BRW_TILEWALK_YMAJOR << 26 |
 1 << 27 | /* y-tiled */
-BRW_SURFACE_2D << 29);
+surftype << 29);
+
+  /* 3DSTATE_DEPTH_BUFFER dw2 */
   OUT_RELOC(params->depth.mt->bo,
 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
-offset);
+0);
+
+  /* 3DSTATE_DEPTH_BUFFER dw3 */
   OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1

[Mesa-dev] [PATCH 06/17] i965/gen6 depth surface: calculate depth (array size) for depth surface

2014-07-18 Thread Jordan Justen
(bc1acaa for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Note: Cube maps are treated as 2D arrays with 6 times as
many array elements as the cube map array would have.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 2 ++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 3 +++
 2 files changed, 5 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index 3fc36aa..20ba31e 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -792,6 +792,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
uint32_t surftype;
+   unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
GLenum gl_target = params->depth.mt->target;
 
switch (gl_target) {
@@ -803,6 +804,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
* equivalent.
*/
   surftype = BRW_SURFACE_2D;
+  depth *= 6;
   break;
default:
   surftype = translate_tex_target(gl_target);
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
index 0480142..8bc0073 100644
--- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -47,6 +47,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
struct gl_framebuffer *fb = ctx->DrawBuffer;
uint32_t surftype;
+   unsigned int depth = 1;
GLenum gl_target = GL_TEXTURE_2D;
const struct intel_renderbuffer *irb = NULL;
const struct gl_renderbuffer *rb = NULL;
@@ -77,6 +78,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
rb = (struct gl_renderbuffer*) irb;
 
if (rb) {
+  depth = MAX2(rb->Depth, 1);
   if (rb->TexImage)
  gl_target = rb->TexImage->TexObject->Target;
}
@@ -90,6 +92,7 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
* equivalent.
*/
   surftype = BRW_SURFACE_2D;
+  depth *= 6;
   break;
default:
   surftype = translate_tex_target(gl_target);
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 04/17] i965: Split gen6 depth hiz state out from brw

2014-07-18 Thread Jordan Justen
We will program the gen6 hiz depth state differently to enable layered
rendering on gen6.

v2:
 * Remove unneeded gen6_emit_depthbuffer as suggested by Topi

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/Makefile.sources   |   1 +
 src/mesa/drivers/dri/i965/brw_context.c  |   2 +-
 src/mesa/drivers/dri/i965/brw_context.h  |  10 ++
 src/mesa/drivers/dri/i965/gen6_depth_state.c | 176 +++
 4 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 src/mesa/drivers/dri/i965/gen6_depth_state.c

diff --git a/src/mesa/drivers/dri/i965/Makefile.sources 
b/src/mesa/drivers/dri/i965/Makefile.sources
index 43e3378..17256b6 100644
--- a/src/mesa/drivers/dri/i965/Makefile.sources
+++ b/src/mesa/drivers/dri/i965/Makefile.sources
@@ -122,6 +122,7 @@ i965_FILES = \
gen6_blorp.cpp \
gen6_cc.c \
gen6_clip_state.c \
+   gen6_depth_state.c \
gen6_depthstencil.c \
gen6_gs_state.c \
 gen6_multisample_state.c \
diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index dbe68a8..c7dfa87 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -648,7 +648,7 @@ brwCreateContext(gl_api api,
} else if (brw->gen >= 6) {
   gen6_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
-  brw->vtbl.emit_depth_stencil_hiz = brw_emit_depth_stencil_hiz;
+  brw->vtbl.emit_depth_stencil_hiz = gen6_emit_depth_stencil_hiz;
} else {
   gen4_init_vtable_surface_functions(brw);
   gen4_init_vtable_sampler_functions(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
b/src/mesa/drivers/dri/i965/brw_context.h
index 2943a20..408939c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1751,6 +1751,16 @@ brw_emit_depth_stencil_hiz(struct brw_context *brw,
uint32_t tile_x, uint32_t tile_y);
 
 void
+gen6_emit_depth_stencil_hiz(struct brw_context *brw,
+struct intel_mipmap_tree *depth_mt,
+uint32_t depth_offset, uint32_t depthbuffer_format,
+uint32_t depth_surface_type,
+struct intel_mipmap_tree *stencil_mt,
+bool hiz, bool separate_stencil,
+uint32_t width, uint32_t height,
+uint32_t tile_x, uint32_t tile_y);
+
+void
 gen7_emit_depth_stencil_hiz(struct brw_context *brw,
 struct intel_mipmap_tree *depth_mt,
 uint32_t depth_offset, uint32_t depthbuffer_format,
diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
b/src/mesa/drivers/dri/i965/gen6_depth_state.c
new file mode 100644
index 000..d37aae8
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
@@ -0,0 +1,176 @@
+/*
+ * Copyright (c) 2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+
+#include "intel_batchbuffer.h"
+#include "intel_fbo.h"
+#include "intel_mipmap_tree.h"
+
+#include "brw_context.h"
+#include "brw_state.h"
+#include "brw_defines.h"
+
+#include "main/fbobject.h"
+#include "main/glformats.h"
+
+void
+gen6_emit_depth_stencil_hiz(struct brw_context *brw,
+struct intel_mipmap_tree *depth_mt,
+uint32_t depth_offset, uint32_t depthbuffer_format,
+uint32_t depth_surface_type,
+struct intel_mipmap_tree *stencil_mt,
+bool hiz, bool separate_stencil,
+uint32_t width, uint32_t height,
+uint32_t tile_x, uint32_t tile_y)
+{
+   /* Enable the hiz bit if we're doing separate stencil, because it and the
+* separate

[Mesa-dev] [PATCH 09/17] i965/gen6 blorp depth: calculate base surface width/height

2014-07-18 Thread Jordan Justen
(e3a49e1 for gen6)

This will be used in 3DSTATE_DEPTH_BUFFER in a later patch.

Signed-off-by: Jordan Justen 
---
 src/mesa/drivers/dri/i965/gen6_blorp.cpp | 13 +
 1 file changed, 13 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp 
b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
index ff1732d..dca6bfc 100644
--- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp
+++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp
@@ -791,6 +791,7 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
uint32_t draw_x = params->depth.x_offset;
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
+   uint32_t surfwidth, surfheight;
uint32_t surftype;
unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
unsigned int min_array_element;
@@ -823,6 +824,18 @@ gen6_blorp_emit_depth_stencil_config(struct brw_context 
*brw,
 
lod = params->depth.level - params->depth.mt->first_level;
 
+   if (params->hiz_op != GEN6_HIZ_OP_NONE && lod == 0) {
+  /* HIZ ops for lod 0 may set the width & height a little
+   * larger to allow the fast depth clear to fit the hardware
+   * alignment requirements. (8x4)
+   */
+  surfwidth = params->depth.width;
+  surfheight = params->depth.height;
+   } else {
+  surfwidth = params->depth.mt->logical_width0;
+  surfheight = params->depth.mt->logical_height0;
+   }
+
/* 3DSTATE_DEPTH_BUFFER */
{
   uint32_t tile_x = draw_x & tile_mask_x;
-- 
2.0.1

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: ensure that frexp returns a 0 exponent for zero values

2014-07-18 Thread Matt Turner
On Fri, Jul 18, 2014 at 9:19 AM, Ilia Mirkin  wrote:
> The current code appears to work in simple tests, however this will
> guarantee that the returned exponent is 0 for a 0 value.
>
> Signed-off-by: Ilia Mirkin 
> ---
>
> I couldn't make a simple test-case that would cause the current logic to
> fail. However when I did the same thing with doubles, I ran into trouble. It
> seems safer to move the csel outside of the add in case the value actually has
> a non-0 exponent despite a 0 significand.
>
>  src/glsl/builtin_functions.cpp | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
> index e01742c..5755de9 100644
> --- a/src/glsl/builtin_functions.cpp
> +++ b/src/glsl/builtin_functions.cpp
> @@ -4229,8 +4229,8 @@ builtin_builder::_frexp(const glsl_type *x_type, const 
> glsl_type *exp_type)
>  * to unsigned integers to ensure that 1 bits aren't shifted in.
>  */
> body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
> -   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
> - imm(0, vec_elem);
> +   body.emit(assign(exponent, csel(is_not_zero, add(exponent, exponent_bias),
> +   imm(0, vec_elem;

So you're changing the logic from

exponent = (f2i(abs(x)) >> 23) + ((x != 0.0f) ? -126 : 0);

to

exponent = (x != 0.0f) ? ((f2i(abs(x)) >> 23) - 126) : 0;

These seem identical to me, and trivially so for 0.0f/-0.0f. I have a
feeling that this patch is papering over a bug in your code generation
for f2i(abs(x)).

In commit 9c48ae75 I fixed a bug in i965 where instead of generating
f2i(abs(x)) we'd apply the source modifier effectively after the
bitcast (reading the register with a different type), so we'd get
abs(f2i(x)).

For 0.0f, applying the f2i and abs out of order doesn't affect the
result, but for -0.0f (0x80000000, -2147483648) instead of getting 0,
you'd get abs(-2147483648) (which is likely -2147483648 itself!).

I don't have any idea about why this might occur for you with doubles
but not floats, but you should have something to look into now.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965/fs: Fix gl_SampleID for 2x MSAA and SIMD16 mode.

2014-07-18 Thread Matt Turner
On Fri, Jul 18, 2014 at 1:19 PM, Kenneth Graunke  wrote:
> We might be able to do this without an extra program key field, but this
> is non-invasive and fixes the bug, for now.
>
> This fixes the following Piglit tests on Broadwell:
> - ARB_sample_shading/builtin-gl-sample-id 2
> - ARB_sample_shading/builtin-gl-sample-position 2
> - EXT_framebuffer_multisample/multisample-blit 2 color
> - EXT_framebuffer_multisample/multisample-blit 2 color linear
> - EXT_framebuffer_multisample/multisample-blit 2 depth
> - EXT_framebuffer_multisample/no-color 2 depth combined
> - EXT_framebuffer_multisample/no-color 2 depth separate
> - EXT_framebuffer_multisample/no-color 2 depth single
> - EXT_framebuffer_multisample/no-color 2 depth-computed combined
> - EXT_framebuffer_multisample/no-color 2 depth-computed separate
> - EXT_framebuffer_multisample/no-color 2 depth-computed single
> - EXT_framebuffer_multisample/unaligned-blit 2 color msaa
> - EXT_framebuffer_multisample/unaligned-blit 2 depth msaa
>
> Signed-off-by: Kenneth Graunke 
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=80991
> Cc: "10.2" 
> ---
>  src/mesa/drivers/dri/i965/brw_fs.cpp | 7 ++-
>  src/mesa/drivers/dri/i965/brw_wm.c   | 4 
>  src/mesa/drivers/dri/i965/brw_wm.h   | 1 +
>  3 files changed, 11 insertions(+), 1 deletion(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
> b/src/mesa/drivers/dri/i965/brw_fs.cpp
> index 1a5d987..a749e89 100644
> --- a/src/mesa/drivers/dri/i965/brw_fs.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
> @@ -1304,6 +1304,11 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
> * populating a temporary variable with the sequence (0, 1, 2, 3),
> * and then reading from it using vstride=1, width=4, hstride=0.
> * These computations hold good for 4x multisampling as well.
> +   *
> +   * For 2x MSAA and SIMD16, we want to use the sequence (0, 1, 0, 1):
> +   * the first four slots are sample 0 of subspan 0; the next four
> +   * are sample 1 of subspan 0; the third group is sample 0 of
> +   * subspan 1, and finally sample 1 of subspan 1.
> */
>fs_inst *inst;
>inst = emit(BRW_OPCODE_AND, t1,
> @@ -1313,7 +1318,7 @@ fs_visitor::emit_sampleid_setup(ir_variable *ir)
>inst = emit(BRW_OPCODE_SHR, t1, t1, fs_reg(5));
>inst->force_writemask_all = true;
>/* This works for both SIMD8 and SIMD16 */
> -  inst = emit(MOV(t2, brw_imm_v(0x3210)));
> +  inst = emit(MOV(t2, brw_imm_v(key->persample_2x ? 0x1010 : 0x3210)));

Is this still safe for SIMD8?

If so,

Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] i965/fs: Fix gl_SampleMask handling for SIMD16 on Gen8+.

2014-07-18 Thread Matt Turner
Reviewed-by: Matt Turner 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: ensure that frexp returns a 0 exponent for zero values

2014-07-18 Thread Ilia Mirkin
On Fri, Jul 18, 2014 at 5:27 PM, Matt Turner  wrote:
> On Fri, Jul 18, 2014 at 9:19 AM, Ilia Mirkin  wrote:
>> The current code appears to work in simple tests, however this will
>> guarantee that the returned exponent is 0 for a 0 value.
>>
>> Signed-off-by: Ilia Mirkin 
>> ---
>>
>> I couldn't make a simple test-case that would cause the current logic to
>> fail. However when I did the same thing with doubles, I ran into trouble. It
>> seems safer to move the csel outside of the add in case the value actually 
>> has
>> a non-0 exponent despite a 0 significand.
>>
>>  src/glsl/builtin_functions.cpp | 4 ++--
>>  1 file changed, 2 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/glsl/builtin_functions.cpp b/src/glsl/builtin_functions.cpp
>> index e01742c..5755de9 100644
>> --- a/src/glsl/builtin_functions.cpp
>> +++ b/src/glsl/builtin_functions.cpp
>> @@ -4229,8 +4229,8 @@ builtin_builder::_frexp(const glsl_type *x_type, const 
>> glsl_type *exp_type)
>>  * to unsigned integers to ensure that 1 bits aren't shifted in.
>>  */
>> body.emit(assign(exponent, rshift(bitcast_f2i(abs(x)), exponent_shift)));
>> -   body.emit(assign(exponent, add(exponent, csel(is_not_zero, exponent_bias,
>> - imm(0, vec_elem);
>> +   body.emit(assign(exponent, csel(is_not_zero, add(exponent, 
>> exponent_bias),
>> +   imm(0, vec_elem;
>
> So you're changing the logic from
>
> exponent = (f2i(abs(x)) >> 23) + ((x != 0.0f) ? -126 : 0);
>
> to
>
> exponent = (x != 0.0f) ? ((f2i(abs(x)) >> 23) - 126) : 0;
>
> These seem identical to me, and trivially so for 0.0f/-0.0f. I have a
> feeling that this patch is papering over a bug in your code generation
> for f2i(abs(x)).

Could be, I'll take a closer look at the generated code. Thanks for the hint.

> In commit 9c48ae75 I fixed a bug in i965 where instead of generating
> f2i(abs(x)) we'd apply the source modifier effectively after the
> bitcast (reading the register with a different type), so we'd get
> abs(f2i(x)).
>
> For 0.0f, applying the f2i and abs out of order doesn't affect the
> result, but for -0.0f (0x80000000, -2147483648) instead of getting 0,
> you'd get abs(-2147483648) (which is likely -2147483648 itself!).

Couldn't you have a situation where 0.0 or -0.0 are actually not in
normal form, and are, e.g. 0xff800000 or something (i.e. non-0
exponent)? I was concerned about that situation. If the floats are
guaranteed to be in normalized form, then you're right that those two
should be identical.

  -ilia
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: ensure that frexp returns a 0 exponent for zero values

2014-07-18 Thread Matt Turner
On Fri, Jul 18, 2014 at 2:37 PM, Ilia Mirkin  wrote:
> On Fri, Jul 18, 2014 at 5:27 PM, Matt Turner  wrote:
>> For 0.0f, applying the f2i and abs out of order doesn't affect the
>> result, but for -0.0f (0x80000000, -2147483648) instead of getting 0,
>> you'd get abs(-2147483648) (which is likely -2147483648 itself!).
>
> Couldn't you have a situation where 0.0 or -0.0 are actually not in
> normal form, and are, e.g. 0xff800000 or something (i.e. non-0
> exponent)? I was concerned about that situation.

That /may/ be possible? ARB_shader_precision says

"Any denormalized value input into a shader or potentially generated
by an operation in a shader can be flushed to 0."

On i965, even applying the absolute value source modifier on a
register with a denormal value flushes that source to zero before it's
fed to the instruction. Maybe other hardware doesn't operate like
this?
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 0/5] radeon: Write-combined CPU mappings of BOs in GTT

2014-07-18 Thread Marek Olšák
On Fri, Jul 18, 2014 at 7:47 PM, Marek Olšák  wrote:
> On Fri, Jul 18, 2014 at 5:47 PM, Christian König
>  wrote:
>> Am 18.07.2014 05:07, schrieb Michel Dänzer:
>
> [PATCH 5/5] drm/radeon: Use VRAM for indirect buffers on >= SI

 I'm still not very keen with this change since I still don't understand
 the reason why it's faster than with GTT. Definitely needs more testing
 on a wider range of systems.
>>>
>>> Sure. If anyone wants to give this patch a spin and see if they can
>>> measure any performance difference, good or bad, that would be
>>> interesting.
>>>
 Maybe limit it to APUs for now?
>>>
>>> But IIRC, CPU writes to VRAM vs. write-combined GTT are actually an even
>>> bigger win with dedicated GPUs than with the Kaveri built-in GPU on my
>>> system. I suspect it may depend on the bandwidth available for PCIe vs.
>>> system memory though.
>>
>>
>> I've made a few tests today with the kernel part of the patches running
>> Xonotic on Ultra in 1920 x 1080.
>>
>> Without any patches I get around ~47.0fps on average with my dedicated
>> HD7870.
>>
>> Adding only "drm/radeon: Use write-combined CPU mappings of rings and IBs on
>> >= SI" and that goes down to ~45.3fps.
>>
>> Adding on top of that "drm/radeon: Use VRAM for indirect buffers on >= SI"
>> and the frame rate goes down to ~27.74fps.
>>
>> So enabling this unconditionally is definitely not a good idea. What I don't
>> understand yet is why using USWC reduces the fps on SI as well. It looks
>> like the reads from the IB buffer for command stream validation on SI affect
>> that more than thought.
>
> Yes, there is a CS parser with SI, but shouldn't the parser read from
> the CPU copy that came with the ioctl instead? Anyway, I recommend
> only using VRAM for IBs which are not parsed and patched by the CPU
> (which reduces it down to CIK graphics and DMA IBs, right?)

Oh, sorry. There is no CPU copy, just the IB. My recommendation still stands.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #8 from Barto  ---
I have just realized that the last git version of mesa solves the bug :

Mesa 10.3.0-devel (git-f14d217)

but I will continue the bisect process in order to find the commit that triggers
the bug in mesa 10.2.x,

it's difficult because there are a lot of commits that fail to compile :

drivers/common/meta.c: In function '_mesa_meta_begin':
drivers/common/meta.c:1202:1: error: invalid storage class for function
'invert_z'
 invert_z(GLfloat normZ)

$ git bisect skip
Bisecting: 5 revisions left to test after this (roughly 3 steps)
[4a868a984d6ae73eb38a69b045b004663cdac20c] mesa/sso: Refactor new function
_mesa_bind_pipeline


here is the bisect log :

git bisect start
# bad: [a06c9791d1b7fcedfb56ecbdc601d42fab196916] docs: Add missing release
notes for ARB_separate_shader_objects
git bisect bad a06c9791d1b7fcedfb56ecbdc601d42fab196916
# good: [5f41cae633af3603ab369c139bfe2de6bbcc6369] docs: Add release notes for
the 10.1.6 release.
git bisect good 5f41cae633af3603ab369c139bfe2de6bbcc6369
# good: [81144c049bc7c12e4edcdf28f91c3c024c6e8b2b] meta: Silence several
'unused parameter' warnings
git bisect good 81144c049bc7c12e4edcdf28f91c3c024c6e8b2b
# good: [2d2e60bd19798c9828b543ffb1aca3270325] gallium/docs: fix
PIPE_CAP_ENDIANNESS delimiter, remove trailing spaces
git bisect good 2d2e60bd19798c9828b543ffb1aca3270325
# good: [741f5d58e649cbc35c0d8661616f4e718b4718f0] radeon: Drop the remaining
driver usage of _ReallyEnabled.
git bisect good 741f5d58e649cbc35c0d8661616f4e718b4718f0
# good: [2a255704564c9ed7cd1a081f7cc3828ee698aa2a] mesa: Implement
glBindImageTextures
git bisect good 2a255704564c9ed7cd1a081f7cc3828ee698aa2a
# bad: [dc675919d30df407269a5e5b3d682e7801db3f1e] mesa: add missing null checks
in _tnl_register_fastpath()
git bisect bad dc675919d30df407269a5e5b3d682e7801db3f1e
# skip: [0939d3d0974a579fa65b76ebc6074d61e11f03b0] sso: Add display list
support for ARB_separate_shader_objects new functions
git bisect skip 0939d3d0974a579fa65b76ebc6074d61e11f03b0
# skip: [5699220cd5719be6fbafdefd75025a817bcb200a] glsl: Exit when the shader
IR contains an interface block instance
git bisect skip 5699220cd5719be6fbafdefd75025a817bcb200a
# bad: [e608449d3e7dc86b90acfb31d9c948c57cf0e920] mesa/sso: Enable
GL_ARB_separate_shader_objects by default
git bisect bad e608449d3e7dc86b90acfb31d9c948c57cf0e920
# skip: [8f5852bd2b91df7b259e5aeafb6a62a4268ca4c4] linker: Refactor code that
builds hash tables of varyings during linking
git bisect skip 8f5852bd2b91df7b259e5aeafb6a62a4268ca4c4
# good: [e05cebafd8ff127ead71fadc20f2e2c8c719481a] clover: Add a stub
implementation of clCreateImage() v3
git bisect good e05cebafd8ff127ead71fadc20f2e2c8c719481a
# skip: [c557eb77225433fa9415a94fc9db3ce36374df64] linker: Allow geometry
shader without vertex shader for separable programs
git bisect skip c557eb77225433fa9415a94fc9db3ce36374df64
# skip: [ba7195d126ce20bf74a27725224662aaca4d90ef] glsl/tests: Add first simple
tests of populate_consumer_input_sets
git bisect skip ba7195d126ce20bf74a27725224662aaca4d90ef
# good: [5998fd536a1bb1d13218c995aa69723c6767cf04] linker: Make
lower_packed_varyings work with explicit locations
git bisect good 5998fd536a1bb1d13218c995aa69723c6767cf04
# skip: [7d73c3e99ec14031e3834096f7e8e257338b64d4] linker: Allow consumer stage
or producer stage to be NULL
git bisect skip 7d73c3e99ec14031e3834096f7e8e257338b64d4
# skip: [7ff937e5793dc8709c916e043b41142033c8e69e] linker: Modify
cross_validate_outputs_to_inputs to match using explicit locations
git bisect skip 7ff937e5793dc8709c916e043b41142033c8e69e
# skip: [fe37cb0ac67071759a88ea767027368399e1fdb6] linker: Refactor code that
gets an input matching an output
git bisect skip fe37cb0ac67071759a88ea767027368399e1fdb6
# skip: [d030a3404ca0fedf365cb0fd41eaad7abc8ff132] linker: Sort shader I/O
variables into a canonical order
git bisect skip d030a3404ca0fedf365cb0fd41eaad7abc8ff132
# skip: [1ff5a2b1ba2148b772f5e5c86d64c3cb18e1ce97] linker: Assign varying
locations for separable programs
git bisect skip 1ff5a2b1ba2148b772f5e5c86d64c3cb18e1ce97
# skip: [ca21cffebd063354291d561eadc2ded8795a5333] meta: Fix saving the program
pipeline state
git bisect skip ca21cffebd063354291d561eadc2ded8795a5333

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Backends and support for pow-instructions

2014-07-18 Thread Matt Turner
On Wed, Jul 16, 2014 at 4:14 PM, Thomas Helland
 wrote:
> 2014-07-13 20:13 GMT+02:00 Matt Turner :
>>
>> On Sun, Jul 13, 2014 at 10:50 AM, Thomas Helland
>>  wrote:
>> > I've considered writing an algebraic optimization to convert
>> > this into an ir_binop_pow. If my understanding is correct the backend
>> > will then implement this in a similar fashion as above if it does not
>> > have a native pow() instruction.
>> >
>> > If, on the other hand, we have a pow() instruction, my guess is
>> > we'd see reduced instruction-counts.
>> >
>> > Is my understanding correct? Is this something that's worth doing?
>>
>> Yes and yes :)
>>
>> It's something I've thought about doing for a while. The only hang-up
>> is that we don't get nice expression trees to match in opt_algebraic.
>> Ideally, we'd get an ir_instruction with an rvalue that looked like
>>
>> (assign (xyz) (var_ref r3) (expression vec3 log2 (expression vec3 *
>> (expression vec3 exp2 (swiz xyz (var_ref r3))) (constant vec3
>> (2.20 2.20 2.20)))))
>>
>> and then the bit of code in opt_algebraic is simple. Unfortunately, r3
>> is likely a vec4 and is used repeatedly throughout the shader for many
>> unrelated things. If we were able to split up these variables (i.e.,
>> recognize that the use of r3 for log2/mul/exp2 is a distinct live
>> range from the other uses of r3, and give it a new variable name) then
>> tree grafting would be able to give us the expression tree that we
>> want.
>>
>
> So we would probably be helped with a UD-chain, and a pass to
> make new variables for each of the new definitions?
> As far as I've managed to acclimate to the code-base we
> do not have such a feature yet in the glsl-compiler?

Right. UD chains would probably help a lot in solving this problem.

>> That would let a lot of existing optimization passes perform better as well.
>>
>> Ken and I worked on this kind of pass in the i965 backend [0]. It
>> looked for full register writes outside of control flow, assigned the
>> result to a new register, and rewrote future uses of the old with the
>> new register. Something like that at the GLSL IR level would do the
>> trick. One problem to solve is how to handle partial writes of
>> variables, since in the case you brought up the shader only uses 3
>> components of a vec4, but they're still a distinct live range.
>>
>
> I guess we would need to keep track of the uses and defs for
> each component in the vector, some kind of fancy UD-chain
> that works component-wise, and also globally on the vector.
>
> I accidentally stumbled across some work in Eric's git-repo that
> looks pretty useful as a basis for how to go about this. [1]
> It seems to implement live-variable analysis that are both
> control-flow and swizzle-aware, and works component-wise.
> I have only given it a short glimpse, but seems promising.

I hadn't considered using that code, but yeah, that would probably be
really helpful.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Backends and support for pow-instructions

2014-07-18 Thread Marek Olšák
All Gallium drivers must support POW, but some drivers like r300-r500
fragment shaders lower it to LG2+MUL+EX2.

Marek

On Sun, Jul 13, 2014 at 7:50 PM, Thomas Helland
 wrote:
> Hi all,
>
> I've been looking at some shaders from Portal recently.
> A lot of them seem to hand-roll a pow-function with
> log2, multiply, and exp2, like this:
>
> r3.x = log2( r3.x );
> r3.y = log2( r3.y );
> r3.z = log2( r3.z );
> r3.xyz = r3.xyz * vd2.zzz;
> r3.x = exp2( r3.x );
> r3.y = exp2( r3.y );
> r3.z = exp2( r3.z );
>
> The corresponding output from the shader-db run shows:
>
>(assign  (xyz) (var_ref r3)  (expression vec3 log2 (swiz xyz (var_ref r3)
> )) )
> 0x03c8: math log(8) g27<1>.xyzF g27<4,4,1>.xyzzF null
> { align16 WE_normal 1Q compacted };
>(assign  (xz) (var_ref r6)  (swiz yy (array_ref (var_ref vc) (constant
> int (0)) ) ))
> 0x03d0: mov(8)  g25<1>.xzF  g1<0,4,1>.yF
> { align16 WE_normal 1Q };
>(assign  (w) (var_ref r0)  (expression float + (expression float dot
> (swiz xyz (array_ref (var_ref vc) (constant int (28)) ) )(expression vec3
> neg (swiz xyz (var_ref r5) )) ) (expression float neg (swiz z (array_ref
> (var_ref vc) (constant int (30)) ) )) ) )
> 0x03e0: dp3(8)  g105<1>.xF  g5.4<0,4,1>.xyzzF
> -g26<4,4,1>.xyzzF { align16 WE_normal 1Q };
>(assign  (xyz) (var_ref r0)  (expression vec3 + (expression vec3 * (swiz
> xyz (var_ref r0) )(expression vec3 neg (swiz xxx (var_ref
> inversesqrt_retval) )) ) (expression vec3 neg (swiz xyz (array_ref (var_ref
> vc) (constant int (28)) ) )) ) )
> 0x03f0: mad(8)  g30<1>.xyzF g108<4,4,1>F.xyzz
> g30<4,4,1>F.xyzz -g97<4,4,1>F.x { align16 WE_normal 1Q };
>(assign  (xyz) (var_ref r6)  (expression vec3 * (swiz yxx (var_ref r7)
> )(swiz xyz (var_ref r6) )) )
> 0x0400: mul(8)  g25<1>.xyzF g24<4,4,1>.yxxxF
> g25<4,4,1>.xyzzF { align16 WE_normal 1Q };
>(assign  (xyz) (var_ref r3)  (expression vec3 * (swiz xyz (var_ref r3)
> )(constant vec3 (2.20 2.20 2.20)) ) )
> 0x0410: mul(8)  g27<1>.xyzF g27<4,4,1>.xyzzF 2.2F
> { align16 WE_normal 1Q };
>(assign  (w) (var_ref r0)  (expression float + (expression float dot
> (swiz xyz (array_ref (var_ref vc) (constant int (28)) ) )(expression vec3
> neg (swiz xyz (var_ref r5) )) ) (expression float neg (swiz z (array_ref
> (var_ref vc) (constant int (30)) ) )) ) )
> 0x0420: add(8)  g107<1>.xF  g105<4,4,1>.xF  -g6.4<0,4,1>.zF
> { align16 WE_normal 1Q };
>(assign  (xyz) (var_ref r0)  (expression vec3 + (expression vec3 * (swiz
> www (array_ref (var_ref vc) (constant int (27)) ) )(swiz xyz (var_ref r0) ))
> (swiz xyz (var_ref r5) )) )
> 0x0430: mad(8)  g30<1>.xyzF g26<4,4,1>F.xyzz g5.3<0,1,0>F.w
> g30<4,4,1>F.xyzz { align16 WE_normal 1Q };
>(assign  (x) (var_ref dot_retval)  (expression float dot (swiz xyz
> (array_ref (var_ref vc) (constant int (31)) ) )(swiz xyz (var_ref r6) )) )
> 0x0440: dp3(8)  g18<1>.xF   g7<0,4,1>.xyzzF g25<4,4,1>.xyzzF
> { align16 WE_normal 1Q compacted };
>(assign  (xyz) (var_ref r3)  (expression vec3 exp2 (swiz xyz (var_ref r3)
> )) )
> 0x0448: math exp(8) g27<1>.xyzF g27<4,4,1>.xyzzF null
> { align16 WE_normal 1Q compacted };
>
>
> As far as I've been able to understand, the i965 hardware
> has a pow() instruction, how is this with other hardware?
>
> I've considered writing an algebraic optimization to convert
> this into an ir_binop_pow. If my understanding is correct the backend
> will then implement this in a similar fashion as above if it does not
> have a native pow() instruction.
>
> If, on the other hand, we have a pow() instruction, my guess is
> we'd see reduced instruction-counts.
>
> Is my understanding correct? Is this something that's worth doing?
>
> regards,
> Thomas Helland
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #9 from Barto  ---
I finished the bisect process,

because of the multiple commands "git bisect skip"  the result of the bisect
process is complicated,

in fact there are 5 commits who can be the guilty :

# only skipped commits left to test
# possible first bad commit: [e608449d3e7dc86b90acfb31d9c948c57cf0e920]
mesa/sso: Enable GL_ARB_separate_shader_objects by default

# possible first bad commit: [0939d3d0974a579fa65b76ebc6074d61e11f03b0] sso:
Add display list support for ARB_separate_shader_objects new functions

# possible first bad commit: [7ff937e5793dc8709c916e043b41142033c8e69e] linker:
Modify cross_validate_outputs_to_inputs to match using explicit locations

# possible first bad commit: [d030a3404ca0fedf365cb0fd41eaad7abc8ff132] linker:
Sort shader I/O variables into a canonical order

# possible first bad commit: [c557eb77225433fa9415a94fc9db3ce36374df64] linker:
Allow geometry shader without vertex shader for separable programs

# possible first bad commit: [1ff5a2b1ba2148b772f5e5c86d64c3cb18e1ce97] linker:
Assign varying locations for separable programs

# possible first bad commit: [7d73c3e99ec14031e3834096f7e8e257338b64d4] linker:
Allow consumer stage or producer stage to be NULL

# possible first bad commit: [fe37cb0ac67071759a88ea767027368399e1fdb6] linker:
Refactor code that gets an input matching an output

# possible first bad commit: [5699220cd5719be6fbafdefd75025a817bcb200a] glsl:
Exit when the shader IR contains an interface block instance

# possible first bad commit: [ba7195d126ce20bf74a27725224662aaca4d90ef]
glsl/tests: Add first simple tests of populate_consumer_input_sets

# possible first bad commit: [8f5852bd2b91df7b259e5aeafb6a62a4268ca4c4] linker:
Refactor code that builds hash tables of varyings during linking

# possible first bad commit: [ca21cffebd063354291d561eadc2ded8795a5333] meta:
Fix saving the program pipeline state

in order to get the true guilty it would be great if I can solve the
compilation error :

drivers/common/meta.c: In function '_mesa_meta_begin':
drivers/common/meta.c:1202:1: error: invalid storage class for function
'invert_z'
 invert_z(GLfloat normZ)

I need a patch for this error in drivers/common/meta.c, in order to find which
is guilty among the 5 commits,

full bisect log will come in attachment

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Backends and support for pow-instructions

2014-07-18 Thread Marek Olšák
BTW, I have just noticed r600g also lowers POW and there is no mention
of POW in the SI ISA guide either, so I don't think radeons would
benefit from an optimization pass that adds POW instructions.

Marek

On Sat, Jul 19, 2014 at 12:15 AM, Marek Olšák  wrote:
> All Gallium drivers must support POW, but some drivers like r300-r500
> fragment shaders lower it to LG2+MUL+EX2.
>
> Marek
>
> On Sun, Jul 13, 2014 at 7:50 PM, Thomas Helland
>  wrote:
>> Hi all,
>>
>> I've been looking at some shaders from Portal recently.
>> A lot of them seem to hand-roll a pow-function with
>> log2, multiply, and exp2, like this:
>>
>> r3.x = log2( r3.x );
>> r3.y = log2( r3.y );
>> r3.z = log2( r3.z );
>> r3.xyz = r3.xyz * vd2.zzz;
>> r3.x = exp2( r3.x );
>> r3.y = exp2( r3.y );
>> r3.z = exp2( r3.z );
>>
>> The corresponding output from the shader-db run shows:
>>
>>(assign  (xyz) (var_ref r3)  (expression vec3 log2 (swiz xyz (var_ref r3)
>> )) )
>> 0x03c8: math log(8) g27<1>.xyzF g27<4,4,1>.xyzzF null
>> { align16 WE_normal 1Q compacted };
>>(assign  (xz) (var_ref r6)  (swiz yy (array_ref (var_ref vc) (constant
>> int (0)) ) ))
>> 0x03d0: mov(8)  g25<1>.xzF  g1<0,4,1>.yF
>> { align16 WE_normal 1Q };
>>(assign  (w) (var_ref r0)  (expression float + (expression float dot
>> (swiz xyz (array_ref (var_ref vc) (constant int (28)) ) )(expression vec3
>> neg (swiz xyz (var_ref r5) )) ) (expression float neg (swiz z (array_ref
>> (var_ref vc) (constant int (30)) ) )) ) )
>> 0x03e0: dp3(8)  g105<1>.xF  g5.4<0,4,1>.xyzzF
>> -g26<4,4,1>.xyzzF { align16 WE_normal 1Q };
>>(assign  (xyz) (var_ref r0)  (expression vec3 + (expression vec3 * (swiz
>> xyz (var_ref r0) )(expression vec3 neg (swiz xxx (var_ref
>> inversesqrt_retval) )) ) (expression vec3 neg (swiz xyz (array_ref (var_ref
>> vc) (constant int (28)) ) )) ) )
>> 0x03f0: mad(8)  g30<1>.xyzF g108<4,4,1>F.xyzz
>> g30<4,4,1>F.xyzz -g97<4,4,1>F.x { align16 WE_normal 1Q };
>>(assign  (xyz) (var_ref r6)  (expression vec3 * (swiz yxx (var_ref r7)
>> )(swiz xyz (var_ref r6) )) )
>> 0x0400: mul(8)  g25<1>.xyzF g24<4,4,1>.yxxxF
>> g25<4,4,1>.xyzzF { align16 WE_normal 1Q };
>>(assign  (xyz) (var_ref r3)  (expression vec3 * (swiz xyz (var_ref r3)
>> )(constant vec3 (2.20 2.20 2.20)) ) )
>> 0x0410: mul(8)  g27<1>.xyzF g27<4,4,1>.xyzzF 2.2F
>> { align16 WE_normal 1Q };
>>(assign  (w) (var_ref r0)  (expression float + (expression float dot
>> (swiz xyz (array_ref (var_ref vc) (constant int (28)) ) )(expression vec3
>> neg (swiz xyz (var_ref r5) )) ) (expression float neg (swiz z (array_ref
>> (var_ref vc) (constant int (30)) ) )) ) )
>> 0x0420: add(8)  g107<1>.xF  g105<4,4,1>.xF  -g6.4<0,4,1>.zF
>> { align16 WE_normal 1Q };
>>(assign  (xyz) (var_ref r0)  (expression vec3 + (expression vec3 * (swiz
>> www (array_ref (var_ref vc) (constant int (27)) ) )(swiz xyz (var_ref r0) ))
>> (swiz xyz (var_ref r5) )) )
>> 0x0430: mad(8)  g30<1>.xyzF g26<4,4,1>F.xyzz g5.3<0,1,0>F.w
>> g30<4,4,1>F.xyzz { align16 WE_normal 1Q };
>>(assign  (x) (var_ref dot_retval)  (expression float dot (swiz xyz
>> (array_ref (var_ref vc) (constant int (31)) ) )(swiz xyz (var_ref r6) )) )
>> 0x0440: dp3(8)  g18<1>.xF   g7<0,4,1>.xyzzF g25<4,4,1>.xyzzF
>> { align16 WE_normal 1Q compacted };
>>(assign  (xyz) (var_ref r3)  (expression vec3 exp2 (swiz xyz (var_ref r3)
>> )) )
>> 0x0448: math exp(8) g27<1>.xyzF g27<4,4,1>.xyzzF null
>> { align16 WE_normal 1Q compacted };
>>
>>
>> As far as I've been able to understand, the i965 hardware
>> has a pow() instruction, how is this with other hardware?
>>
>> I've considered writing an algebraic optimization to convert
>> this into an ir_binop_pow. If my understanding is correct the backend
>> will then implement this in a similar fashion as above if it does not
>> have a native pow() instruction.
>>
>> If, on the other hand, we have a pow() instruction, my guess is
>> we'd see reduced instruction-counts.
>>
>> Is my understanding correct? Is this something that's worth doing?
>>
>> regards,
>> Thomas Helland
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 81500] wrong color in flightgear for the c172p if "Atmospheric light scattering" is used

2014-07-18 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=81500

--- Comment #10 from Barto  ---
Created attachment 103066
  --> https://bugs.freedesktop.org/attachment.cgi?id=103066&action=edit
bisect log, 12 commits who can be the guilty

the bisect log,

in fact there are 12 commits that could be guilty

The first bad commit could be any of:
7d73c3e99ec14031e3834096f7e8e257338b64d4
1ff5a2b1ba2148b772f5e5c86d64c3cb18e1ce97
fe37cb0ac67071759a88ea767027368399e1fdb6
5699220cd5719be6fbafdefd75025a817bcb200a
c557eb77225433fa9415a94fc9db3ce36374df64
ba7195d126ce20bf74a27725224662aaca4d90ef
d030a3404ca0fedf365cb0fd41eaad7abc8ff132
7ff937e5793dc8709c916e043b41142033c8e69e
8f5852bd2b91df7b259e5aeafb6a62a4268ca4c4
0939d3d0974a579fa65b76ebc6074d61e11f03b0
ca21cffebd063354291d561eadc2ded8795a5333
e608449d3e7dc86b90acfb31d9c948c57cf0e920

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >