date:20160722

[Mesa-dev] [PATCH 3/3] i965/miptree: Stop multiplying cube depth by 6 in HiZ calculations

2016-07-22 Thread Jason Ekstrand

intel_mipmap_tree::logical_depth0 is now in number of 2D slices so we no
longer need to be multiplying by 6.

Signed-off-by: Jason Ekstrand 
Cc: "12.0" 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 ++-
 1 file changed, 2 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index c8e84b7..a988221 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -1794,14 +1794,8 @@ intel_gen7_hiz_buf_create(struct brw_context *brw,
   hz_height = DIV_ROUND_UP(hz_height, 2);
} else {
   const unsigned hz_qpitch = h0 + h1 + (12 * vertical_align);
-  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
-  mt->target == GL_TEXTURE_CUBE_MAP) {
- /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth * 6/2) /8 ) * 8 */
- hz_height = DIV_ROUND_UP(hz_qpitch * Z0 * 6, 2 * 8) * 8;
-  } else {
- /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
- hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
-  }
+  /* HZ_Height (rows) = Ceiling ( ( Q_pitch * Z_depth/2) /8 ) * 8 */
+  hz_height = DIV_ROUND_UP(hz_qpitch * Z0, 2 * 8) * 8;
}
 
unsigned long pitch;
@@ -1898,15 +1892,6 @@ intel_gen8_hiz_buf_create(struct brw_context *brw,
} else {
   /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * Z_Depth */
   hz_height = DIV_ROUND_UP(buf->qpitch, 2 * 8) * 8 * Z0;
-  if (mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
-  mt->target == GL_TEXTURE_CUBE_MAP) {
- /* HZ_Height (rows) = ceiling( (HZ_QPitch/2)/8) *8 * 6 * Z_Depth
-  *
-  * We can can just take our hz_height calculation from above, and
-  * multiply by 6 for the cube map and cube map array types.
-  */
- hz_height *= 6;
-  }
}
 
unsigned long pitch;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 2/3] i965/miptree/isl: Stop multiplying depth by 6 for cubes

2016-07-22 Thread Jason Ekstrand

Now that the logical_depth0 field is in number of 2D slices, we don't need
to be multiplying by 6 when creating the surface.  It wasn't hurting
anything primarily because we get the actual length from the view which was
already handling it correctly.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 1e03f7e..c8e84b7 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3102,11 +3102,6 @@ intel_miptree_get_isl_surf(struct brw_context *brw,
if (surf->dim == ISL_SURF_DIM_3D) {
   surf->logical_level0_px.depth = mt->logical_depth0;
   surf->logical_level0_px.array_len = 1;
-   } else if (mt->target == GL_TEXTURE_CUBE_MAP ||
-  mt->target == GL_TEXTURE_CUBE_MAP_ARRAY) {
-  /* For cube maps, mt->logical_depth0 is in number of cubes */
-  surf->logical_level0_px.depth = 1;
-  surf->logical_level0_px.array_len = mt->logical_depth0 * 6;
} else {
   surf->logical_level0_px.depth = 1;
   surf->logical_level0_px.array_len = mt->logical_depth0;
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/3] i965/blorp/gen8: Stop multiplying depth by 6 for cubes

2016-07-22 Thread Jason Ekstrand

intel_mipmap_tree::logical_depth0 is now in 2-D slices so there is no need
for us to multiply by 6 when we go to fill out a blorp surface state.

Signed-off-by: Jason Ekstrand 
---
 src/mesa/drivers/dri/i965/gen8_blorp.c | 5 +
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen8_blorp.c 
b/src/mesa/drivers/dri/i965/gen8_blorp.c
index 870b67f..ab9b747 100644
--- a/src/mesa/drivers/dri/i965/gen8_blorp.c
+++ b/src/mesa/drivers/dri/i965/gen8_blorp.c
@@ -526,9 +526,6 @@ gen8_blorp_emit_surface_states(struct brw_context *brw,
   mt->msaa_layout == INTEL_MSAA_LAYOUT_CMS) ?
  MAX2(mt->num_samples, 1) : 1;
 
-  const bool is_cube = mt->target == GL_TEXTURE_CUBE_MAP_ARRAY ||
-   mt->target == GL_TEXTURE_CUBE_MAP;
-  const unsigned depth = (is_cube ? 6 : 1) * mt->logical_depth0;
   const unsigned layer = mt->target != GL_TEXTURE_3D ?
 surface->layer / layer_divider : 0;
 
@@ -537,7 +534,7 @@ gen8_blorp_emit_surface_states(struct brw_context *brw,
  .base_level = surface->level,
  .levels = mt->last_level - surface->level + 1,
  .base_array_layer = layer,
- .array_len = depth - layer,
+ .array_len = mt->logical_depth0 - layer,
  .channel_select = {
 swizzle_to_scs(GET_SWZ(surface->swizzle, 0)),
 swizzle_to_scs(GET_SWZ(surface->swizzle, 1)),
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 13/21] i965: Resolve color for non-coherent FB fetch at UpdateState time.

2016-07-22 Thread Francisco Jerez

This is required because the sampler unit used to fetch from the
framebuffer is unable to interpret non-color-compressed fast-cleared
single-sample texture data.  Roughly the same limitation applies for
surfaces bound to texture or image units, but unlike texture sampling,
non-coherent framebuffer fetch is by definition non-coherent with
previous rendering, so the brw_render_cache_set_check_flush() call can
be omitted except after resolve.
---
 src/mesa/drivers/dri/i965/brw_context.c | 17 +
 1 file changed, 17 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_context.c 
b/src/mesa/drivers/dri/i965/brw_context.c
index cb74200..d6f0d7b 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -262,6 +262,23 @@ intel_update_state(struct gl_context * ctx, GLuint 
new_state)
   }
}
 
+   /* Resolve color buffers for non-coherent framebuffer fetch. */
+   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
+   ctx->FragmentProgram._Current &&
+   ctx->FragmentProgram._Current->Base.OutputsRead) {
+  const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+  for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ const struct intel_renderbuffer *irb =
+intel_renderbuffer(fb->_ColorDrawBuffers[i]);
+
+ if (irb &&
+ intel_miptree_resolve_color(brw, irb->mt,
+ INTEL_MIPTREE_IGNORE_CCS_E))
+brw_render_cache_set_check_flush(brw, irb->mt->bo);
+  }
+   }
+
/* If FRAMEBUFFER_SRGB is used on Gen9+ then we need to resolve any of the
 * single-sampled color renderbuffers because the CCS buffer isn't
 * supported for SRGB formats. This only matters if FRAMEBUFFER_SRGB is
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/21] i965: Allocate space in the binding table for non-coherent FB fetch.

2016-07-22 Thread Francisco Jerez

Unfortunately due to the inconsistent meaning of some surface state
structure fields, we cannot re-use the same binding table entries for
sampling from and rendering into the same set of render buffers, so we
need to allocate a separate binding table block specifically for
render target reads if the non-coherent path is in use.

The slight noise is due to the change of
brw_assign_common_binding_table_offsets to return the next available
binding table index rather than void.
---
 src/mesa/drivers/dri/i965/brw_compiler.h |  1 +
 src/mesa/drivers/dri/i965/brw_shader.cpp |  7 ---
 src/mesa/drivers/dri/i965/brw_shader.h   |  2 +-
 src/mesa/drivers/dri/i965/brw_wm.c   | 13 ++---
 4 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index d3bbdee..e0d3dff 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -387,6 +387,7 @@ struct brw_wm_prog_data {
* surface indices the WM-specific surfaces
*/
   uint32_t render_target_start;
+  uint32_t render_target_read_start;
   /** @} */
} binding_table;
 
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp 
b/src/mesa/drivers/dri/i965/brw_shader.cpp
index 559e44c..7e2c5f5 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -1148,7 +1148,7 @@ backend_shader::calculate_cfg()
  * unused but also make sure that addition of small offsets to them will
  * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
  */
-void
+uint32_t
 brw_assign_common_binding_table_offsets(gl_shader_stage stage,
 const struct brw_device_info *devinfo,
 const struct gl_shader_program 
*shader_prog,
@@ -1224,9 +1224,10 @@ brw_assign_common_binding_table_offsets(gl_shader_stage 
stage,
stage_prog_data->binding_table.plane_start[2] = next_binding_table_offset;
next_binding_table_offset += num_textures;
 
-   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
-
/* prog_data->base.binding_table.size will be set by brw_mark_surface_used. 
*/
+
+   assert(next_binding_table_offset <= BRW_MAX_SURFACES);
+   return next_binding_table_offset;
 }
 
 static void
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h 
b/src/mesa/drivers/dri/i965/brw_shader.h
index e61c080..3b3be07 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -261,7 +261,7 @@ struct brw_gs_compile
unsigned control_data_header_size_bits;
 };
 
-void
+uint32_t
 brw_assign_common_binding_table_offsets(gl_shader_stage stage,
 const struct brw_device_info *devinfo,
 const struct gl_shader_program 
*shader_prog,
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 6209fc1..f3e6ece 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -56,9 +56,16 @@ assign_fs_binding_table_offsets(const struct brw_device_info 
*devinfo,
prog_data->binding_table.render_target_start = next_binding_table_offset;
next_binding_table_offset += MAX2(key->nr_color_regions, 1);
 
-   brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo,
-   shader_prog, prog, &prog_data->base,
-   next_binding_table_offset);
+   next_binding_table_offset =
+  brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo,
+  shader_prog, prog, 
&prog_data->base,
+  next_binding_table_offset);
+
+   if (prog->nir->info.outputs_read && !key->coherent_fb_fetch) {
+  prog_data->binding_table.render_target_read_start =
+ next_binding_table_offset;
+  next_binding_table_offset += key->nr_color_regions;
+   }
 }
 
 /**
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 19/21] i965: Upload surface state for non-coherent framebuffer fetch.

2016-07-22 Thread Francisco Jerez

This iterates over the list of attached render buffers and binds
appropriate surface state structures to the binding table block
allocated for shader framebuffer read.
---
 src/mesa/drivers/dri/i965/brw_state.h|  1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |  4 ++
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 89 
 3 files changed, 94 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index e713e1d..8bbaa07 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -86,6 +86,7 @@ extern const struct brw_tracked_state brw_gs_abo_surfaces;
 extern const struct brw_tracked_state brw_gs_image_surfaces;
 extern const struct brw_tracked_state brw_vs_unit;
 extern const struct brw_tracked_state brw_renderbuffer_surfaces;
+extern const struct brw_tracked_state brw_renderbuffer_read_surfaces;
 extern const struct brw_tracked_state brw_texture_surfaces;
 extern const struct brw_tracked_state brw_wm_binding_table;
 extern const struct brw_tracked_state brw_gs_binding_table;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c 
b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 6d45856..69acf3b 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -61,6 +61,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
&brw_vs_pull_constants,
&brw_wm_pull_constants,
&brw_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_wm_binding_table,
@@ -130,6 +131,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
&gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
&gen6_sol_surface,
&brw_vs_binding_table,
@@ -214,6 +216,7 @@ static const struct brw_tracked_state *gen7_render_atoms[] =
&brw_wm_ubo_surfaces,
&brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_tcs_binding_table,
@@ -317,6 +320,7 @@ static const struct brw_tracked_state *gen8_render_atoms[] =
&brw_wm_ubo_surfaces,
&brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
+   &brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
&brw_vs_binding_table,
&brw_tcs_binding_table,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 80c087f..162a7e1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1024,6 +1024,95 @@ const struct brw_tracked_state 
gen6_renderbuffer_surfaces = {
.emit = update_renderbuffer_surfaces,
 };
 
+static void
+update_renderbuffer_read_surfaces(struct brw_context *brw)
+{
+   const struct gl_context *ctx = &brw->ctx;
+
+   /* BRW_NEW_FRAGMENT_PROGRAM */
+   if (!ctx->Extensions.MESA_shader_framebuffer_fetch &&
+   brw->fragment_program &&
+   brw->fragment_program->Base.OutputsRead) {
+  /* _NEW_BUFFERS */
+  const struct gl_framebuffer *fb = ctx->DrawBuffer;
+
+  for (unsigned i = 0; i < fb->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = fb->_ColorDrawBuffers[i];
+ const struct intel_renderbuffer *irb = intel_renderbuffer(rb);
+ const unsigned surf_index =
+brw->wm.prog_data->binding_table.render_target_read_start + i;
+ uint32_t *surf_offset = &brw->wm.base.surf_offset[surf_index];
+
+ if (irb) {
+const unsigned format = brw->render_target_format[
+   _mesa_get_render_format(ctx, intel_rb_format(irb))];
+assert(isl_format_supports_sampling(brw->intelScreen->devinfo,
+format));
+
+/* Override the target of the texture if the render buffer is a
+ * single slice of a 3D texture (since the minimum array element
+ * field of the surface state structure is ignored by the sampler
+ * unit for 3D textures on some hardware), or if the render buffer
+ * is a 1D array (since shaders always provide the array index
+ * coordinate at the Z component to avoid state-dependent
+ * recompiles when changing the texture target of the
+ * framebuffer).
+ */
+const GLenum target =
+   (irb->mt->target == GL_TEXTURE_3D &&
+irb->layer_count == 1) ? GL_TEXTURE_2D :
+   irb->mt->target == GL_TEXTURE_1D_ARRAY ? GL_TEXTURE_2D_ARRAY :
+   irb->mt->target;
+
+/* intel_renderbuffer::mt_layer is expressed in sample units for
+ * the UMS and CMS multisample layouts, but
+ * intel_renderbuffer::layer_count is expressed in units of whole
+

[Mesa-dev] [PATCH 14/21] i965: Factor out isl_surf_dim/isl_dim_layout calculation into functions.

2016-07-22 Thread Francisco Jerez

The logic to calculate the right layout and dimensionality for a given
GL texture target is going to be useful elsewhere, factor it out from
intel_miptree_get_isl_surf().
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 71 ++-
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  7 +++
 2 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index d53fd74..5bf9243 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2993,21 +2993,41 @@ intel_miptree_unmap(struct brw_context *brw,
intel_miptree_release_map(mt, level, slice);
 }
 
-void
-intel_miptree_get_isl_surf(struct brw_context *brw,
-   const struct intel_mipmap_tree *mt,
-   struct isl_surf *surf)
+enum isl_surf_dim
+get_isl_surf_dim(GLenum target)
 {
-   switch (mt->target) {
+   switch (target) {
case GL_TEXTURE_1D:
-   case GL_TEXTURE_1D_ARRAY: {
-  surf->dim = ISL_SURF_DIM_1D;
-  if (brw->gen >= 9 && mt->tiling == I915_TILING_NONE)
- surf->dim_layout = ISL_DIM_LAYOUT_GEN9_1D;
-  else
- surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
-  break;
+   case GL_TEXTURE_1D_ARRAY:
+  return ISL_SURF_DIM_1D;
+
+   case GL_TEXTURE_2D:
+   case GL_TEXTURE_2D_ARRAY:
+   case GL_TEXTURE_RECTANGLE:
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+   case GL_TEXTURE_2D_MULTISAMPLE:
+   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
+   case GL_TEXTURE_EXTERNAL_OES:
+  return ISL_SURF_DIM_2D;
+
+   case GL_TEXTURE_3D:
+  return ISL_SURF_DIM_3D;
}
+
+   unreachable("Invalid texture target");
+}
+
+enum isl_dim_layout
+get_isl_dim_layout(const struct brw_device_info *devinfo, uint32_t tiling,
+   GLenum target)
+{
+   switch (target) {
+   case GL_TEXTURE_1D:
+   case GL_TEXTURE_1D_ARRAY:
+  return (devinfo->gen >= 9 && tiling == I915_TILING_NONE ?
+  ISL_DIM_LAYOUT_GEN9_1D : ISL_DIM_LAYOUT_GEN4_2D);
+
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_RECTANGLE:
@@ -3016,20 +3036,25 @@ intel_miptree_get_isl_surf(struct brw_context *brw,
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_EXTERNAL_OES:
-  surf->dim = ISL_SURF_DIM_2D;
-  surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
-  break;
+  return ISL_DIM_LAYOUT_GEN4_2D;
+
case GL_TEXTURE_3D:
-  surf->dim = ISL_SURF_DIM_3D;
-  if (brw->gen >= 9)
- surf->dim_layout = ISL_DIM_LAYOUT_GEN4_2D;
-  else
- surf->dim_layout = ISL_DIM_LAYOUT_GEN4_3D;
-  break;
-   default:
-  unreachable("Invalid texture target");
+  return (devinfo->gen >= 9 ?
+  ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
}
 
+   unreachable("Invalid texture target");
+}
+
+void
+intel_miptree_get_isl_surf(struct brw_context *brw,
+   const struct intel_mipmap_tree *mt,
+   struct isl_surf *surf)
+{
+   surf->dim = get_isl_surf_dim(mt->target);
+   surf->dim_layout = get_isl_dim_layout(brw->intelScreen->devinfo,
+ mt->tiling, mt->target);
+
if (mt->num_samples > 1) {
   switch (mt->msaa_layout) {
   case INTEL_MSAA_LAYOUT_IMS:
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index f3bc9e1..4e22182 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -797,6 +797,13 @@ intel_miptree_get_image_offset(const struct 
intel_mipmap_tree *mt,
   GLuint level, GLuint slice,
   GLuint *x, GLuint *y);
 
+enum isl_surf_dim
+get_isl_surf_dim(GLenum target);
+
+enum isl_dim_layout
+get_isl_dim_layout(const struct brw_device_info *devinfo, uint32_t tiling,
+   GLenum target);
+
 void
 intel_miptree_get_isl_surf(struct brw_context *brw,
const struct intel_mipmap_tree *mt,
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 10/21] i965/fs: Allocate fragment output temporaries on demand.

2016-07-22 Thread Francisco Jerez

This gets rid of the duplication of logic between nir_setup_outputs()
and get_frag_output() by allocating fragment output temporaries lazily
whenever get_frag_output() is called.  This makes nir_setup_outputs()
a no-op for the fragment shader stage.
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 73 
 1 file changed, 27 insertions(+), 46 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 8e069e0..281c704 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -81,11 +81,9 @@ fs_visitor::nir_setup_single_output_varying(fs_reg *reg,
 void
 fs_visitor::nir_setup_outputs()
 {
-   if (stage == MESA_SHADER_TESS_CTRL)
+   if (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_FRAGMENT)
   return;
 
-   brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
-
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
 
nir_foreach_variable(var, &nir->outputs) {
@@ -98,41 +96,6 @@ fs_visitor::nir_setup_outputs()
  nir_setup_single_output_varying(&reg, var->type, &location);
  break;
   }
-  case MESA_SHADER_FRAGMENT: {
- const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
- type_size_vec4_times_4(var->type));
-
- if (key->force_dual_color_blend &&
- var->data.location == FRAG_RESULT_DATA1) {
-this->dual_src_output = reg;
- } else if (var->data.index > 0) {
-assert(var->data.location == FRAG_RESULT_DATA0);
-assert(var->data.index == 1);
-this->dual_src_output = reg;
- } else if (var->data.location == FRAG_RESULT_COLOR) {
-/* Writing gl_FragColor outputs to all color regions. */
-for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
-   this->outputs[i] = reg;
-}
- } else if (var->data.location == FRAG_RESULT_DEPTH) {
-this->frag_depth = reg;
- } else if (var->data.location == FRAG_RESULT_STENCIL) {
-this->frag_stencil = reg;
- } else if (var->data.location == FRAG_RESULT_SAMPLE_MASK) {
-this->sample_mask = reg;
- } else {
-/* gl_FragData or a user-defined FS output */
-assert(var->data.location >= FRAG_RESULT_DATA0 &&
-   var->data.location < 
FRAG_RESULT_DATA0+BRW_MAX_DRAW_BUFFERS);
-
-/* General color output. */
-for (unsigned int i = 0; i < MAX2(1, var->type->length); i++) {
-   int output = var->data.location - FRAG_RESULT_DATA0 + i;
-   this->outputs[output] = offset(reg, bld, 4 * i);
-}
- }
- break;
-  }
   default:
  unreachable("unhandled shader stage");
   }
@@ -3248,7 +3211,23 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg 
&dst, unsigned target)
 }
 
 static fs_reg
-get_frag_output(const fs_visitor *v, unsigned location)
+alloc_temporary(const fs_builder &bld, unsigned size, fs_reg *regs, unsigned n)
+{
+   if (n && regs[0].file != BAD_FILE) {
+  return regs[0];
+
+   } else {
+  const fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_F, size);
+
+  for (unsigned i = 0; i < n; i++)
+ regs[i] = tmp;
+
+  return tmp;
+   }
+}
+
+static fs_reg
+alloc_frag_output(fs_visitor *v, unsigned location)
 {
assert(v->stage == MESA_SHADER_FRAGMENT);
const brw_wm_prog_key *const key =
@@ -3257,23 +3236,25 @@ get_frag_output(const fs_visitor *v, unsigned location)
const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
 
if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
-  return v->dual_src_output;
+  return alloc_temporary(v->bld, 4, &v->dual_src_output, 1);
 
else if (l == FRAG_RESULT_COLOR)
-  return v->outputs[0];
+  return alloc_temporary(v->bld, 4, v->outputs,
+ MAX2(key->nr_color_regions, 1));
 
else if (l == FRAG_RESULT_DEPTH)
-  return v->frag_depth;
+  return alloc_temporary(v->bld, 1, &v->frag_depth, 1);
 
else if (l == FRAG_RESULT_STENCIL)
-  return v->frag_stencil;
+  return alloc_temporary(v->bld, 1, &v->frag_stencil, 1);
 
else if (l == FRAG_RESULT_SAMPLE_MASK)
-  return v->sample_mask;
+  return alloc_temporary(v->bld, 1, &v->sample_mask, 1);
 
else if (l >= FRAG_RESULT_DATA0 &&
 l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
-  return v->outputs[l - FRAG_RESULT_DATA0];
+  return alloc_temporary(v->bld, 4,
+ &v->outputs[l - FRAG_RESULT_DATA0], 1);
 
else
   unreachable("Invalid location");
@@ -3321,7 +3302,7 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
   assert(const_offset && "Indirect output stores not allowed");
   const unsigned location = nir_intrinsic_base(instr) +
  SET_FIELD(const_offset-

[Mesa-dev] [PATCH 06/21] i965/fs: Implement non-coherent framebuffer fetch using the sampler unit.

2016-07-22 Thread Francisco Jerez

---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 89 
 1 file changed, 89 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 2872b2d..f5f918d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3155,6 +3155,95 @@ fs_visitor::nir_emit_gs_intrinsic(const fs_builder &bld,
}
 }
 
+/**
+ * Fetch the current render target layer index.
+ */
+static fs_reg
+fetch_render_target_array_index(const fs_builder &bld)
+{
+   if (bld.shader->devinfo->gen >= 6) {
+  /* The render target array index is provided in the thread payload as
+   * bits 26:16 of r0.0.
+   */
+  const fs_reg idx = bld.vgrf(BRW_REGISTER_TYPE_UD);
+  bld.AND(idx, brw_uw1_reg(BRW_GENERAL_REGISTER_FILE, 0, 1),
+  brw_imm_uw(0x7ff));
+  return idx;
+   } else {
+  /* Pre-SNB we only ever render into the first layer of the framebuffer
+   * since layered rendering is not implemented.
+   */
+  return brw_imm_ud(0);
+   }
+}
+
+/**
+ * Fake non-coherent framebuffer read implemented using TXF to fetch from the
+ * framebuffer at the current fragment coordinates and sample index.
+ */
+static fs_inst *
+emit_non_coherent_fb_read(fs_visitor *v, const fs_reg &dst, unsigned target)
+{
+   const fs_builder &bld = v->bld;
+   const struct brw_device_info *devinfo = bld.shader->devinfo;
+
+   assert(bld.shader->stage == MESA_SHADER_FRAGMENT);
+   const brw_wm_prog_key *wm_key =
+  reinterpret_cast<const brw_wm_prog_key *>(v->key);
+   assert(!wm_key->coherent_fb_fetch);
+   const brw_wm_prog_data *wm_prog_data =
+  reinterpret_cast<const brw_wm_prog_data *>(bld.shader->stage_prog_data);
+
+   /* Calculate the surface index relative to the start of the texture binding
+* table block, since that's what the texturing messages expect.
+*/
+   const unsigned surface = target +
+  wm_prog_data->binding_table.render_target_read_start -
+  wm_prog_data->base.binding_table.texture_start;
+
+   brw_mark_surface_used(
+  bld.shader->stage_prog_data,
+  wm_prog_data->binding_table.render_target_read_start + target);
+
+   /* Calculate the fragment coordinates. */
+   const fs_reg coords = bld.vgrf(BRW_REGISTER_TYPE_UD, 3);
+   bld.MOV(offset(coords, bld, 0), v->pixel_x);
+   bld.MOV(offset(coords, bld, 1), v->pixel_y);
+   bld.MOV(offset(coords, bld, 2), fetch_render_target_array_index(bld));
+
+   /* Calculate the sample index and MCS payload when multisampling.  Luckily
+* the MCS fetch message behaves deterministically for UMS surfaces, so it
+* shouldn't be necessary to recompile based on whether the framebuffer is
+* CMS or UMS.
+*/
+   const fs_reg sample = wm_key->multisample_fbo ?
+  *v->emit_sampleid_setup() : fs_reg();
+   const fs_reg mcs = wm_key->multisample_fbo ?
+  v->emit_mcs_fetch(coords, 3, brw_imm_ud(surface)) : fs_reg();
+
+   /* Use either a normal or a CMS texel fetch message depending on whether
+* the framebuffer is single or multisample.  On SKL+ use the wide CMS
+* message just in case the framebuffer uses 16x multisampling, it should
+* be equivalent to the normal CMS fetch for lower multisampling modes.
+*/
+   const opcode op = (!wm_key->multisample_fbo ? SHADER_OPCODE_TXF_LOGICAL :
+  devinfo->gen >= 9 ? SHADER_OPCODE_TXF_CMS_W_LOGICAL :
+  SHADER_OPCODE_TXF_CMS_LOGICAL);
+
+   /* Emit the instruction. */
+   const fs_reg srcs[] = { coords, fs_reg(), brw_imm_ud(0), fs_reg(),
+   sample, mcs,
+   brw_imm_ud(surface), brw_imm_ud(0),
+   fs_reg(), brw_imm_ud(3), brw_imm_ud(0) };
+   STATIC_ASSERT(ARRAY_SIZE(srcs) == TEX_LOGICAL_NUM_SRCS);
+
+   fs_inst *inst = bld.emit(op, dst, srcs, ARRAY_SIZE(srcs));
+   inst->regs_written = 4 * inst->dst.component_size(inst->exec_size) /
+REG_SIZE;
+
+   return inst;
+}
+
 void
 fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
   nir_intrinsic_instr *instr)
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 21/21] i965: Flip the non-coherent framebuffer fetch extension bit on G45-Gen8 hardware.

2016-07-22 Thread Francisco Jerez

This is not enabled on the original Gen4 part because it lacks surface
state tile offsets so it may not be possible to sample from arbitrary
non-zero layers of the framebuffer depending on the miptree layout (it
should be possible to work around this by allocating a scratch surface
and doing the same hack currently used for render targets, but meh...).

On Gen9+ even though it should mostly work (feel free to force-enable
it in order to compare the coherent and non-coherent paths in terms of
performance), there are some corner cases like 1D array layered
framebuffers that cannot be handled easily by the non-coherent path
because of the incompatible layout in memory of 1D and 2D miptrees (it
should be possible to work around this too by doing state-dependent
recompiles, but it's hard to care enough since Gen9 has native support
for coherent render target reads...)
---
 src/mesa/drivers/dri/i965/intel_extensions.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_extensions.c 
b/src/mesa/drivers/dri/i965/intel_extensions.c
index 40fe5aa..12bf454 100644
--- a/src/mesa/drivers/dri/i965/intel_extensions.c
+++ b/src/mesa/drivers/dri/i965/intel_extensions.c
@@ -280,6 +280,9 @@ intelInitExtensions(struct gl_context *ctx)
ctx->Extensions.EXT_shader_integer_mix = ctx->Const.GLSLVersion >= 130;
ctx->Extensions.MESA_shader_integer_functions = ctx->Const.GLSLVersion >= 
130;
 
+   if ((brw->is_g4x || brw->gen >= 5) && brw->gen < 9)
+  ctx->Extensions.MESA_shader_framebuffer_fetch_non_coherent = true;
+
if (brw->gen >= 5) {
   ctx->Extensions.ARB_texture_query_levels = ctx->Const.GLSLVersion >= 130;
   ctx->Extensions.ARB_texture_query_lod = true;
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/21] i965/fs: Force per-sample dispatch if the shader reads from a multisample FBO.

2016-07-22 Thread Francisco Jerez

The result of a framebuffer fetch from a multisample FBO is inherently
per-sample, so the spec requires at least those sections of the shader
that depend on the framebuffer fetch result to be executed once per
sample.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index f9af525..c8686d1 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6518,7 +6518,8 @@ brw_compile_fs(const struct brw_compiler *compiler, void 
*log_data,
   (key->persample_interp ||
(shader->info.system_values_read & (SYSTEM_BIT_SAMPLE_ID |
SYSTEM_BIT_SAMPLE_POS)) ||
-   shader->info.fs.uses_sample_qualifier);
+   shader->info.fs.uses_sample_qualifier ||
+   shader->info.outputs_read);
 
prog_data->early_fragment_tests = shader->info.fs.early_fragment_tests;
 
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 16/21] i965: Add missing has_surface_tile_offset flag to the Gen8+ device info structures.

2016-07-22 Thread Francisco Jerez

This surface state control has been supported by all hardware
generations since G45.
---
 src/mesa/drivers/dri/i965/brw_device_info.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_device_info.c 
b/src/mesa/drivers/dri/i965/brw_device_info.c
index 77bbe78..4d90aa3 100644
--- a/src/mesa/drivers/dri/i965/brw_device_info.c
+++ b/src/mesa/drivers/dri/i965/brw_device_info.c
@@ -252,6 +252,7 @@ static const struct brw_device_info brw_device_info_hsw_gt3 
= {
.has_llc = true, \
.has_pln = true, \
.supports_simd16_3src = true,\
+   .has_surface_tile_offset = true, \
.max_vs_threads = 504,   \
.max_hs_threads = 504,   \
.max_ds_threads = 504,   \
@@ -332,6 +333,7 @@ static const struct brw_device_info brw_device_info_chv = {
.has_llc = true, \
.has_pln = true, \
.supports_simd16_3src = true,\
+   .has_surface_tile_offset = true, \
.max_vs_threads = 336,   \
.max_gs_threads = 336,   \
.max_hs_threads = 336,   \
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 18/21] i965: Implement support for overriding the texture target in brw_emit_surface_state.

2016-07-22 Thread Francisco Jerez

This allows the caller to bind a miptree using a texture target other
than the one it was created with.  The code should work even if the
memory layouts of the specified and original targets don't match, as
long as the caller only intends to access a single slice of the
miptree structure.

This will be exploited by the next commit in order to support
non-coherent framebuffer fetch of a single layer of a 3D texture
(since some generations lack the minimum array element control for 3D
textures bound to the sampler unit), and multiple layers of a 1D array
texture (since binding it as an actual 1D array texture would require
state-dependent recompiles because the same shader couldn't
simultaneously work for 1D and 2D array textures due to the different
texel fetch coordinate ordering).
---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 54 ++--
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 009660d..80c087f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -78,12 +78,57 @@ brw_emit_surface_state(struct brw_context *brw,
uint32_t mocs, uint32_t *surf_offset, int surf_index,
unsigned read_domains, unsigned write_domains)
 {
-   assert(mt->target == target);
const struct surface_state_info ss_info = surface_state_infos[brw->gen];
+   uint32_t tile_x = 0, tile_y = 0;
+   uint32_t offset = mt->offset;
 
struct isl_surf surf;
intel_miptree_get_isl_surf(brw, mt, &surf);
 
+   surf.dim = get_isl_surf_dim(target);
+
+   const enum isl_dim_layout dim_layout =
+  get_isl_dim_layout(brw->intelScreen->devinfo, mt->tiling, target);
+
+   if (surf.dim_layout != dim_layout) {
+  /* The layout of the specified texture target is not compatible with the
+   * actual layout of the miptree structure in memory -- You're entering
+   * dangerous territory, this can only possibly work if you only intended
+   * to access a single level and slice of the texture, and the hardware
+   * supports the tile offset feature in order to allow non-tile-aligned
+   * base offsets, since we'll have to point the hardware to the first
+   * texel of the level instead of relying on the usual base level/layer
+   * controls.
+   */
+  assert(brw->has_surface_tile_offset);
+  assert(view.levels == 1 && view.array_len == 1);
+
+  offset += intel_miptree_get_tile_offsets(mt, view.base_level,
+   view.base_array_layer,
+   &tile_x, &tile_y);
+
+  /* Minify the logical dimensions of the texture. */
+  const unsigned l = view.base_level - mt->first_level;
+  surf.logical_level0_px.width = minify(surf.logical_level0_px.width, l);
+  surf.logical_level0_px.height = surf.dim <= ISL_SURF_DIM_1D ? 1 :
+ minify(surf.logical_level0_px.height, l);
+  surf.logical_level0_px.depth = surf.dim <= ISL_SURF_DIM_2D ? 1 :
+ minify(surf.logical_level0_px.depth, l);
+
+  /* Only the base level and layer can be addressed with the overridden
+   * layout.
+   */
+  surf.logical_level0_px.array_len = 1;
+  surf.levels = 1;
+  surf.dim_layout = dim_layout;
+
+  /* The requested slice of the texture is now at the base level and
+   * layer.
+   */
+  view.base_level = 0;
+  view.base_array_layer = 0;
+   }
+
union isl_color_value clear_color = { .u32 = { 0, 0, 0, 0 } };
 
struct isl_surf *aux_surf = NULL, aux_surf_s;
@@ -108,14 +153,15 @@ brw_emit_surface_state(struct brw_context *brw,
 surf_index, surf_offset);
 
isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &view,
-   .address = mt->bo->offset64 + mt->offset,
+   .address = mt->bo->offset64 + offset,
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
-   .mocs = mocs, .clear_color = clear_color);
+   .mocs = mocs, .clear_color = clear_color,
+   .x_offset_sa = tile_x, .y_offset_sa = tile_y);
 
drm_intel_bo_emit_reloc(brw->batch.bo,
*surf_offset + 4 * ss_info.reloc_dw,
-   mt->bo, mt->offset,
+   mt->bo, offset,
read_domains, write_domains);
 
if (aux_surf) {
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 12/21] i965: Return whether the miptree was resolved from intel_miptree_resolve_color().

2016-07-22 Thread Francisco Jerez

This will allow optimizing out the cache flush in some cases when
resolving wasn't necessary.
---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 12 
 src/mesa/drivers/dri/i965/intel_mipmap_tree.h |  2 +-
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 1e03f7e..d53fd74 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -2151,7 +2151,7 @@ intel_miptree_all_slices_resolve_depth(struct brw_context 
*brw,
 }
 
 
-void
+bool
 intel_miptree_resolve_color(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
 int flags)
@@ -2162,21 +2162,25 @@ intel_miptree_resolve_color(struct brw_context *brw,
 */
if ((flags & INTEL_MIPTREE_IGNORE_CCS_E) &&
intel_miptree_is_lossless_compressed(brw, mt))
-  return;
+  return false;
 
switch (mt->fast_clear_state) {
case INTEL_FAST_CLEAR_STATE_NO_MCS:
case INTEL_FAST_CLEAR_STATE_RESOLVED:
   /* No resolve needed */
-  break;
+  return false;
case INTEL_FAST_CLEAR_STATE_UNRESOLVED:
case INTEL_FAST_CLEAR_STATE_CLEAR:
   /* Fast color clear resolves only make sense for non-MSAA buffers. */
   if (mt->msaa_layout == INTEL_MSAA_LAYOUT_NONE ||
   intel_miptree_is_lossless_compressed(brw, mt)) {
  brw_blorp_resolve_color(brw, mt);
+ return true;
+  } else {
+ return false;
   }
-  break;
+   default:
+  unreachable("Invalid fast clear state");
}
 }
 
diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
index 4388741..f3bc9e1 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h
@@ -942,7 +942,7 @@ intel_miptree_used_for_rendering(struct intel_mipmap_tree 
*mt)
  */
 #define INTEL_MIPTREE_IGNORE_CCS_E (1 << 0)
 
-void
+bool
 intel_miptree_resolve_color(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
 int flags);
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/21] i965/fs: Translate nir_intrinsic_load_output on a fragment output.

2016-07-22 Thread Francisco Jerez

This gets the non-coherent framebuffer fetch path hooked up to the NIR
front-end.
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 281c704..d547366 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3312,6 +3312,25 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
   break;
}
 
+   case nir_intrinsic_load_output: {
+  const unsigned l = GET_FIELD(nir_intrinsic_base(instr),
+   BRW_NIR_FRAG_OUTPUT_LOCATION);
+  assert(l >= FRAG_RESULT_DATA0);
+  nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
+  assert(const_offset && "Indirect output loads not allowed");
+  const unsigned target = l - FRAG_RESULT_DATA0 + const_offset->u32[0];
+  const fs_reg tmp = bld.vgrf(dest.type, 4);
+
+  assert(!reinterpret_cast<const brw_wm_prog_key *>(key)->coherent_fb_fetch);
+  emit_non_coherent_fb_read(this, tmp, target);
+
+  for (unsigned j = 0; j < instr->num_components; j++)
+ bld.MOV(offset(dest, bld, j),
+ offset(tmp, bld, nir_intrinsic_component(instr) + j));
+
+  break;
+   }
+
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
   /* We track our discarded pixels in f0.1.  By predicating on it, we can
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/21] i965/fs: Get rid of fs_visitor::do_dual_src.

2016-07-22 Thread Francisco Jerez

This boolean flag was being used for two different things:

 - To set the brw_wm_prog_data::dual_src_blend flag.  Instead we can
   just set it based on whether the dual_src_output register is valid,
   which will be the case if the shader writes the secondary blending
   color.

 - To decide whether to call emit_single_fb_write() once, or in a loop
   that would iterate only once, which seems pretty useless.
---
 src/mesa/drivers/dri/i965/brw_fs.h   |  1 -
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp |  2 --
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp | 37 +++-
 3 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.h 
b/src/mesa/drivers/dri/i965/brw_fs.h
index fc1e1c4..46b15b4 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.h
+++ b/src/mesa/drivers/dri/i965/brw_fs.h
@@ -318,7 +318,6 @@ public:
fs_reg sample_mask;
fs_reg outputs[VARYING_SLOT_MAX];
fs_reg dual_src_output;
-   bool do_dual_src;
int first_non_payload_grf;
/** Either BRW_MAX_GRF or GEN7_MRF_HACK_START */
unsigned max_grf;
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 50d73eb..2872b2d 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -103,12 +103,10 @@ fs_visitor::nir_setup_outputs()
  if (key->force_dual_color_blend &&
  var->data.location == FRAG_RESULT_DATA1) {
 this->dual_src_output = reg;
-this->do_dual_src = true;
  } else if (var->data.index > 0) {
 assert(var->data.location == FRAG_RESULT_DATA0);
 assert(var->data.index == 1);
 this->dual_src_output = reg;
-this->do_dual_src = true;
  } else if (var->data.location == FRAG_RESULT_COLOR) {
 /* Writing gl_FragColor outputs to all color regions. */
 for (unsigned int i = 0; i < MAX2(key->nr_color_regions, 1); i++) {
diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 6d84374..808d8af 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -437,33 +437,25 @@ fs_visitor::emit_fb_writes()
"in SIMD16+ mode.\n");
}
 
-   if (do_dual_src) {
-  const fs_builder abld = bld.annotate("FB dual-source write");
+   for (int target = 0; target < key->nr_color_regions; target++) {
+  /* Skip over outputs that weren't written. */
+  if (this->outputs[target].file == BAD_FILE)
+ continue;
 
-  inst = emit_single_fb_write(abld, this->outputs[0],
-  this->dual_src_output, reg_undef, 4);
-  inst->target = 0;
-
-  prog_data->dual_src_blend = true;
-   } else {
-  for (int target = 0; target < key->nr_color_regions; target++) {
- /* Skip over outputs that weren't written. */
- if (this->outputs[target].file == BAD_FILE)
-continue;
+  const fs_builder abld = bld.annotate(
+ ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
 
- const fs_builder abld = bld.annotate(
-ralloc_asprintf(this->mem_ctx, "FB write target %d", target));
+  fs_reg src0_alpha;
+  if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
+ src0_alpha = offset(outputs[0], bld, 3);
 
- fs_reg src0_alpha;
- if (devinfo->gen >= 6 && key->replicate_alpha && target != 0)
-src0_alpha = offset(outputs[0], bld, 3);
-
- inst = emit_single_fb_write(abld, this->outputs[target], reg_undef,
- src0_alpha, 4);
- inst->target = target;
-  }
+  inst = emit_single_fb_write(abld, this->outputs[target],
+  this->dual_src_output, src0_alpha, 4);
+  inst->target = target;
}
 
+   prog_data->dual_src_blend = (this->dual_src_output.file != BAD_FILE);
+
if (inst == NULL) {
   /* Even if there's no color buffers enabled, we still need to send
* alpha out the pipeline to our null renderbuffer to support
@@ -914,7 +906,6 @@ fs_visitor::init()
this->promoted_constants = 0,
 
this->spilled_any_registers = false;
-   this->do_dual_src = false;
 }
 
 fs_visitor::~fs_visitor()
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/21] i965/fs: Special-case nir_intrinsic_store_output for the fragment shader.

2016-07-22 Thread Francisco Jerez

I'm about to change how fragment shader output locations are
represented, so the generic nir_intrinsic_store_output implementation
that assumes that outputs are just contiguous elements in the big
nir_outputs array won't work anymore.  This somewhat simplified
implementation of nir_intrinsic_store_output for fragment shaders
should be functionally equivalent to the current fall-back one.
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index f5f918d..28de29a 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -3280,6 +3280,21 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
   break;
}
 
+   case nir_intrinsic_store_output: {
+  const fs_reg src = get_nir_src(instr->src[0]);
+  nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+  assert(const_offset && "Indirect output stores not allowed");
+  const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
+ nir_intrinsic_base(instr) +
+ const_offset->u32[0]);
+
+  for (unsigned j = 0; j < instr->num_components; j++)
+ bld.MOV(offset(new_dest, bld, nir_intrinsic_component(instr) + j),
+ offset(src, bld, j));
+
+  break;
+   }
+
case nir_intrinsic_discard:
case nir_intrinsic_discard_if: {
   /* We track our discarded pixels in f0.1.  By predicating on it, we can
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/21] i965/fs: Rework representation of fragment output locations in NIR.

2016-07-22 Thread Francisco Jerez

The problem with the current approach is that driver output locations
are represented as a linear offset within the nir_outputs array, which
makes it rather difficult for the back-end to figure out what color
output and index some nir_intrinsic_load/store_output was meant for,
because the offset of a given output within the nir_output array is
dependent on the type and size of all previously allocated outputs.
Instead this defines the driver location of an output to be the pair
formed by its GLSL-assigned location and index (I've borrowed the
bitfield macros from brw_defines.h in order to represent the pair of
integers as a single scalar value that can be assigned to
nir_variable_data::driver_location).  nir_assign_var_locations is no
longer useful for fragment outputs.

Because fragment outputs are now allocated independently rather than
within the nir_outputs array, the get_frag_output() helper becomes
necessary in order to obtain the right temporary register for a given
location-index pair.

The type_size helper passed to nir_lower_io is now type_size_dvec4
rather than type_size_vec4_times_4 so that output array offsets are
provided in terms of whole array elements rather than in terms of
scalar components (dvec4 is the largest vector type supported by
GLSL, so this will cause all individual fragment outputs to have a size
of one regardless of the type).
---
 src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 50 +++-
 src/mesa/drivers/dri/i965/brw_nir.c  | 10 +--
 src/mesa/drivers/dri/i965/brw_nir.h  |  5 
 3 files changed, 55 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp 
b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 28de29a..8e069e0 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -89,17 +89,19 @@ fs_visitor::nir_setup_outputs()
nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
 
nir_foreach_variable(var, &nir->outputs) {
-  fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
-
   switch (stage) {
   case MESA_SHADER_VERTEX:
   case MESA_SHADER_TESS_EVAL:
   case MESA_SHADER_GEOMETRY: {
+ fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
  unsigned location = var->data.location;
 nir_setup_single_output_varying(&reg, var->type, &location);
  break;
   }
-  case MESA_SHADER_FRAGMENT:
+  case MESA_SHADER_FRAGMENT: {
+ const fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_F,
+ type_size_vec4_times_4(var->type));
+
  if (key->force_dual_color_blend &&
  var->data.location == FRAG_RESULT_DATA1) {
 this->dual_src_output = reg;
@@ -130,6 +132,7 @@ fs_visitor::nir_setup_outputs()
 }
  }
  break;
+  }
   default:
  unreachable("unhandled shader stage");
   }
@@ -3244,6 +3247,38 @@ emit_non_coherent_fb_read(fs_visitor *v, const fs_reg 
&dst, unsigned target)
return inst;
 }
 
+static fs_reg
+get_frag_output(const fs_visitor *v, unsigned location)
+{
+   assert(v->stage == MESA_SHADER_FRAGMENT);
+   const brw_wm_prog_key *const key =
+  reinterpret_cast<const brw_wm_prog_key *>(v->key);
+   const unsigned l = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_LOCATION);
+   const unsigned i = GET_FIELD(location, BRW_NIR_FRAG_OUTPUT_INDEX);
+
+   if (i > 0 || (key->force_dual_color_blend && l == FRAG_RESULT_DATA1))
+  return v->dual_src_output;
+
+   else if (l == FRAG_RESULT_COLOR)
+  return v->outputs[0];
+
+   else if (l == FRAG_RESULT_DEPTH)
+  return v->frag_depth;
+
+   else if (l == FRAG_RESULT_STENCIL)
+  return v->frag_stencil;
+
+   else if (l == FRAG_RESULT_SAMPLE_MASK)
+  return v->sample_mask;
+
+   else if (l >= FRAG_RESULT_DATA0 &&
+l < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS)
+  return v->outputs[l - FRAG_RESULT_DATA0];
+
+   else
+  unreachable("Invalid location");
+}
+
 void
 fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
   nir_intrinsic_instr *instr)
@@ -3282,11 +3317,12 @@ fs_visitor::nir_emit_fs_intrinsic(const fs_builder &bld,
 
case nir_intrinsic_store_output: {
   const fs_reg src = get_nir_src(instr->src[0]);
-  nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
+  const nir_const_value *const_offset = 
nir_src_as_const_value(instr->src[1]);
   assert(const_offset && "Indirect output stores not allowed");
-  const fs_reg new_dest = offset(retype(nir_outputs, src.type), bld,
- nir_intrinsic_base(instr) +
- const_offset->u32[0]);
+  const unsigned location = nir_intrinsic_base(instr) +
+ SET_FIELD(const_offset->u32[0], BRW_NIR_FRAG_OUTPUT_LOCATION);
+  const fs_reg new_dest = retype(get_frag_output(this, location),
+

[Mesa-dev] [PATCH 02/21] i965/fs: Add brw_wm_prog_key bit specifying whether FB reads should be coherent.

2016-07-22 Thread Francisco Jerez

Some of the following changes in this series are specific to the
non-coherent path, so I need some way to tell whether the coherent or
non-coherent path is in use.  The flag defaults to the value of the
gl_extensions::MESA_shader_framebuffer_fetch enable so that it can be
overridden easily on hardware that supports both framebuffer fetch
extensions in order to test the non-coherent path, like:

 MESA_EXTENSION_OVERRIDE=-GL_EXT_shader_framebuffer_fetch

(Of course trying to force-enable the coherent framebuffer fetch
extension on hardware without native support won't work and will lead to
assertion failures).
---
 src/mesa/drivers/dri/i965/brw_compiler.h | 1 +
 src/mesa/drivers/dri/i965/brw_wm.c   | 6 ++
 2 files changed, 7 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_compiler.h 
b/src/mesa/drivers/dri/i965/brw_compiler.h
index 10e9f47..d3bbdee 100644
--- a/src/mesa/drivers/dri/i965/brw_compiler.h
+++ b/src/mesa/drivers/dri/i965/brw_compiler.h
@@ -258,6 +258,7 @@ struct brw_wm_prog_key {
unsigned line_aa:2;
bool high_quality_derivatives:1;
bool force_dual_color_blend:1;
+   bool coherent_fb_fetch:1;
 
uint16_t drawable_height;
uint64_t input_slots_valid;
diff --git a/src/mesa/drivers/dri/i965/brw_wm.c 
b/src/mesa/drivers/dri/i965/brw_wm.c
index 7b1b839..6209fc1 100644
--- a/src/mesa/drivers/dri/i965/brw_wm.c
+++ b/src/mesa/drivers/dri/i965/brw_wm.c
@@ -549,6 +549,9 @@ brw_wm_populate_key(struct brw_context *brw, struct 
brw_wm_prog_key *key)
 
/* The unique fragment program ID */
key->program_string_id = fp->id;
+
+   /* Whether reads from the framebuffer should behave coherently. */
+   key->coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;
 }
 
 void
@@ -612,6 +615,9 @@ brw_fs_precompile(struct gl_context *ctx,
 
key.program_string_id = bfp->id;
 
+   /* Whether reads from the framebuffer should behave coherently. */
+   key.coherent_fb_fetch = ctx->Extensions.MESA_shader_framebuffer_fetch;
+
uint32_t old_prog_offset = brw->wm.base.prog_offset;
struct brw_wm_prog_data *old_prog_data = brw->wm.prog_data;
 
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/21] i965: Fix undefined signed overflow in INTEL_MASK for bitfields of 31 bits.

2016-07-22 Thread Francisco Jerez

Most likely we had only ever used this macro on bitfields of less than
31 bits -- That's going to change shortly.
---
 src/mesa/drivers/dri/i965/brw_defines.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_defines.h 
b/src/mesa/drivers/dri/i965/brw_defines.h
index 2814fa7..2368931 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -29,7 +29,7 @@
   *   Keith Whitwell 
   */
 
-#define INTEL_MASK(high, low) (((1<<((high)-(low)+1))-1)<<(low))
+#define INTEL_MASK(high, low) (((1u<<((high)-(low)+1))-1)<<(low))
 /* Using the GNU statement expression extension */
 #define SET_FIELD(value, field) \
({   \
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/21] i965/fs: Emit interpolation setup if non-coherent framebuffer fetch is in use.

2016-07-22 Thread Francisco Jerez

This will be required for the next commit since the non-coherent path
makes use of the fragment coordinates implicitly, so they need to be
calculated.
---
 src/mesa/drivers/dri/i965/brw_fs.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp 
b/src/mesa/drivers/dri/i965/brw_fs.cpp
index c8686d1..088f190 100644
--- a/src/mesa/drivers/dri/i965/brw_fs.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs.cpp
@@ -6150,7 +6150,8 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send)
  emit_shader_time_begin();
 
   calculate_urb_setup();
-  if (nir->info.inputs_read > 0) {
+  if (nir->info.inputs_read > 0 ||
+  (nir->info.outputs_read > 0 && !wm_key->coherent_fb_fetch)) {
  if (devinfo->gen < 6)
 emit_interpolation_setup_gen4();
  else
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 20/21] i965: Implement glBlendBarrier.

2016-07-22 Thread Francisco Jerez

This is a no-op if the platform supports coherent framebuffer fetch.
If it doesn't, we just need to flush the render cache and invalidate
the texture cache in order for previous rendering to be visible to
framebuffer fetch.
---
 src/mesa/drivers/dri/i965/brw_program.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/brw_program.c 
b/src/mesa/drivers/dri/i965/brw_program.c
index 7785490..5b246d3 100644
--- a/src/mesa/drivers/dri/i965/brw_program.c
+++ b/src/mesa/drivers/dri/i965/brw_program.c
@@ -310,6 +310,25 @@ brw_memory_barrier(struct gl_context *ctx, GLbitfield 
barriers)
brw_emit_pipe_control_flush(brw, bits);
 }
 
+static void
+brw_blend_barrier(struct gl_context *ctx)
+{
+   struct brw_context *brw = brw_context(ctx);
+
+   if (!ctx->Extensions.MESA_shader_framebuffer_fetch) {
+  if (brw->gen >= 6) {
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_RENDER_TARGET_FLUSH |
+ PIPE_CONTROL_CS_STALL);
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
+  } else {
+ brw_emit_pipe_control_flush(brw,
+ PIPE_CONTROL_RENDER_TARGET_FLUSH);
+  }
+   }
+}
+
 void
 brw_add_texrect_params(struct gl_program *prog)
 {
@@ -379,6 +398,7 @@ void brwInitFragProgFuncs( struct dd_function_table 
*functions )
functions->LinkShader = brw_link_shader;
 
functions->MemoryBarrier = brw_memory_barrier;
+   functions->BlendBarrier = brw_blend_barrier;
 }
 
 struct shader_times {
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/21] i965: Return the correct layout from get_isl_dim_layout for pre-ILK cube textures.

2016-07-22 Thread Francisco Jerez

---
 src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
index 5bf9243..602306b 100644
--- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
+++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
@@ -3031,13 +3031,16 @@ get_isl_dim_layout(const struct brw_device_info 
*devinfo, uint32_t tiling,
case GL_TEXTURE_2D:
case GL_TEXTURE_2D_ARRAY:
case GL_TEXTURE_RECTANGLE:
-   case GL_TEXTURE_CUBE_MAP:
-   case GL_TEXTURE_CUBE_MAP_ARRAY:
case GL_TEXTURE_2D_MULTISAMPLE:
case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
case GL_TEXTURE_EXTERNAL_OES:
   return ISL_DIM_LAYOUT_GEN4_2D;
 
+   case GL_TEXTURE_CUBE_MAP:
+   case GL_TEXTURE_CUBE_MAP_ARRAY:
+  return (devinfo->gen == 4 ? ISL_DIM_LAYOUT_GEN4_3D :
+  ISL_DIM_LAYOUT_GEN4_2D);
+
case GL_TEXTURE_3D:
   return (devinfo->gen >= 9 ?
   ISL_DIM_LAYOUT_GEN4_2D : ISL_DIM_LAYOUT_GEN4_3D);
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 17/21] i965: Massage argument list of brw_emit_surface_state().

2016-07-22 Thread Francisco Jerez

This commit does three different things in a single pass in order to
keep the amount of churn low: Remove the for_gather boolean argument
which was unused, pass the isl_view argument by value rather than by
reference since I'll have to modify it from within the function, and
add a target argument to allow callers to bind textures using a target
other than the original.  The prototype of the function now looks
like:

 void brw_emit_surface_state(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
 GLenum target, struct isl_view view,
 uint32_t mocs, uint32_t *surf_offset, int 
surf_index,
 unsigned read_domains, unsigned write_domains);
---
 src/mesa/drivers/dri/i965/brw_state.h|  5 ++---
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 22 +++---
 src/mesa/drivers/dri/i965/gen8_blorp.c   |  4 ++--
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_state.h 
b/src/mesa/drivers/dri/i965/brw_state.h
index 81f874d..e713e1d 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -276,9 +276,8 @@ int brw_get_texture_swizzle(const struct gl_context *ctx,
 
 void brw_emit_surface_state(struct brw_context *brw,
 struct intel_mipmap_tree *mt,
-const struct isl_view *view,
-uint32_t mocs, bool for_gather,
-uint32_t *surf_offset, int surf_index,
+GLenum target, struct isl_view view,
+uint32_t mocs, uint32_t *surf_offset, int 
surf_index,
 unsigned read_domains, unsigned write_domains);
 
 void brw_emit_buffer_surface_state(struct brw_context *brw,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c 
b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 9bee7dd..009660d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -74,11 +74,11 @@ static const struct surface_state_info 
surface_state_infos[] = {
 void
 brw_emit_surface_state(struct brw_context *brw,
struct intel_mipmap_tree *mt,
-   const struct isl_view *view,
-   uint32_t mocs, bool for_gather,
-   uint32_t *surf_offset, int surf_index,
+   GLenum target, struct isl_view view,
+   uint32_t mocs, uint32_t *surf_offset, int surf_index,
unsigned read_domains, unsigned write_domains)
 {
+   assert(mt->target == target);
const struct surface_state_info ss_info = surface_state_infos[brw->gen];
 
struct isl_surf surf;
@@ -90,7 +90,7 @@ brw_emit_surface_state(struct brw_context *brw,
uint64_t aux_offset = 0;
enum isl_aux_usage aux_usage = ISL_AUX_USAGE_NONE;
if (mt->mcs_mt &&
-   ((view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
+   ((view.usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) ||
 mt->fast_clear_state != INTEL_FAST_CLEAR_STATE_RESOLVED)) {
   intel_miptree_get_aux_isl_surf(brw, mt, &aux_surf_s, &aux_usage);
   aux_surf = &aux_surf_s;
@@ -107,7 +107,7 @@ brw_emit_surface_state(struct brw_context *brw,
 ss_info.num_dwords * 4, ss_info.ss_align,
 surf_index, surf_offset);
 
-   isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = view,
+   isl_surf_fill_state(&brw->isl_dev, dw, .surf = &surf, .view = &view,
.address = mt->bo->offset64 + mt->offset,
.aux_surf = aux_surf, .aux_usage = aux_usage,
.aux_address = aux_offset,
@@ -173,8 +173,8 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
};
 
uint32_t offset;
-   brw_emit_surface_state(brw, mt, &view,
-  surface_state_infos[brw->gen].rb_mocs, false,
+   brw_emit_surface_state(brw, mt, mt->target, view,
+  surface_state_infos[brw->gen].rb_mocs,
   &offset, surf_index,
   I915_GEM_DOMAIN_RENDER,
   I915_GEM_DOMAIN_RENDER);
@@ -478,8 +478,8 @@ brw_update_texture_surface(struct gl_context *ctx,
   obj->Target == GL_TEXTURE_CUBE_MAP_ARRAY)
  view.usage |= ISL_SURF_USAGE_CUBE_BIT;
 
-  brw_emit_surface_state(brw, mt, &view,
- surface_state_infos[brw->gen].tex_mocs, 
for_gather,
+  brw_emit_surface_state(brw, mt, mt->target, view,
+ surface_state_infos[brw->gen].tex_mocs,
  surf_offset, surf_index,
  I915_GEM_DOMAIN_SAMPLER, 0);
}
@@ -1513,8 +1513,8 @@ update_image_surface(struct brw_context *brw,

[Mesa-dev] [PATCH 00/21] i965: Implement non-coherent framebuffer fetch.

2016-07-22 Thread Francisco Jerez

This is an implementation of non-coherent framebuffer fetch as
described here [1] working on most hardware generations supported
by the i965 driver (from Gen5 to Gen8).  My plan was to send the
coherent framebuffer fetch implementation for SKL+ first since
it's actually simpler than the non-coherent path, but I've
noticed some potential hardware issues that need further
investigation, so here's the non-coherent path so it hopefully
gets some reviews in the meantime -- I plan to send the
implementation of coherent framebuffer fetch next week.

Patches 01-11 get the compiler ready for non-coherent framebuffer
fetch (some of the changes like the NIR fragment output location
rework will also be useful for the coherent path).  Patches 12-20
implement the required state setup logic and the new glBlendBarrier
entry point.

You can find the whole series along with the driver-independent
changes for EXT_shader_framebuffer_fetch in my Mesa tree [2], but note
that in order to test it you still need to add an additional entry to
extensions_table.h manually since the non-coherent extension is not
exposed yet.

[1] https://lists.freedesktop.org/archives/mesa-dev/2016-July/124028.html
[2] https://cgit.freedesktop.org/~currojerez/mesa/log/?h=i965-fb-fetch

[PATCH 01/21] i965/fs: Get rid of fs_visitor::do_dual_src.
[PATCH 02/21] i965/fs: Add brw_wm_prog_key bit specifying whether FB reads 
should be coherent.
[PATCH 03/21] i965: Allocate space in the binding table for non-coherent FB 
fetch.
[PATCH 04/21] i965/fs: Force per-sample dispatch if the shader reads from a 
multisample FBO.
[PATCH 05/21] i965/fs: Emit interpolation setup if non-coherent framebuffer 
fetch is in use.
[PATCH 06/21] i965/fs: Implement non-coherent framebuffer fetch using the 
sampler unit.
[PATCH 07/21] i965/fs: Special-case nir_intrinsic_store_output for the fragment 
shader.
[PATCH 08/21] i965: Fix undefined signed overflow in INTEL_MASK for bitfields 
of 31 bits.
[PATCH 09/21] i965/fs: Rework representation of fragment output locations in 
NIR.
[PATCH 10/21] i965/fs: Allocate fragment output temporaries on demand.
[PATCH 11/21] i965/fs: Translate nir_intrinsic_load_output on a fragment output.
[PATCH 12/21] i965: Return whether the miptree was resolved from 
intel_miptree_resolve_color().
[PATCH 13/21] i965: Resolve color for non-coherent FB fetch at UpdateState time.
[PATCH 14/21] i965: Factor out isl_surf_dim/isl_dim_layout calculation into 
functions.
[PATCH 15/21] i965: Return the correct layout from get_isl_dim_layout for 
pre-ILK cube textures.
[PATCH 16/21] i965: Add missing has_surface_tile_offset flag to the Gen8+ 
device info structures.
[PATCH 17/21] i965: Massage argument list of brw_emit_surface_state().
[PATCH 18/21] i965: Implement support for overriding the texture target in 
brw_emit_surface_state.
[PATCH 19/21] i965: Upload surface state for non-coherent framebuffer fetch.
[PATCH 20/21] i965: Implement glBlendBarrier.
[PATCH 21/21] i965: Flip the non-coherent framebuffer fetch extension bit on 
G45-Gen8 hardware.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nvc0/ir: remove wrong assert in emitUADD()

2016-07-22 Thread Ilia Mirkin

No, it's not. But if the real op is OP_SUB, then the logic is a bit
different, since there's an implied extra neg on src(1).

On Fri, Jul 22, 2016 at 7:47 PM, Samuel Pitoiset
 wrote:
> It's actually legal to have neg modifiers on both sources.
>
> Signed-off-by: Samuel Pitoiset 
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 -
>  1 file changed, 1 deletion(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
> b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> index 1c3e519..2eb5851 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
> @@ -698,7 +698,6 @@ CodeEmitterNVC0::emitUADD(const Instruction *i)
> uint32_t addOp = 0;
>
> assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
> -   assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
>
> if (i->src(0).mod.neg())
>addOp |= 0x200;
> --
> 2.8.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 09/15] ddebug: separate draw call dumping logic

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_draw.c | 47 
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 95146ed..1b5cdc8 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -520,49 +520,54 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, 
unsigned flags)
 }
 
 static void
-dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags,
- bool dump_dmesg)
+dd_dump_call(FILE *f, struct dd_draw_state *state, struct dd_call *call)
 {
-   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
-dctx->draw_state.apitrace_call_number);
-
-   if (!f)
-  return;
-
switch (call->type) {
case CALL_DRAW_VBO:
-  dd_dump_draw_vbo(&dctx->draw_state, &call->info.draw_vbo, f);
+  dd_dump_draw_vbo(state, &call->info.draw_vbo, f);
   break;
case CALL_LAUNCH_GRID:
-  dd_dump_launch_grid(&dctx->draw_state, &call->info.launch_grid, f);
+  dd_dump_launch_grid(state, &call->info.launch_grid, f);
   break;
case CALL_RESOURCE_COPY_REGION:
-  dd_dump_resource_copy_region(&dctx->draw_state,
+  dd_dump_resource_copy_region(state,
&call->info.resource_copy_region, f);
   break;
case CALL_BLIT:
-  dd_dump_blit(&dctx->draw_state, &call->info.blit, f);
+  dd_dump_blit(state, &call->info.blit, f);
   break;
case CALL_FLUSH_RESOURCE:
-  dd_dump_flush_resource(&dctx->draw_state, call->info.flush_resource, f);
+  dd_dump_flush_resource(state, call->info.flush_resource, f);
   break;
case CALL_CLEAR:
-  dd_dump_clear(&dctx->draw_state, &call->info.clear, f);
+  dd_dump_clear(state, &call->info.clear, f);
   break;
case CALL_CLEAR_BUFFER:
-  dd_dump_clear_buffer(&dctx->draw_state, &call->info.clear_buffer, f);
+  dd_dump_clear_buffer(state, &call->info.clear_buffer, f);
   break;
case CALL_CLEAR_RENDER_TARGET:
-  dd_dump_clear_render_target(&dctx->draw_state, f);
+  dd_dump_clear_render_target(state, f);
   break;
case CALL_CLEAR_DEPTH_STENCIL:
-  dd_dump_clear_depth_stencil(&dctx->draw_state, f);
+  dd_dump_clear_depth_stencil(state, f);
   break;
case CALL_GENERATE_MIPMAP:
-  dd_dump_generate_mipmap(&dctx->draw_state, f);
+  dd_dump_generate_mipmap(state, f);
   break;
}
+}
+
+static void
+dd_write_report(struct dd_context *dctx, struct dd_call *call, unsigned flags,
+bool dump_dmesg)
+{
+   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
+dctx->draw_state.apitrace_call_number);
+
+   if (!f)
+  return;
 
+   dd_dump_call(f, &dctx->draw_state, call);
dd_dump_driver_state(dctx, f, flags);
if (dump_dmesg)
   dd_dump_dmesg(f);
@@ -675,7 +680,7 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
   case DD_DETECT_HANGS:
  if (!dscreen->no_flush &&
 dd_flush_and_check_hang(dctx, NULL, 0)) {
-dd_dump_call(dctx, call,
+dd_write_report(dctx, call,
  PIPE_DUMP_DEVICE_STATUS_REGISTERS |
  PIPE_DUMP_CURRENT_STATES |
  PIPE_DUMP_CURRENT_SHADERS |
@@ -689,12 +694,12 @@ dd_after_draw(struct dd_context *dctx, struct dd_call 
*call)
   case DD_DUMP_ALL_CALLS:
  if (!dscreen->no_flush)
 pipe->flush(pipe, NULL, 0);
- dd_dump_call(dctx, call, 0, false);
+ dd_write_report(dctx, call, 0, false);
  break;
   case DD_DUMP_APITRACE_CALL:
  if (dscreen->apitrace_dump_call ==
  dctx->draw_state.apitrace_call_number) {
-dd_dump_call(dctx, call, 0, false);
+dd_write_report(dctx, call, 0, false);
 /* No need to continue. */
 exit(0);
  }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 04/15] ddebug: don't use abort()

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

We don't want a core dump.
---
 src/gallium/drivers/ddebug/dd_draw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 35ae774..8cc51fc 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -556,7 +556,7 @@ dd_kill_process(void)
fprintf(stderr, "dd: Aborting the process...\n");
fflush(stdout);
fflush(stderr);
-   abort();
+   exit(1);
 }
 
 static bool
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 08/15] ddebug: move all states into a separate structure

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_context.c |  50 
 src/gallium/drivers/ddebug/dd_draw.c| 204 
 src/gallium/drivers/ddebug/dd_pipe.h|  15 ++-
 3 files changed, 140 insertions(+), 129 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_context.c 
b/src/gallium/drivers/ddebug/dd_context.c
index 77fb9ae..96fea3a 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -164,11 +164,12 @@ dd_context_render_condition(struct pipe_context *_pipe,
 {
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
+   struct dd_draw_state *dstate = &dctx->draw_state;
 
pipe->render_condition(pipe, dd_query_unwrap(query), condition, mode);
-   dctx->render_cond.query = dd_query(query);
-   dctx->render_cond.condition = condition;
-   dctx->render_cond.mode = mode;
+   dstate->render_cond.query = dd_query(query);
+   dstate->render_cond.condition = condition;
+   dstate->render_cond.mode = mode;
 }
 
 
@@ -199,7 +200,7 @@ dd_context_render_condition(struct pipe_context *_pipe,
   struct pipe_context *pipe = dctx->pipe; \
   struct dd_state *hstate = state; \
  \
-  dctx->shortname = hstate; \
+  dctx->draw_state.shortname = hstate; \
   pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
}
 
@@ -234,7 +235,7 @@ dd_context_bind_sampler_states(struct pipe_context *_pipe, 
unsigned shader,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   memcpy(&dctx->sampler_states[shader][start], states,
+   memcpy(&dctx->draw_state.sampler_states[shader][start], states,
   sizeof(void*) * count);
 
if (states) {
@@ -284,7 +285,7 @@ DD_CSO_DELETE(vertex_elements)
   struct pipe_context *pipe = dctx->pipe; \
   struct dd_state *hstate = state; \
\
-  dctx->shaders[PIPE_SHADER_##NAME] = hstate; \
+  dctx->draw_state.shaders[PIPE_SHADER_##NAME] = hstate; \
   pipe->bind_##name##_state(pipe, hstate ? hstate->cso : NULL); \
} \
 \
@@ -354,7 +355,7 @@ DD_SHADER_NOCREATE(COMPUTE, compute)
   struct dd_context *dctx = dd_context(_pipe); \
   struct pipe_context *pipe = dctx->pipe; \
  \
-  dctx->name = deref; \
+  dctx->draw_state.name = deref; \
   pipe->set_##name(pipe, ref); \
}
 
@@ -374,8 +375,8 @@ dd_context_set_constant_buffer(struct pipe_context *_pipe,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   safe_memcpy(&dctx->constant_buffers[shader][index], constant_buffer,
-  sizeof(*constant_buffer));
+   safe_memcpy(&dctx->draw_state.constant_buffers[shader][index],
+   constant_buffer, sizeof(*constant_buffer));
pipe->set_constant_buffer(pipe, shader, index, constant_buffer);
 }
 
@@ -387,7 +388,7 @@ dd_context_set_scissor_states(struct pipe_context *_pipe,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   safe_memcpy(&dctx->scissors[start_slot], states,
+   safe_memcpy(&dctx->draw_state.scissors[start_slot], states,
sizeof(*states) * num_scissors);
pipe->set_scissor_states(pipe, start_slot, num_scissors, states);
 }
@@ -400,7 +401,7 @@ dd_context_set_viewport_states(struct pipe_context *_pipe,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   safe_memcpy(&dctx->viewports[start_slot], states,
+   safe_memcpy(&dctx->draw_state.viewports[start_slot], states,
sizeof(*states) * num_viewports);
pipe->set_viewport_states(pipe, start_slot, num_viewports, states);
 }
@@ -412,8 +413,10 @@ static void dd_context_set_tess_state(struct pipe_context 
*_pipe,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   memcpy(dctx->tess_default_levels, default_outer_level, sizeof(float) * 4);
-   memcpy(dctx->tess_default_levels+4, default_inner_level, sizeof(float) * 2);
+   memcpy(dctx->draw_state.tess_default_levels, default_outer_level,
+  sizeof(float) * 4);
+   memcpy(dctx->draw_state.tess_default_levels+4, default_inner_level,
+  sizeof(float) * 2);
pipe->set_tess_state(pipe, default_outer_level, default_inner_level);
 }
 
@@ -509,7 +512,7 @@ dd_context_set_sampler_views(struct pipe_context *_pipe, 
unsigned shader,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   safe_memcpy(&dctx->sampler_views[shader][start], views,
+   safe_memcpy(&dctx->draw_state.sampler_views[shader][start], views,
sizeof(views[0]) * num);
pipe->set_sampler_views(pipe, shader, start, num, views);
 }
@@ -522,7 +525,7 @@ dd_context_set_shader_images(struct pipe_context *_pipe, 
unsigned shader,
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
-   safe_memcpy(&dctx->shader_images[shader][start]

[Mesa-dev] [PATCH 13/15] radeonsi: move the shader key dumping to si_shader_dump

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/radeonsi/si_debug.c  |  1 -
 src/gallium/drivers/radeonsi/si_shader.c | 12 +---
 src/gallium/drivers/radeonsi/si_shader.h |  1 -
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index 35d961d..d9d4890 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -40,7 +40,6 @@ static void si_dump_shader(struct si_screen *sscreen,
if (!state->cso || !state->current)
return;
 
-   si_dump_shader_key(state->cso->type, &state->current->key, f);
si_shader_dump(sscreen, state->current, NULL,
   state->cso->info.processor, f);
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index adf706c..cb270e0 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -151,6 +151,9 @@ static void si_llvm_emit_barrier(const struct 
lp_build_tgsi_action *action,
 struct lp_build_tgsi_context *bld_base,
 struct lp_build_emit_data *emit_data);
 
+static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
+  FILE *f);
+
 /* Ideally pass the sample mask input to the PS epilog as v13, which
  * is its usual location, so that the shader doesn't have to add v_mov.
  */
@@ -6244,6 +6247,10 @@ void si_shader_dump(struct si_screen *sscreen, struct 
si_shader *shader,
struct pipe_debug_callback *debug, unsigned processor,
FILE *file)
 {
+   if (file != stderr ||
+   r600_can_dump_shader(&sscreen->b, processor))
+   si_dump_shader_key(processor, &shader->key, file);
+
if (file != stderr && shader->binary.llvm_ir_string) {
fprintf(file, "\n%s - main shader part - LLVM IR:\n\n",
si_get_shader_name(shader, processor));
@@ -6440,7 +6447,8 @@ static int si_generate_gs_copy_shader(struct si_screen 
*sscreen,
return r;
 }
 
-void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f)
+static void si_dump_shader_key(unsigned shader, union si_shader_key *key,
+  FILE *f)
 {
int i;
 
@@ -6613,8 +6621,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 * conversion fails. */
if (r600_can_dump_shader(&sscreen->b, sel->info.processor) &&
!(sscreen->b.debug_flags & DBG_NO_TGSI)) {
-   if (is_monolithic)
-   si_dump_shader_key(sel->type, &shader->key, stderr);
tgsi_dump(sel->tokens, 0);
si_dump_streamout(&sel->so);
}
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 2906931..6073296 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -488,7 +488,6 @@ int si_compile_tgsi_shader(struct si_screen *sscreen,
 int si_shader_create(struct si_screen *sscreen, LLVMTargetMachineRef tm,
 struct si_shader *shader,
 struct pipe_debug_callback *debug);
-void si_dump_shader_key(unsigned shader, union si_shader_key *key, FILE *f);
 int si_compile_llvm(struct si_screen *sscreen,
struct radeon_shader_binary *binary,
struct si_shader_config *conf,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 14/15] radeonsi: add empty lines after shader stats

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

to separate individual shaders dumped consecutively.
---
 src/gallium/drivers/radeonsi/si_shader.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index cb270e0..4794737 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -6194,7 +6194,7 @@ static void si_shader_dump_stats(struct si_screen 
*sscreen,
"LDS: %d blocks\n"
"Scratch: %d bytes per wave\n"
"Max Waves: %d\n"
-   "\n",
+   "\n\n\n",
conf->num_sgprs, conf->num_vgprs,
conf->spilled_sgprs, conf->spilled_vgprs, code_size,
conf->lds_size, conf->scratch_bytes_per_wave,
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 06/15] ddebug: implement create_batch_query

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_context.c | 27 +++
 1 file changed, 27 insertions(+)

diff --git a/src/gallium/drivers/ddebug/dd_context.c 
b/src/gallium/drivers/ddebug/dd_context.c
index c0b2b3d..77fb9ae 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -85,6 +85,32 @@ dd_context_create_query(struct pipe_context *_pipe, unsigned 
query_type,
return query;
 }
 
+static struct pipe_query *
+dd_context_create_batch_query(struct pipe_context *_pipe, unsigned num_queries,
+  unsigned *query_types)
+{
+   struct pipe_context *pipe = dd_context(_pipe)->pipe;
+   struct pipe_query *query;
+
+   query = pipe->create_batch_query(pipe, num_queries, query_types);
+
+   /* Wrap the driver query in a dd_query (destroyed again if that fails). */
+   if (query) {
+  struct dd_query *dd_query = CALLOC_STRUCT(dd_query);
+  if (dd_query) {
+ /* no special handling for batch queries yet: only the first type is kept */
+ dd_query->type = query_types[0];
+ dd_query->query = query;
+ query = (struct pipe_query *)dd_query;
+  } else {
+ pipe->destroy_query(pipe, query);
+ query = NULL;
+  }
+   }
+
+   return query;
+}
+
 static void
 dd_context_destroy_query(struct pipe_context *_pipe,
  struct pipe_query *query)
@@ -714,6 +740,7 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
 
CTX_INIT(render_condition);
CTX_INIT(create_query);
+   CTX_INIT(create_batch_query);
CTX_INIT(destroy_query);
CTX_INIT(begin_query);
CTX_INIT(end_query);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 03/15] ddebug: make dd_get_file_stream accept the screen only

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_draw.c | 15 ---
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index c3fd968..35ae774 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -99,10 +99,9 @@ struct dd_call
 };
 
 static FILE *
-dd_get_file_stream(struct dd_context *dctx)
+dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
 {
-   struct dd_screen *dscreen = dd_screen(dctx->base.screen);
-   struct pipe_screen *screen = dctx->pipe->screen;
+   struct pipe_screen *screen = dscreen->screen;
FILE *f = dd_get_debug_file(dscreen->verbose);
if (!f)
   return NULL;
@@ -111,9 +110,9 @@ dd_get_file_stream(struct dd_context *dctx)
fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
 
-   if (dctx->apitrace_call_number)
+   if (apitrace_call_number)
   fprintf(f, "Last apitrace call: %u\n\n",
-  dctx->apitrace_call_number);
+  apitrace_call_number);
return f;
 }
 
@@ -507,7 +506,8 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, 
unsigned flags)
 static void
 dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags)
 {
-   FILE *f = dd_get_file_stream(dctx);
+   FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
+dctx->apitrace_call_number);
 
if (!f)
   return;
@@ -591,7 +591,8 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
  const char *cause)
 {
if (dd_flush_and_check_hang(dctx, fence, flags)) {
-  FILE *f = dd_get_file_stream(dctx);
+  FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
+   dctx->apitrace_call_number);
 
   if (f) {
  fprintf(f, "dd: %s.\n", cause);
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 01/15] gallium: rework flags for pipe_context::dump_debug_state

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

The pipelined hang detection mode will not want to dump everything, since
that is also time-consuming. It will only dump shaders after a draw call
and then dump the status registers separately if a hang is detected.
---
 src/gallium/drivers/ddebug/dd_draw.c| 12 ++--
 src/gallium/drivers/radeonsi/si_debug.c | 32 +++-
 src/gallium/include/pipe/p_context.h|  2 +-
 src/gallium/include/pipe/p_defines.h|  5 -
 4 files changed, 34 insertions(+), 17 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index f8047cc..c3fd968 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -595,7 +595,11 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
 
   if (f) {
  fprintf(f, "dd: %s.\n", cause);
- dd_dump_driver_state(dctx, f, PIPE_DEBUG_DEVICE_IS_HUNG);
+ dd_dump_driver_state(dctx, f,
+  PIPE_DUMP_DEVICE_STATUS_REGISTERS |
+  PIPE_DUMP_CURRENT_STATES |
+  PIPE_DUMP_CURRENT_SHADERS |
+  PIPE_DUMP_LAST_COMMAND_BUFFER);
  dd_close_file_stream(f);
   }
 
@@ -649,7 +653,11 @@ dd_after_draw(struct dd_context *dctx, struct dd_call 
*call)
   case DD_DETECT_HANGS:
  if (!dscreen->no_flush &&
 dd_flush_and_check_hang(dctx, NULL, 0)) {
-dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+dd_dump_call(dctx, call,
+ PIPE_DUMP_DEVICE_STATUS_REGISTERS |
+ PIPE_DUMP_CURRENT_STATES |
+ PIPE_DUMP_CURRENT_SHADERS |
+ PIPE_DUMP_LAST_COMMAND_BUFFER);
 
 /* Terminate the process to prevent future hangs. */
 dd_kill_process();
diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index 73e0bfe..35d961d 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -665,24 +665,30 @@ static void si_dump_debug_state(struct pipe_context *ctx, 
FILE *f,
 {
struct si_context *sctx = (struct si_context*)ctx;
 
-   if (flags & PIPE_DEBUG_DEVICE_IS_HUNG)
+   if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS)
si_dump_debug_registers(sctx, f);
 
-   si_dump_framebuffer(sctx, f);
-   si_dump_shader(sctx->screen, &sctx->vs_shader, f);
-   si_dump_shader(sctx->screen, &sctx->tcs_shader, f);
-   si_dump_shader(sctx->screen, &sctx->tes_shader, f);
-   si_dump_shader(sctx->screen, &sctx->gs_shader, f);
-   si_dump_shader(sctx->screen, &sctx->ps_shader, f);
+   if (flags & PIPE_DUMP_CURRENT_STATES)
+   si_dump_framebuffer(sctx, f);
 
-   si_dump_bo_list(sctx, &sctx->last_gfx, f);
-   si_dump_last_ib(sctx, f);
+   if (flags & PIPE_DUMP_CURRENT_SHADERS) {
+   si_dump_shader(sctx->screen, &sctx->vs_shader, f);
+   si_dump_shader(sctx->screen, &sctx->tcs_shader, f);
+   si_dump_shader(sctx->screen, &sctx->tes_shader, f);
+   si_dump_shader(sctx->screen, &sctx->gs_shader, f);
+   si_dump_shader(sctx->screen, &sctx->ps_shader, f);
+   }
+
+   if (flags & PIPE_DUMP_LAST_COMMAND_BUFFER) {
+   si_dump_bo_list(sctx, &sctx->last_gfx, f);
+   si_dump_last_ib(sctx, f);
 
-   fprintf(f, "Done.\n");
+   fprintf(f, "Done.\n");
 
-   /* dump only once */
-   radeon_clear_saved_cs(&sctx->last_gfx);
-   r600_resource_reference(&sctx->last_trace_buf, NULL);
+   /* dump only once */
+   radeon_clear_saved_cs(&sctx->last_gfx);
+   r600_resource_reference(&sctx->last_trace_buf, NULL);
+   }
 }
 
 static void si_dump_dma(struct si_context *sctx,
diff --git a/src/gallium/include/pipe/p_context.h 
b/src/gallium/include/pipe/p_context.h
index fe567b6..f1de189 100644
--- a/src/gallium/include/pipe/p_context.h
+++ b/src/gallium/include/pipe/p_context.h
@@ -690,7 +690,7 @@ struct pipe_context {
 *
 * \param ctxpipe context
 * \param stream where the output should be written to
-* \param flags  a mask of PIPE_DEBUG_* flags
+* \param flags  a mask of PIPE_DUMP_* flags
 */
void (*dump_debug_state)(struct pipe_context *ctx, FILE *stream,
 unsigned flags);
diff --git a/src/gallium/include/pipe/p_defines.h 
b/src/gallium/include/pipe/p_defines.h
index 2524e42..c5f1b8f 100644
--- a/src/gallium/include/pipe/p_defines.h
+++ b/src/gallium/include/pipe/p_defines.h
@@ -355,7 +355,10 @@ enum pipe_flush_flags
 /**
  * Flags for pipe_context::dump_debug_state.
  */
-#define PIPE_DEBUG_DEVICE_IS_HUNG  (1 << 0)
+#define PIPE_DUMP_DEVICE_STATUS_REGISTERS(1 << 0)
+#define PIPE_DUMP_CURRENT_STATES (1 << 1)
+#define PIPE_DU

[Mesa-dev] [PATCH 10/15] ddebug: move dd_call into dd_pipe.h

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_draw.c | 66 
 src/gallium/drivers/ddebug/dd_pipe.h | 66 
 2 files changed, 66 insertions(+), 66 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 1b5cdc8..86d6102 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -32,72 +32,6 @@
 #include "tgsi/tgsi_scan.h"
 
 
-enum call_type
-{
-   CALL_DRAW_VBO,
-   CALL_LAUNCH_GRID,
-   CALL_RESOURCE_COPY_REGION,
-   CALL_BLIT,
-   CALL_FLUSH_RESOURCE,
-   CALL_CLEAR,
-   CALL_CLEAR_BUFFER,
-   CALL_CLEAR_RENDER_TARGET,
-   CALL_CLEAR_DEPTH_STENCIL,
-   CALL_GENERATE_MIPMAP,
-};
-
-struct call_resource_copy_region
-{
-   struct pipe_resource *dst;
-   unsigned dst_level;
-   unsigned dstx, dsty, dstz;
-   struct pipe_resource *src;
-   unsigned src_level;
-   const struct pipe_box *src_box;
-};
-
-struct call_clear
-{
-   unsigned buffers;
-   const union pipe_color_union *color;
-   double depth;
-   unsigned stencil;
-};
-
-struct call_clear_buffer
-{
-   struct pipe_resource *res;
-   unsigned offset;
-   unsigned size;
-   const void *clear_value;
-   int clear_value_size;
-};
-
-struct call_generate_mipmap {
-   struct pipe_resource *res;
-   enum pipe_format format;
-   unsigned base_level;
-   unsigned last_level;
-   unsigned first_layer;
-   unsigned last_layer;
-};
-
-struct dd_call
-{
-   enum call_type type;
-
-   union {
-  struct pipe_draw_info draw_vbo;
-  struct pipe_grid_info launch_grid;
-  struct call_resource_copy_region resource_copy_region;
-  struct pipe_blit_info blit;
-  struct pipe_resource *flush_resource;
-  struct call_clear clear;
-  struct call_clear_buffer clear_buffer;
-  struct call_generate_mipmap generate_mipmap;
-   } info;
-};
-
 static FILE *
 dd_get_file_stream(struct dd_screen *dscreen, unsigned apitrace_call_number)
 {
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index 072e9ac..e4960d5 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -51,6 +51,72 @@ struct dd_screen
unsigned apitrace_dump_call;
 };
 
+enum call_type
+{
+   CALL_DRAW_VBO,
+   CALL_LAUNCH_GRID,
+   CALL_RESOURCE_COPY_REGION,
+   CALL_BLIT,
+   CALL_FLUSH_RESOURCE,
+   CALL_CLEAR,
+   CALL_CLEAR_BUFFER,
+   CALL_CLEAR_RENDER_TARGET,
+   CALL_CLEAR_DEPTH_STENCIL,
+   CALL_GENERATE_MIPMAP,
+};
+
+struct call_resource_copy_region
+{
+   struct pipe_resource *dst;
+   unsigned dst_level;
+   unsigned dstx, dsty, dstz;
+   struct pipe_resource *src;
+   unsigned src_level;
+   const struct pipe_box *src_box;
+};
+
+struct call_clear
+{
+   unsigned buffers;
+   const union pipe_color_union *color;
+   double depth;
+   unsigned stencil;
+};
+
+struct call_clear_buffer
+{
+   struct pipe_resource *res;
+   unsigned offset;
+   unsigned size;
+   const void *clear_value;
+   int clear_value_size;
+};
+
+struct call_generate_mipmap {
+   struct pipe_resource *res;
+   enum pipe_format format;
+   unsigned base_level;
+   unsigned last_level;
+   unsigned first_layer;
+   unsigned last_layer;
+};
+
+struct dd_call
+{
+   enum call_type type;
+
+   union {
+  struct pipe_draw_info draw_vbo;
+  struct pipe_grid_info launch_grid;
+  struct call_resource_copy_region resource_copy_region;
+  struct pipe_blit_info blit;
+  struct pipe_resource *flush_resource;
+  struct call_clear clear;
+  struct call_clear_buffer clear_buffer;
+  struct call_generate_mipmap generate_mipmap;
+   } info;
+};
+
 struct dd_query
 {
unsigned type;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 07/15] ddebug: write contents of dmesg into hang reports

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_draw.c | 29 +
 1 file changed, 25 insertions(+), 4 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 8cc51fc..1361c80 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -117,6 +117,22 @@ dd_get_file_stream(struct dd_screen *dscreen, unsigned 
apitrace_call_number)
 }
 
 static void
+dd_dump_dmesg(FILE *f)
+{
+   char line[2000];
+   FILE *p = popen("dmesg | tail -n60", "r");
+
+   if (!p)
+  return;
+
+   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
+   while (fgets(line, sizeof(line), p))
+  fputs(line, f);
+   /* A stream opened with popen() must be closed with pclose(), not fclose(). */
+   pclose(p);
+}
+
+static void
 dd_close_file_stream(FILE *f)
 {
fclose(f);
@@ -504,7 +520,8 @@ dd_dump_driver_state(struct dd_context *dctx, FILE *f, 
unsigned flags)
 }
 
 static void
-dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags)
+dd_dump_call(struct dd_context *dctx, struct dd_call *call, unsigned flags,
+ bool dump_dmesg)
 {
FILE *f = dd_get_file_stream(dd_screen(dctx->base.screen),
 dctx->apitrace_call_number);
@@ -546,6 +563,8 @@ dd_dump_call(struct dd_context *dctx, struct dd_call *call, 
unsigned flags)
}
 
dd_dump_driver_state(dctx, f, flags);
+   if (dump_dmesg)
+  dd_dump_dmesg(f);
dd_close_file_stream(f);
 }
 
@@ -601,6 +620,7 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
   PIPE_DUMP_CURRENT_STATES |
   PIPE_DUMP_CURRENT_SHADERS |
   PIPE_DUMP_LAST_COMMAND_BUFFER);
+ dd_dump_dmesg(f);
  dd_close_file_stream(f);
   }
 
@@ -658,7 +678,8 @@ dd_after_draw(struct dd_context *dctx, struct dd_call *call)
  PIPE_DUMP_DEVICE_STATUS_REGISTERS |
  PIPE_DUMP_CURRENT_STATES |
  PIPE_DUMP_CURRENT_SHADERS |
- PIPE_DUMP_LAST_COMMAND_BUFFER);
+ PIPE_DUMP_LAST_COMMAND_BUFFER,
+ true);
 
 /* Terminate the process to prevent future hangs. */
 dd_kill_process();
@@ -667,11 +688,11 @@ dd_after_draw(struct dd_context *dctx, struct dd_call 
*call)
   case DD_DUMP_ALL_CALLS:
  if (!dscreen->no_flush)
 pipe->flush(pipe, NULL, 0);
- dd_dump_call(dctx, call, 0);
+ dd_dump_call(dctx, call, 0, false);
  break;
   case DD_DUMP_APITRACE_CALL:
  if (dscreen->apitrace_dump_call == dctx->apitrace_call_number) {
-dd_dump_call(dctx, call, 0);
+dd_dump_call(dctx, call, 0, false);
 /* No need to continue. */
 exit(0);
  }
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 02/15] ddebug: clean up ddebug_screen_create

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_screen.c | 39 --
 1 file changed, 23 insertions(+), 16 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index 119ee47..46869ab 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -290,18 +290,17 @@ struct pipe_screen *
 ddebug_screen_create(struct pipe_screen *screen)
 {
struct dd_screen *dscreen;
-   const char *option = debug_get_option("GALLIUM_DDEBUG", NULL);
-   bool dump_always = option && !strncmp(option, "always", 6);
-   bool dump_apitrace = option && !strncmp(option, "apitrace", 8);
-   bool no_flush = option && strstr(option, "noflush");
-   bool help = option && !strcmp(option, "help");
+   const char *option;
+   bool no_flush;
unsigned timeout = 0;
unsigned apitrace_dump_call = 0;
+   enum dd_mode mode;
 
-   if (dump_apitrace)
-  no_flush = true;
+   option = debug_get_option("GALLIUM_DDEBUG", NULL);
+   if (!option)
+  return screen;
 
-   if (help) {
+   if (!strcmp(option, "help")) {
   puts("Gallium driver debugger");
   puts("");
   puts("Usage:");
@@ -328,13 +327,22 @@ ddebug_screen_create(struct pipe_screen *screen)
   exit(0);
}
 
-   if (!option)
-  return screen;
-   if (!dump_always && !dump_apitrace && sscanf(option, "%u", &timeout) != 1)
-  return screen;
+   no_flush = strstr(option, "noflush") != NULL;
 
-   if (dump_apitrace && sscanf(option+8, "%u", &apitrace_dump_call) != 1)
-  return screen;
+   if (!strncmp(option, "always", 6)) {
+  mode = DD_DUMP_ALL_CALLS;
+   } else if (!strncmp(option, "apitrace", 8)) {
+  mode = DD_DUMP_APITRACE_CALL;
+  no_flush = true;
+
+  if (sscanf(option+8, "%u", &apitrace_dump_call) != 1)
+ return screen;
+   } else {
+  mode = DD_DETECT_HANGS;
+
+  if (sscanf(option, "%u", &timeout) != 1)
+ return screen;
+   }
 
dscreen = CALLOC_STRUCT(dd_screen);
if (!dscreen)
@@ -374,8 +382,7 @@ ddebug_screen_create(struct pipe_screen *screen)
 
dscreen->screen = screen;
dscreen->timeout_ms = timeout;
-   dscreen->mode = dump_always ? DD_DUMP_ALL_CALLS :
-   dump_apitrace ? DD_DUMP_APITRACE_CALL : DD_DETECT_HANGS;
+   dscreen->mode = mode;
dscreen->no_flush = no_flush;
dscreen->verbose = strstr(option, "verbose") != NULL;
dscreen->apitrace_dump_call = apitrace_dump_call;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 05/15] ddebug: use a debug context for GPU hang debugging only

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_screen.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/ddebug/dd_screen.c 
b/src/gallium/drivers/ddebug/dd_screen.c
index 46869ab..d4a50ac 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -116,7 +116,8 @@ dd_screen_context_create(struct pipe_screen *_screen, void 
*priv,
struct dd_screen *dscreen = dd_screen(_screen);
struct pipe_screen *screen = dscreen->screen;
 
-   flags |= PIPE_CONTEXT_DEBUG;
+   if (dscreen->mode == DD_DETECT_HANGS)
+  flags |= PIPE_CONTEXT_DEBUG;
 
return dd_context_create(dscreen,
 screen->context_create(screen, priv, flags));
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 11/15] ddebug: don't save pointers to call parameters

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

---
 src/gallium/drivers/ddebug/dd_draw.c | 8 
 src/gallium/drivers/ddebug/dd_pipe.h | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 86d6102..836f812 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -357,7 +357,7 @@ dd_dump_resource_copy_region(struct dd_draw_state *dstate,
DUMP_M(uint, info, dstz);
DUMP_M(resource, info, src);
DUMP_M(uint, info, src_level);
-   DUMP_M(box, info, src_box);
+   DUMP_M_ADDR(box, info, src_box);
 }
 
 static void
@@ -404,7 +404,7 @@ dd_dump_clear(struct dd_draw_state *dstate, struct 
call_clear *info, FILE *f)
 {
fprintf(f, "%s:\n", __func__+8);
DUMP_M(uint, info, buffers);
-   DUMP_M(color_union, info, color);
+   DUMP_M_ADDR(color_union, info, color);
DUMP_M(double, info, depth);
DUMP_M(hex, info, stencil);
 }
@@ -700,7 +700,7 @@ dd_context_resource_copy_region(struct pipe_context *_pipe,
call.info.resource_copy_region.dstz = dstz;
call.info.resource_copy_region.src = src;
call.info.resource_copy_region.src_level = src_level;
-   call.info.resource_copy_region.src_box = src_box;
+   call.info.resource_copy_region.src_box = *src_box;
 
dd_before_draw(dctx);
pipe->resource_copy_region(pipe,
@@ -780,7 +780,7 @@ dd_context_clear(struct pipe_context *_pipe, unsigned 
buffers,
 
call.type = CALL_CLEAR;
call.info.clear.buffers = buffers;
-   call.info.clear.color = color;
+   call.info.clear.color = *color;
call.info.clear.depth = depth;
call.info.clear.stencil = stencil;
 
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h 
b/src/gallium/drivers/ddebug/dd_pipe.h
index e4960d5..5ba1b7a 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -72,13 +72,13 @@ struct call_resource_copy_region
unsigned dstx, dsty, dstz;
struct pipe_resource *src;
unsigned src_level;
-   const struct pipe_box *src_box;
+   struct pipe_box src_box;
 };
 
 struct call_clear
 {
unsigned buffers;
-   const union pipe_color_union *color;
+   union pipe_color_union color;
double depth;
unsigned stencil;
 };
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 15/15] radeonsi: pre-generate shader logs for ddebug

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

This cuts down the overhead of si_dump_shader when ddebug is capturing
shader logs, which is done for every draw call unconditionally (that's
quite a lot of work for a draw call).
---
 src/gallium/drivers/radeonsi/si_debug.c | 11 ---
 src/gallium/drivers/radeonsi/si_shader.c|  2 ++
 src/gallium/drivers/radeonsi/si_shader.h|  7 +++
 src/gallium/drivers/radeonsi/si_state_shaders.c | 20 +---
 4 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index d9d4890..e030f48 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -37,11 +37,16 @@ DEBUG_GET_ONCE_OPTION(replace_shaders, 
"RADEON_REPLACE_SHADERS", NULL)
 static void si_dump_shader(struct si_screen *sscreen,
   struct si_shader_ctx_state *state, FILE *f)
 {
-   if (!state->cso || !state->current)
+   struct si_shader *current = state->current;
+
+   if (!state->cso || !current)
return;
 
-   si_shader_dump(sscreen, state->current, NULL,
-  state->cso->info.processor, f);
+   if (current->shader_log)
+   fwrite(current->shader_log, current->shader_log_size, 1, f);
+   else
+   si_shader_dump(sscreen, state->current, NULL,
+  state->cso->info.processor, f);
 }
 
 /**
diff --git a/src/gallium/drivers/radeonsi/si_shader.c 
b/src/gallium/drivers/radeonsi/si_shader.c
index 4794737..62a1486 100644
--- a/src/gallium/drivers/radeonsi/si_shader.c
+++ b/src/gallium/drivers/radeonsi/si_shader.c
@@ -7951,4 +7951,6 @@ void si_shader_destroy(struct si_shader *shader)
 
if (!shader->is_binary_shared)
radeon_shader_binary_clean(&shader->binary);
+
+   free(shader->shader_log);
 }
diff --git a/src/gallium/drivers/radeonsi/si_shader.h 
b/src/gallium/drivers/radeonsi/si_shader.h
index 6073296..e856049 100644
--- a/src/gallium/drivers/radeonsi/si_shader.h
+++ b/src/gallium/drivers/radeonsi/si_shader.h
@@ -240,6 +240,7 @@ struct si_shader_selector {
 * if thread_index == -1 (non-threaded). */
LLVMTargetMachineReftm;
struct pipe_debug_callback debug;
+   boolis_debug_context;
 
pipe_mutex  mutex;
struct si_shader*first_variant; /* immutable after the first 
variant */
@@ -438,6 +439,12 @@ struct si_shader {
struct radeon_shader_binary binary;
struct si_shader_config config;
struct si_shader_info   info;
+
+   /* Shader key + LLVM IR + disassembly + statistics.
+* Generated for debug contexts only.
+*/
+   char*shader_log;
+   size_t  shader_log_size;
 };
 
 struct si_shader_part {
diff --git a/src/gallium/drivers/radeonsi/si_state_shaders.c 
b/src/gallium/drivers/radeonsi/si_state_shaders.c
index a423296..f6b2541 100644
--- a/src/gallium/drivers/radeonsi/si_state_shaders.c
+++ b/src/gallium/drivers/radeonsi/si_state_shaders.c
@@ -992,7 +992,8 @@ static int si_shader_select_with_key(struct si_screen 
*sscreen,
 union si_shader_key *key,
 LLVMTargetMachineRef tm,
 struct pipe_debug_callback *debug,
-bool wait)
+bool wait,
+bool is_debug_context)
 {
struct si_shader_selector *sel = state->cso;
struct si_shader *current = state->current;
@@ -1043,6 +1044,16 @@ static int si_shader_select_with_key(struct si_screen 
*sscreen,
pipe_mutex_unlock(sel->mutex);
return r;
}
+
+   if (is_debug_context) {
+   FILE *f = open_memstream(&shader->shader_log,
+&shader->shader_log_size);
+   if (f) {
+   si_shader_dump(sscreen, shader, NULL, sel->type, f);
+   fclose(f);
+   }
+   }
+
si_shader_init_pm4_state(sscreen, shader);
 
if (!sel->last_variant) {
@@ -1065,7 +1076,8 @@ static int si_shader_select(struct pipe_context *ctx,
 
si_shader_selector_key(ctx, state->cso, &key);
return si_shader_select_with_key(sctx->screen, state, &key,
-sctx->tm, &sctx->b.debug, true);
+sctx->tm, &sctx->b.debug, true,
+sctx->is_debug);
 }
 
 static void si_parse_next_shader_property(const struct tgsi_shader_info *info,
@@ -1190,7 +1202,7 @@ void si_init_shader_selector_async(void *job, int 
thread_index)
}
 
if (si_shader_select_with_key(sscreen, &state, &key, tm, deb

[Mesa-dev] [PATCH 12/15] ddebug: implement pipelined hang detection mode

2016-07-22 Thread Marek Olšák

From: Marek Olšák 

For good performance while being able to generate decent hang reports.
The report doesn't contain the parsed IB and the buffer list, but it
isolates the draw call and dumps shaders while not having to flush
the context.

This is for GPU hangs that are harder to reproduce and require interactive
playing for minutes or even hours.

dd_pipe.h explains some implementation details. Initializing, copying
(recording) and clearing states is most of the code.

The performance should be at least 50% of the normal performance depending
on the circumstances. (i.e. 50% is expected to be the worst case scenario,
not the best case) The majority of time is spent in
dump_debug_state(PIPE_DUMP_CURRENT_SHADERS) and that's after all
the optimizations in later patches. There is no obvious way to optimize
that further.
---
 src/gallium/drivers/ddebug/dd_context.c |  55 +++-
 src/gallium/drivers/ddebug/dd_draw.c| 449 
 src/gallium/drivers/ddebug/dd_pipe.h|  56 
 src/gallium/drivers/ddebug/dd_screen.c  |  14 +-
 4 files changed, 569 insertions(+), 5 deletions(-)

diff --git a/src/gallium/drivers/ddebug/dd_context.c 
b/src/gallium/drivers/ddebug/dd_context.c
index 96fea3a..4423e90 100644
--- a/src/gallium/drivers/ddebug/dd_context.c
+++ b/src/gallium/drivers/ddebug/dd_context.c
@@ -27,6 +27,7 @@
 
 #include "dd_pipe.h"
 #include "tgsi/tgsi_parse.h"
+#include "util/u_inlines.h"
 #include "util/u_memory.h"
 
 
@@ -589,6 +590,19 @@ dd_context_destroy(struct pipe_context *_pipe)
struct dd_context *dctx = dd_context(_pipe);
struct pipe_context *pipe = dctx->pipe;
 
+   if (dctx->thread) {
+  pipe_mutex_lock(dctx->mutex);
+  dctx->kill_thread = 1;
+  pipe_mutex_unlock(dctx->mutex);
+  pipe_thread_wait(dctx->thread);
+  pipe_mutex_destroy(dctx->mutex);
+  assert(!dctx->records);
+   }
+
+   if (dctx->fence) {
+  pipe->transfer_unmap(pipe, dctx->fence_transfer);
+  pipe_resource_reference(&dctx->fence, NULL);
+   }
pipe->destroy(pipe);
FREE(dctx);
 }
@@ -731,10 +745,8 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
   return NULL;
 
dctx = CALLOC_STRUCT(dd_context);
-   if (!dctx) {
-  pipe->destroy(pipe);
-  return NULL;
-   }
+   if (!dctx)
+  goto fail;
 
dctx->pipe = pipe;
dctx->base.priv = pipe->priv; /* expose wrapped priv data */
@@ -826,5 +838,40 @@ dd_context_create(struct dd_screen *dscreen, struct 
pipe_context *pipe)
dd_init_draw_functions(dctx);
 
dctx->draw_state.sample_mask = ~0;
+
+   if (dscreen->mode == DD_DETECT_HANGS_PIPELINED) {
+  dctx->fence = pipe_buffer_create(dscreen->screen, PIPE_BIND_CUSTOM,
+PIPE_USAGE_STAGING, 4);
+  if (!dctx->fence)
+ goto fail;
+
+  dctx->mapped_fence = pipe_buffer_map(pipe, dctx->fence,
+   PIPE_TRANSFER_READ_WRITE |
+   PIPE_TRANSFER_PERSISTENT |
+   PIPE_TRANSFER_COHERENT,
+   &dctx->fence_transfer);
+  if (!dctx->mapped_fence)
+ goto fail;
+
+  *dctx->mapped_fence = 0;
+
+  pipe_mutex_init(dctx->mutex);
+  dctx->thread = pipe_thread_create(dd_thread_pipelined_hang_detect, dctx);
+  if (!dctx->thread) {
+ pipe_mutex_destroy(dctx->mutex);
+ goto fail;
+  }
+   }
+
return &dctx->base;
+
+fail:
+   if (dctx) {
+  if (dctx->mapped_fence)
+ pipe_transfer_unmap(pipe, dctx->fence_transfer);
+  pipe_resource_reference(&dctx->fence, NULL);
+  FREE(dctx);
+   }
+   pipe->destroy(pipe);
+   return NULL;
 }
diff --git a/src/gallium/drivers/ddebug/dd_draw.c 
b/src/gallium/drivers/ddebug/dd_draw.c
index 836f812..a54cecb 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -29,7 +29,14 @@
 
 #include "util/u_dump.h"
 #include "util/u_format.h"
+#include "util/u_framebuffer.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_scan.h"
+#include "os/os_time.h"
+#include 
 
 
 static FILE *
@@ -570,6 +577,444 @@ dd_flush_and_handle_hang(struct dd_context *dctx,
 }
 
 static void
+dd_unreference_copy_of_call(struct dd_call *dst)
+{
+   switch (dst->type) {
+   case CALL_DRAW_VBO:
+  pipe_so_target_reference(&dst->info.draw_vbo.count_from_stream_output, 
NULL);
+  pipe_resource_reference(&dst->info.draw_vbo.indirect, NULL);
+  pipe_resource_reference(&dst->info.draw_vbo.indirect_params, NULL);
+  break;
+   case CALL_LAUNCH_GRID:
+  pipe_resource_reference(&dst->info.launch_grid.indirect, NULL);
+  break;
+   case CALL_RESOURCE_COPY_REGION:
+  pipe_resource_reference(&dst->info.resource_copy_region.dst, NULL);
+  pipe_resource_reference(&dst->info.resource_copy_region.src, NULL);
+

[Mesa-dev] [PATCH 00/15] Gallium ddebug module: Pipelined GPU hang detection

2016-07-22 Thread Marek Olšák

Hi,

This is for GPU hangs that are hard to reproduce and require interactive
playing for minutes or even hours.

The performance should be at least 50% of the performance without ddebug.
The added CPU overhead is mainly due to recording all states after every
draw call. The added GPU overhead is PS/CS partial flushes and clear_buffer
for writing a user fence. There are no cache flushes between draw calls.

The command is:
  GALLIUM_DDEBUG="pipelined 2000" [executable]

The generated hang report contains everything except the parsed IB and
the buffer list.

My strategy for rare random GPU hangs is to get several hang reports and
see if they have anything in common.

Please review.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 3/9] st/va: add conversion for yv12 to nv12 in putimage v2

2016-07-22 Thread Andy Furniss


Christian König wrote:

From: Boyuan Zhang 

For putimage call, if image format is yv12 (or IYUV with U V field swap)


This comment confuses me.
AIUI (and after checking fourcc.org), yv12 is YVU, IYUV/I420 is YUV, and nv12
is interleaved UVUVUV... So, compared to the normal way of writing yuv/YCbCr,

I wouldn't call IYUV the one "with U V field swap"; if anything, that describes yv12.


and

surface format is nv12, then we need to convert yv12 to nv12 and then copy
the converted data from image to surface. We can't use the existing logic
where surface is destroyed and re-created with yv12 format.

v2 (chk): fix some compiler warnings and commit message

Signed-off-by: Boyuan Zhang 
Signed-off-by: Christian König 
---
  src/gallium/state_trackers/va/image.c | 34 +++---
  1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/src/gallium/state_trackers/va/image.c 
b/src/gallium/state_trackers/va/image.c
index 1b956e3..0364556 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -471,7 +471,9 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, 
VAImageID image,
return VA_STATUS_ERROR_OPERATION_FAILED;
 }

-   if (format != surf->buffer->buffer_format) {
+   if ((format != surf->buffer->buffer_format) &&
+ ((format != PIPE_FORMAT_YV12) || (surf->buffer->buffer_format != 
PIPE_FORMAT_NV12)) &&
+ ((format != PIPE_FORMAT_IYUV) || (surf->buffer->buffer_format != 
PIPE_FORMAT_NV12))) {
struct pipe_video_buffer *tmp_buf;
struct pipe_video_buffer templat = surf->templat;

@@ -513,12 +515,30 @@ vlVaPutImage(VADriverContextP ctx, VASurfaceID surface, 
VAImageID image,
unsigned width, height;
if (!views[i]) continue;
vlVaVideoSurfaceSize(surf, i, &width, &height);
-  for (j = 0; j < views[i]->texture->array_size; ++j) {
- struct pipe_box dst_box = {0, 0, j, width, height, 1};
- drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0,
-PIPE_TRANSFER_WRITE, &dst_box,
-data[i] + pitches[i] * j,
-pitches[i] * views[i]->texture->array_size, 0);
+  if (((format == PIPE_FORMAT_YV12) || (format == PIPE_FORMAT_IYUV)) &&
+(surf->buffer->buffer_format == PIPE_FORMAT_NV12)) {
+ struct pipe_transfer *transfer = NULL;
+ uint8_t *map = NULL;
+ struct pipe_box dst_box_1 = {0, 0, 0, width, height, 1};
+ map = drv->pipe->transfer_map(drv->pipe,
+   views[i]->texture,
+   0,
+   PIPE_TRANSFER_DISCARD_RANGE,
+   &dst_box_1, &transfer);
+ if (map == NULL)
+return VA_STATUS_ERROR_OPERATION_FAILED;
+
+ u_copy_yv12_img_to_nv12_surf ((ubyte * const*)data, map, width, 
height,
+  pitches[i], transfer->stride, i);
+ pipe_transfer_unmap(drv->pipe, transfer);
+  } else {
+ for (j = 0; j < views[i]->texture->array_size; ++j) {
+struct pipe_box dst_box = {0, 0, j, width, height, 1};
+drv->pipe->transfer_inline_write(drv->pipe, views[i]->texture, 0,
+ PIPE_TRANSFER_WRITE, &dst_box,
+ data[i] + pitches[i] * j,
+ pitches[i] * 
views[i]->texture->array_size, 0);
+ }
}
 }
 pipe_mutex_unlock(drv->mutex);



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] nvc0/ir: remove wrong assert in emitUADD()

2016-07-22 Thread Samuel Pitoiset

It's actually legal to have neg modifiers on both sources.

Signed-off-by: Samuel Pitoiset 
---
 src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp 
b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
index 1c3e519..2eb5851 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nvc0.cpp
@@ -698,7 +698,6 @@ CodeEmitterNVC0::emitUADD(const Instruction *i)
uint32_t addOp = 0;
 
assert(!i->src(0).mod.abs() && !i->src(1).mod.abs());
-   assert(!i->src(0).mod.neg() || !i->src(1).mod.neg());
 
if (i->src(0).mod.neg())
   addOp |= 0x200;
-- 
2.8.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] Final review VA-API encode support

2016-07-22 Thread Andy Furniss


Christian König wrote:

Hi guys,

I would like to get this committed today. I just cleaned up patch #1 a bit, and fixed
some compiler warnings in patch #3 and a build break in #4.

Andy any more comments/testing on this or can I add your Tested-by: on it?


I guess you saw my comment in the other thread - one more below WRT the
color issue.



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [AppVeyor] mesa master #1811 completed

2016-07-22 Thread AppVeyor



Build mesa 1811 completed



Commit b33bccb519 by Jason Ekstrand on 7/22/2016 10:59 PM:

anv/pipeline: Set up point coord enables

Signed-off-by: Jason Ekstrand
Reviewed-by: Kenneth Graunke
Tested-by: Lionel Landwerlin
Cc: "12.0"


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] gallium: split transfer_inline_write into buffer and texture callbacks

2016-07-22 Thread Roland Scheidegger

Am 22.07.2016 um 22:40 schrieb Marek Olšák:
> Would anybody from VMWare like to comment?
I didn't really feel compelled to ;-).
The change looks reasonable to me. The whole transfer_inline_write thing
is there to make things faster, so if it helps save CPU cycles by
using two separate functions, that looks good (plus, buffers and textures
are quite different indeed). (We'll have some code to adapt of course,
but this should be trivial.)

So, Acked-by:
Roland Scheidegger 


> 
> Marek
> 
> On Mon, Jul 18, 2016 at 2:25 PM, Marek Olšák  wrote:
>> From: Marek Olšák 
>>
>> to reduce the call indirections with u_resource_vtbl.
>>
>> The worst call tree you could get was:
>>   - u_transfer_inline_write_vtbl
>> - u_default_transfer_inline_write
>>   - u_transfer_map_vtbl
>> - driver_transfer_map
>>   - u_transfer_unmap_vtbl
>> - driver_transfer_unmap
>>
>> That's 6 indirect calls. Some drivers only had 5. The goal is to have
>> 1 indirect call for drivers that care. The resource type can be determined
>> statically at most call sites.
>>
>> The new interface is:
>>   pipe_context::buffer_subdata(ctx, resource, usage, offset, size, data)
>>   pipe_context::texture_subdata(ctx, resource, level, usage, box, data,
>> stride, layer_stride)
>> ---
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.freedesktop.org_mailman_listinfo_mesa-2Ddev&d=CwIGaQ&c=Sqcl0Ez6M0X8aeM67LKIiDJAXVeAw-YihVMNtXt-uEs&r=Vjtt0vs_iqoI31UfJxBl7yv9I2FeiaeAYgMTLKRBc_I&m=s2X_1io_PXD1egZQfheQjckhnywN9aNskmn_3OXJbkA&s=Co_Nq1RW9hhWF1eyX42-yY6adnS2VRD4zJEEkAwvxzM&e=
>  
> 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [AppVeyor] mesa master #1810 failed

2016-07-22 Thread AppVeyor




Build mesa 1810 failed


Commit 4031efaa57 by Jason Ekstrand on 7/22/2016 10:59 PM:

anv/pipeline: Set up point coord enables

Signed-off-by: Jason Ekstrand
Reviewed-by: Kenneth Graunke
Tested-by: Lionel Landwerlin
Cc: "12.0"


Configure your notification preferences

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] anv/pipeline: Set up point coord enables

2016-07-22 Thread Lionel Landwerlin


Tested-by: Lionel Landwerlin 

On 23/07/16 00:28, Jason Ekstrand wrote:

Signed-off-by: Jason Ekstrand 
Cc: "12.0" 
---
  src/intel/vulkan/genX_pipeline_util.h | 5 +
  1 file changed, 5 insertions(+)

diff --git a/src/intel/vulkan/genX_pipeline_util.h 
b/src/intel/vulkan/genX_pipeline_util.h
index 52263df..516fc49 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -291,6 +291,11 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
if (input_index < 0)
   continue;
  
+  if (attr == VARYING_SLOT_PNTC) {

+ sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
+ continue;
+  }
+
const int slot = fs_input_map->varying_to_slot[attr];
  
if (input_index >= 16)



___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] anv/pipeline: Set up point coord enables

2016-07-22 Thread Jason Ekstrand

Signed-off-by: Jason Ekstrand 
Cc: "12.0" 
---
 src/intel/vulkan/genX_pipeline_util.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/intel/vulkan/genX_pipeline_util.h 
b/src/intel/vulkan/genX_pipeline_util.h
index 52263df..516fc49 100644
--- a/src/intel/vulkan/genX_pipeline_util.h
+++ b/src/intel/vulkan/genX_pipeline_util.h
@@ -291,6 +291,11 @@ emit_3dstate_sbe(struct anv_pipeline *pipeline)
   if (input_index < 0)
  continue;
 
+  if (attr == VARYING_SLOT_PNTC) {
+ sbe.PointSpriteTextureCoordinateEnable = 1 << input_index;
+ continue;
+  }
+
   const int slot = fs_input_map->varying_to_slot[attr];
 
   if (input_index >= 16)
-- 
2.5.0.400.gff86faf

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/56] Die copy-and-paste code, die

2016-07-22 Thread Rob Clark

On Tue, Jul 19, 2016 at 3:24 PM, Ian Romanick  wrote:
> After seeing Dave's series to add support for GL_ARB_shader_gpu_int64 and
> thinking about adding support for 8- and  16-bit integers, I decided
> that something had to be done about the cut-and-paste madness that is
> ir_constant_expression.cpp.  I decided to take a page from Jason's book
> and generate it from a machine description of the expressions.  The
> result is this series.

fwiw, I did go down the path of adding half-precision types in IR for
a spell, but ended up deciding it was a maddening explosion of
builtin-types and builtin-functions..

At some point (ie. might be a month or two or more before I get to
it), I was going to go back to my original idea of just keeping things
as-is in glsl IR (as type+precision-qualifier for <=32b types), and
dealing with it in glsl->nir.  Basically just need to add a
precision-visitor (which I think I already pushed on a branch
somewhere) to infer an expression's precision, and wire that up in a
few places (function inlining and various other places where
intermediate vars are created in ir), and mapping type+precision to
nir type in glsl_to_nir.

(Not an argument against autogenerating some of the IR bits..  I think
that would be a really good idea, I'm a much bigger fan of how some of
that is handled in NIR.. maybe there is some room to autogen some of
the builtin-types and builtin-functions, which might make me
reconsider..)

BR,
-R

> You may notice from some of the earlier patches in this series that I
> started this work over a year ago.  The previous work was an attempt to
> generate opt_algebraic.cpp which was ultimately abandoned.  It may be
> worth picking that up again.
>
> I haven't done *anything* for SCons, so hopefully Jose or someone can
> help out there.
>
> All of this is available at:
>
> https://cgit.freedesktop.org/~idr/mesa/log/?h=generated-glsl-ir
>
> Other possible follow-up work:
>
>  - A few expressions don't have constant evaluation support.  I don't
>think I've seen a real shader use any of these, so there's a reason
>we haven't "missed" them.
>
> - frexp_sig
> - frexp_exp
> - vote_any
> - vote_all
> - vote_eq
> - imul_high
> - carry
> - borrow
>
>  - Generate validation code for expressions.  A few times while
>developing this series I had questions about what the IR actually
>supported.  In quite a few cases the IR support is different from
>what GLSL supports.  I would often look to ir_validate.cpp to answer
>these questions, and I was almost always disappointed.  I would
>really like to see missing type information added to
>ir_expression_operation.py and generate a much more complete set of
>validations for ir_expression.  Then the assertions remaining in
>ir_expression_operation_constant.h could be removed.
>
>  - Generate ir_expression constructors.
>
>  - Generate ir_builder.  We don't support every possible expression in
>ir_builder, so this may not be worth the effort.
>
>  src/compiler/Android.glsl.gen.mk |2 +-
>  src/compiler/Makefile.glsl.am|   24 +-
>  src/compiler/Makefile.sources|8 +-
>  src/compiler/glsl/.gitignore |2 +
>  src/compiler/glsl/ir.cpp |  133 +-
>  src/compiler/glsl/ir.h   |  329 +
>  src/compiler/glsl/ir_constant_expression.cpp | 1332 ++
>  src/compiler/glsl/ir_expression_operation.py |  738 ++
>  src/compiler/glsl/ir_print_visitor.cpp   |2 +-
>  src/compiler/glsl/ir_validate.cpp|   11 +
>  src/compiler/glsl/lower_mat_op_to_vec.cpp|2 +-
>  src/mesa/Makefile.sources|1 +
>  src/mesa/drivers/dri/i965/Makefile.am|1 +
>  13 files changed, 901 insertions(+), 1684 deletions(-)
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] main/shaderimage: image unit invalid if texture is incomplete, independently of the level

2016-07-22 Thread Francisco Jerez

Alejandro Piñeiro  writes:

> Hi,
>
> On 15/07/16 22:46, Francisco Jerez wrote:
>> Alejandro Piñeiro  writes:
>>
>>> On 14/07/16 21:24, Francisco Jerez wrote:
 Alejandro Piñeiro  writes:

> Without this commit, an image is considered valid if the level of the
> texture bound to the image is complete, something we can check as mesa
> saves independently whether it is "base incomplete" or "mipmap incomplete".
>
> But, from the OpenGL 4.3 Core Specification, section 8.25 ("Texture
> Image Loads and Stores"):
>
>   "An access is considered invalid if:
> the texture bound to the selected image unit is incomplete;"
>
> This implies that the access to the image unit is invalid if the
> texture is incomplete, regardless of the details of the specific texture
> level bound to the image.
>
> This fixes:
> GL44-CTS.shader_image_load_store.incomplete_textures
> ---
>
> Current piglit test is not testing what this commit tries to fix. I
> will send a patch to piglit in short.
>
>  src/mesa/main/shaderimage.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
> index 90643c4..d20cd90 100644
> --- a/src/mesa/main/shaderimage.c
> +++ b/src/mesa/main/shaderimage.c
> @@ -469,10 +469,18 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, 
> struct gl_image_unit *u)
> if (!t->_BaseComplete && !t->_MipmapComplete)
> _mesa_test_texobj_completeness(ctx, t);
>  
> +   /* From the OpenGL 4.3 Core Specification, Chapter 8.25, Texture Image
> +* Loads and Stores:
> +*
> +*  "An access is considered invalid if:
> +*the texture bound to the selected image unit is incomplete;"
> +*/
> +   if (!t->_BaseComplete ||
> +   !t->_MipmapComplete)
> +  return GL_FALSE;
 I don't think this is correct, AFAIUI a texture having _MipmapComplete
 equal to false doesn't imply that the texture as a whole would be
 considered incomplete according to the GL's definition of completeness.
 Whether or not it's considered complete usually depends on the sampler
 state while you're doing regular texture sampling: If the sampler a
 texture object is used with has any of the mipmap filtering modes
 enabled you need to check _MipmapComplete, otherwise you need to check
 _BaseComplete.  The problem when you attempt to carry over this
 definition to shader images (as the spec implies) is that image units
 have no sampler state as such, and that they can only ever access one
 specified level of the texture at a time (potentially a texture level
 other than the base).  This patch makes image units behave like a
 sampler unit with mipmap filtering enabled for the purpose of texture
 completeness validation, which is almost definitely too strong.
>>> Yes, I didn't realize that _BaseComplete and _MipmapComplete were not
>>> checking the state at all. Thanks for pointing it.
>>>
 An alternative would be to do something along the lines of:

 | if (!_mesa_is_texture_complete(t, &t->Sampler))
 |return GL_FALSE;
>>> Yes, that is what I wanted, to return false if the texture is incomplete.
>>>
 The problem is that you would then run into problems when some of the
 non-base mipmap levels are missing but the sampler state baked into the
 gl_texture_object says that you aren't mipmapping, so the GL spec would
 normally consider the texture to be complete and
 _mesa_is_texture_complete would return true accordingly, but still you
 wouldn't be able to use any of the missing texture levels as shader
 image if the application tried to bind them to an image unit (that's the
 reason for the u->Level vs t->BaseLevel checks below you're removing).
>>> Ok, then if I understand correctly, the solution is not about replacing
>>> the level checks for _mesa_is_texture_complete, but keeping current
>>> checks, and add a _mesa_is_texture_complete check. Just checked and
>>> everything seems to work fine (except that now the behaviour is more
>>> strict, see below). I will send a patch in short.
>>>
>> Yeah, that would likely work and get the CTS test to pass, but it would
>> still be more strict than the spec says and consider cases that are OK
>> according to the spec to be incomplete, so I was reluctant to call it a
>> solution.
>>
>> I think the ideal solution would be for the state of an image unit to be
>> independent from the filtering and sampling state, and depend on the
>> completeness of the bound level *only*.  Any idea if this CTS (or your
>> equivalent piglit test) passes on other GL implementations that support
>> image load/store (e.g. nVidia's -- I would be surprised if it does).
>
> Just checked today with NVIDIA 352.30 and 352.30. I was not able to
> directly te

Re: [Mesa-dev] [PATCH 00/56] Die copy-and-paste code, die

2016-07-22 Thread Dylan Baker

I didn't read over the code super closely, but all of the python looked
good and obvious minus a few nits (and they were all just nits).

For the python bits:
Acked-by: Dylan Baker 


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] The background of the x-window is transparent when running glxgears with mesa

2016-07-22 Thread Fu, Sheng

Hi,

When I ran glxgears on my CentOS box with Mesa, the background of the x-window 
looks transparent. When run with an NVIDIA graphics card, it is solid. Is there a 
setting to make the background a solid color?

Thanks

Sheng

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 23.5/36] isl: Take the slice0_extent shortcut for interleaved MSAA

2016-07-22 Thread Nanley Chery

On Fri, Jul 01, 2016 at 04:08:50PM -0700, Jason Ekstrand wrote:
> The shortcut works just fine for MSAA and the comment even says so.

This patch is,
Reviewed-by: Nanley Chery 

> ---
>  src/intel/isl/isl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/src/intel/isl/isl.c b/src/intel/isl/isl.c
> index be3adfc..6bdb248 100644
> --- a/src/intel/isl/isl.c
> +++ b/src/intel/isl/isl.c
> @@ -575,7 +575,7 @@ isl_calc_phys_slice0_extent_sa_gen4_2d(
>  
> assert(phys_level0_sa->depth == 1);
>  
> -   if (info->levels == 1 && msaa_layout != ISL_MSAA_LAYOUT_INTERLEAVED) {
> +   if (info->levels == 1) {
>/* Do not pad the surface to the image alignment. Instead, pad it only
> * to the pixel format's block alignment.
> *
> -- 
> 2.5.0.400.gff86faf
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 38/56] glsl: Generate code for constant ir_binop_ldexp expressions

2016-07-22 Thread Dylan Baker

Quoting Ian Romanick (2016-07-19 12:24:57)
> From: Ian Romanick 
> 
> ldexp is weird because its two operands have different types.  Add
> support for directly specifying the exact signatures of all the possible
> variations of an operation.
> 
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/glsl/ir_expression_operation.py | 23 +++
>  1 file changed, 19 insertions(+), 4 deletions(-)
> 
> diff --git a/src/compiler/glsl/ir_expression_operation.py 
> b/src/compiler/glsl/ir_expression_operation.py
> index 2351dcf..de9c7b7 100644
> --- a/src/compiler/glsl/ir_expression_operation.py
> +++ b/src/compiler/glsl/ir_expression_operation.py
> @@ -202,7 +202,7 @@ types_identical_operation = "identical"
>  non_assign_operation = "nonassign"
>  
>  class operation(object):
> -   def __init__(self, name, num_operands, printable_name = None, 
> source_types = None, dest_type = None, c_expression = None, flags = None):
> +   def __init__(self, name, num_operands, printable_name = None, 
> source_types = None, dest_type = None, c_expression = None, flags = None, 
> all_signatures = None):
>self.name = name
>self.num_operands = num_operands
>  
> @@ -211,7 +211,13 @@ class operation(object):
>else:
>   self.printable_name = printable_name
>  
> -  self.source_types = source_types
> +  self.all_signatures = all_signatures
> +
> +  if source_types is None:
> + self.source_types = ()

Just for clarity could you make this "self.source_types = tuple()"?
Parens don't always mean tuple in python and I actually assumed that
would be None.

> +  else:
> + self.source_types = source_types
> +
>self.dest_type = dest_type
>  
>if c_expression is None:
> @@ -261,6 +267,8 @@ class operation(object):
>  return constant_template0.render(op=self)
>   elif self.dest_type is not None:
>  return constant_template5.render(op=self)
> + else:
> +return constant_template3.render(op=self)
>  
>return None
>  
> @@ -276,7 +284,10 @@ class operation(object):
>  
>  
> def signatures(self):
> -  return type_signature_iter(self.dest_type, self.source_types, 
> self.num_operands)
> +  if self.all_signatures is not None:
> + return self.all_signatures
> +  else:
> + return type_signature_iter(self.dest_type, self.source_types, 
> self.num_operands)
>  
>  
>  ir_expression_operation = [
> @@ -469,7 +480,11 @@ ir_expression_operation = [
> operation("ubo_load", 2),
>  
> # Multiplies a number by two to a power, part of ARB_gpu_shader5.
> -   operation("ldexp", 2),
> +   operation("ldexp", 2,
> + all_signatures=((float_type, (float_type, int_type)),
> + (double_type, (double_type, int_type))),
> + c_expression={'f': "ldexpf_flush_subnormal({src0}, {src1})",
> +   'd': "ldexp_flush_subnormal({src0}, {src1})"}),
>  
> # Extract a scalar from a vector
> #
> -- 
> 2.5.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 10/17] glsl: Define a gl_LastFragData built-in for GLSL versions that have gl_FragData.

2016-07-22 Thread Kenneth Graunke

On Wednesday, July 20, 2016 9:49:40 PM PDT Francisco Jerez wrote:
> The EXT_shader_framebuffer_fetch extension defines alternative
> language for GLES2 shaders where user-defined fragment outputs are not
> allowed.  Instead of using inout user-defined fragment outputs the
> shader is expected to read from the gl_LastFragData built-in array.
> In addition this allows using the same language on desktop GLSL
> versions prior to 4.2 that support the deprecated gl_FragData built-in
> in preparation for the MESA_shader_framebuffer_fetch desktop GL
> extension.
> 
> Both legacy and user-defined inout outputs have a common
> representation at the GLSL IR level, so it shouldn't make any
> difference for optimization passes and back-ends whether the
> application is using gl_LastFragData or user-defined outputs, all
> they'll see is a variable dereference of a fragment output at a
> certain interface location with the fb_fetch_output bit set to one.
> ---
>  src/compiler/glsl/builtin_variables.cpp | 10 ++
>  1 file changed, 10 insertions(+)
> 
> diff --git a/src/compiler/glsl/builtin_variables.cpp 
> b/src/compiler/glsl/builtin_variables.cpp
> index f63dc3a..6a756ed 100644
> --- a/src/compiler/glsl/builtin_variables.cpp
> +++ b/src/compiler/glsl/builtin_variables.cpp
> @@ -1136,6 +1136,16 @@ builtin_variable_generator::generate_fs_special_vars()
>   array(vec4_t, state->Const.MaxDrawBuffers), "gl_FragData");
> }
>  
> +   if (state->has_framebuffer_fetch() && !state->is_version(420, 300)) {
> +  ir_variable *const var =
> + add_output(FRAG_RESULT_DATA0,
> +array(vec4_t, state->Const.MaxDrawBuffers),
> +"gl_LastFragData");
> +  var->data.precision = GLSL_PRECISION_MEDIUM;
> +  var->data.read_only = 1;
> +  var->data.fb_fetch_output = 1;
> +   }
> +

Personally, I'd only create gl_LastFragData in desktop 1.10/1.20,
and not 1.30+ where it's deprecated.  Sure, you /can/ use it, but
you can also do the 'inout' syntax that's the preferred way going
forward, so we may as well just require shader authors to do so.

> if (state->es_shader && state->language_version == 100 && 
> state->EXT_blend_func_extended_enable) {
>/* We make an assumption here that there will only ever be one 
> dual-source draw buffer
> * In case this assumption is ever proven to be false, make sure to 
> assert here
> 



signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 48/56] glsl: Generate code for constant ir_triop_csel expressions

2016-07-22 Thread Dylan Baker

Quoting Ian Romanick (2016-07-19 12:25:07)
> From: Ian Romanick 
> 
> Signed-off-by: Ian Romanick 
> ---
>  src/compiler/glsl/ir_expression_operation.py | 24 +++-
>  1 file changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/ir_expression_operation.py 
> b/src/compiler/glsl/ir_expression_operation.py
> index 7161713..b1b7101 100644
> --- a/src/compiler/glsl/ir_expression_operation.py
> +++ b/src/compiler/glsl/ir_expression_operation.py
> @@ -357,6 +357,24 @@ constant_template_lrp = mako.template.Template("""\
>break;
> }""")
>  
> +# This template is for ir_triop_csel.  This expression is really unique
> +# because not all of the operands are the same type, and the second operand
> +# determines the type of the expression (instead of the first).
> +constant_template_csel = mako.template.Template("""\
> +   case ${op.get_enum_name()}:
> +  for (unsigned c = 0; c < components; c++) {
> + switch (this->type->base_type) {
> +% for (dst_type, src_types) in op.signatures():

nit: python doesn't require braces around container explosions except
in some corner cases with nested containers. (you can take it or leave
it, though)

> + case ${src_types[1].glsl_type}:
> +data.${dst_type.union_field}[c] = 
> ${op.get_c_expression(src_types)};
> +break;
> +% endfor
> + default:
> +assert(0);
> + }
> +  }
> +  break;""")
> +
>  
>  vector_scalar_operation = "vector-scalar"
>  horizontal_operation = "horizontal"
> @@ -443,6 +461,8 @@ class operation(object):
>  return constant_template_vector_insert.render(op=self)
>   elif self.name == "lrp":
>  return constant_template_lrp.render(op=self)
> + elif self.name == "csel":
> +return constant_template_csel.render(op=self)
>   else:
>  return constant_template3.render(op=self)
>elif self.num_operands == 4:
> @@ -700,7 +720,9 @@ ir_expression_operation = [
> # component on vectors).
> #
> # See also lower_instructions_visitor::ldexp_to_arith
> -   operation("csel", 3),
> +   operation("csel", 3,
> + all_signatures=zip(all_types, zip(len(all_types) * 
> (bool_type,), all_types, all_types)),

The zip(len(all_types) * (bool_type,) bit is a little confusing at
first, it might deserve a comment.

Also, you might be able to use itertools.izip instead of zip here, it
will probably speed up the build, but it doesn't make a list, just an
iterator, so if internally operation() needs to iterate all_signatures
more than once it won't work.

> + c_expression="{src0} ? {src1} : {src2}"),
>  
> operation("bitfield_extract", 3,
>   all_signatures=((int_type, (uint_type, int_type, int_type)),
> -- 
> 2.5.5
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 00/16] Various Vulkan/SPIR-V fixes (mostly SPIR-V)

2016-07-22 Thread Kenneth Graunke

On Thursday, July 21, 2016 9:21:39 PM PDT Jason Ekstrand wrote:
> I pulled the latest version of the Vulkan CTS and gave it a run.  One of
> the major additions was a pile of tests for different texturing operations.
> This demonstrated that our texturing support was, well, sub-par.  These
> patches get it most of the way fixed up.  As a nice side-benefit, it also
> fixes 5 piglit tests on GL.
> 
> Jason Ekstrand (16):
>   spirv/nir: Fix some texture opcode asserts
>   spirv/nir: Add support for shadow samplers that return vec4
>   spirv/nir: Properly handle gather components
>   anv/pipeline: Set binding_table.gather_texture_start
>   nir: Add a helper for determining the type of a texture source
>   nir/lower_tex: Add some helpers for working with tex sources
>   nir/lower_tex: Add support for lowering coordinate offsets
>   i965/nir: Enable NIR lowering of txf and rect offsets
>   i965: Get rid of the do_lower_unnormalized_offsets pass
>   spirv/nir: Don't increment coord_components for array lod queries
>   anv/image: Assert that the image format is actually supported
>   spirv/nir/: Move opcode selection higher up in handle_texture
>   spirv/nir: Refactor type handling in handle_texture
>   nir/spirv: Refactor coordinate handling in handle_texture
>   spirv/nir: Handle texture projectors
>   spirv/nir: Add support for ImageQuerySamples
> 
>  src/compiler/nir/nir.h |  54 ++
>  src/compiler/nir/nir_lower_tex.c   |  98 +--
>  src/compiler/spirv/spirv_to_nir.c  | 189 
> -
>  src/intel/vulkan/anv_image.c   |   7 +-
>  src/intel/vulkan/anv_pipeline.c|   1 +
>  src/mesa/drivers/dri/i965/Makefile.sources |   1 -
>  src/mesa/drivers/dri/i965/brw_context.h|   1 -
>  src/mesa/drivers/dri/i965/brw_link.cpp |   1 -
>  .../dri/i965/brw_lower_unnormalized_offset.cpp | 106 
>  src/mesa/drivers/dri/i965/brw_nir.c|   2 +
>  10 files changed, 255 insertions(+), 205 deletions(-)
>  delete mode 100644 
> src/mesa/drivers/dri/i965/brw_lower_unnormalized_offset.cpp

Patch 15 has a "projetor" typo in a comment, and patch 12 has a bonus /
in the commit title.  Other than that, series is:

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [Bug 86326] clEnqueueNDRangeKernel global_work_offset ignored

2016-07-22 Thread bugzilla-daemon

https://bugs.freedesktop.org/show_bug.cgi?id=86326

Jan Vesely  changed:

   What|Removed |Added

 Resolution|--- |FIXED
 Status|NEW |RESOLVED

--- Comment #18 from Jan Vesely  ---
Fixed with latest llvm/libclc.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 0/9] st/mesa state tracking rewrite

2016-07-22 Thread Marek Olšák

Would anybody from VMWare like to comment?

Marek

On Mon, Jul 18, 2016 at 3:11 PM, Marek Olšák  wrote:
> Hi,
>
> This series rewrites state tracking in st/mesa. The advantages of the new
> approach are described in the main patch. Here I'll just say that it was
> motivated by a little inefficient code in st_validate_state and the mapping
> of _NEW_* flags to atoms. The new approach works with one ST_NEW_* flag per
> atom and those are the only flags that are available to st/mesa.
> The _NEW_* flags are translated by Driver.UpdateState to ST_NEW_* flags
> and then forgotten, so st/mesa doesn't use them internally.
>
> Please review.
>
> Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/3] gallium: split transfer_inline_write into buffer and texture callbacks

2016-07-22 Thread Marek Olšák

Would anybody from VMWare like to comment?

Marek

On Mon, Jul 18, 2016 at 2:25 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> to reduce the call indirections with u_resource_vtbl.
>
> The worst call tree you could get was:
>   - u_transfer_inline_write_vtbl
> - u_default_transfer_inline_write
>   - u_transfer_map_vtbl
> - driver_transfer_map
>   - u_transfer_unmap_vtbl
> - driver_transfer_unmap
>
> That's 6 indirect calls. Some drivers only had 5. The goal is to have
> 1 indirect call for drivers that care. The resource type can be determined
> statically at most call sites.
>
> The new interface is:
>   pipe_context::buffer_subdata(ctx, resource, usage, offset, size, data)
>   pipe_context::texture_subdata(ctx, resource, level, usage, box, data,
> stride, layer_stride)
> ---
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH shader-db 4/4] si-report.py: add completely new shader statistics reporting

2016-07-22 Thread Marek Olšák

For those who would like to try this out, FYI, I've pushed this series just now.

Marek

On Thu, Jul 14, 2016 at 4:28 PM, Marek Olšák  wrote:
> From: Marek Olšák 
>
> We can remove the old reporting if people are OK with that.
> The old reporting has a bug that it reports 0->N changes as 0 % in several
> places. (should be inf %)
>
> The new reporting shows:
> - VGPR spilling shaders and apps (from the second file only)
> - the same for SGPRs
> - worst regressions (from the comparison of both files)
> - percentage deltas at the end
> .. and colors!!!
>
> Example:
>
>  WORST VGPR SPILLS (not deltas)VGPRs 
> SpillVGPR ScratchVGPR
>  shaders/private/f1-2015/18.shader_test [0]   32  
>0   516
>  shaders/private/bioshock-infinite/256.shader_test [0]64  
>  176   180
>  shaders/private/ue4_lightroom_interior_day/42.shader_test [0]28  
>076
>  shaders/private/dirt-showdown/676.shader_test [0]68  
>   4972
>  shaders/private/f1-2015/1102.shader_test [0] 52  
>072
>  shaders/private/bioshock-infinite/814.shader_test [0]64  
>   5760
>  shaders/private/ue4_lightroom_interior_day/33.shader_test [0]24  
>052
>  shaders/private/ue4_lightroom_interior_day/37.shader_test [0]24  
>052
>  shaders/private/bioshock-infinite/698.shader_test [0]16  
>036
>  shaders/private/dirt-showdown/406.shader_test [0]64  
>   3336
>
>  VGPR SPILLING APPS   Shaders SpillVGPR ScratchVGPR
>  alien_isolation 29381216
>  bioshock-infinite   1769   233   720
>  dirt-showdown54182   108
>  f1-2015  774 0   624
>  tesseract430 2 4
>  ue4_lightroom_inter.. 74 0   180
>
>  WORST SGPR SPILLS (not deltas)SGPRs 
> SpillSGPR
>  shaders/private/talos_principle/1942.shader_test [1] 80  
>  168
>  shaders/private/ue4_effects_cave/289.shader_test [0] 80  
>  168
>  shaders/private/talos_principle/2052.shader_test [1] 80  
>  161
>  shaders/private/serious_sam_3_bfe/1081.shader_test [1]   80  
>  148
>  shaders/private/borderlands2/5330.shader_test [0]80  
>  137
>  shaders/private/talos_principle/2040.shader_test [1] 80  
>  133
>  shaders/private/talos_principle/2041.shader_test [1] 80  
>  133
>  shaders/private/talos_principle/2036.shader_test [1] 80  
>  133
>  shaders/private/talos_principle/2035.shader_test [1] 80  
>  133
>  shaders/private/borderlands2/5548.shader_test [0]80  
>  131
>
>  SGPR SPILLING APPS   Shaders SpillSGPR AvgPerSh
>  alien_isolation 2938 23198  7.9
>  batman_arkham_origins58930  0.1
>  bioshock-infinite   176984  0.0
>  borderlands23968  6449  1.6
>  brutal-legend338   647  1.9
>  civilization_beyond..116   213  1.8
>  counter_strike_glob..   1142  4338  3.8
>  dirt-showdown541  1071  2.0
>  dolphin   2262  2.8
>  dota2   1747   338  0.2
>  europa_universalis_4  7644  0.6
>  f1-2015  774  6245  8.1
>  left_4_dead_2   1762 13778  7.8
>  metro_2033_redux2670   547  0.2
>  nexuiz80   111  1.4
>  portal   474  2211  4.7
>  serious_sam_3_bfe392  6626 16.9
>  talos_principle  324  4539 14.0
>  team_fortress_2  808  4823  6.0
>  thea 17241  0.2
>  ue4_effects_cave 299   494  1.7
>  ue4_elemental586   355  0.6
>  ue4_lightroom_inter.. 7429  0.4
>  ue4_realistic_rende.. 9260  0.7
>  unigine_heaven   322   174  0.5
>  unigine_sanctuary264   400  1.5
>  unigine_tropics  210   328  1.6
>  unigine_valley   278   427  1.5
>  unity 7245  0.6
>  warsow   17613  0.1
>  witcher21040 2  0.0
>
>  WORST REGRESSIONS - VGPRSBefore  
>After Delta Percentage
>  shaders/private/metro_2033_redux/1082.shader_test [0]   136  
>  18044   32.35 %
>  shaders/private/witcher2/20.shader_test [0]

[Mesa-dev] [PATCH v2] vc4: add hash table look-up for exported dmabufs

2016-07-22 Thread Rob Herring

It is necessary to reuse existing BOs when dmabufs are imported. There
are 2 cases that need to be handled. dmabufs can be created/exported and
imported by the same process and can be imported multiple times.
Copying other drivers, add a hash table to track exported BOs so the
BOs get reused.

Cc: Eric Anholt 
Signed-off-by: Rob Herring 
---
v2:
- Avoid taking mutex on unreference if private
- Fix use after free with util_hash_table_remove

 src/gallium/drivers/vc4/vc4_bufmgr.c | 20 +++-
 src/gallium/drivers/vc4/vc4_bufmgr.h | 19 ++-
 src/gallium/drivers/vc4/vc4_screen.c | 15 +++
 src/gallium/drivers/vc4/vc4_screen.h |  3 +++
 4 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.c 
b/src/gallium/drivers/vc4/vc4_bufmgr.c
index 21e3bde..f6bacfd 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.c
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 
+#include "util/u_hash_table.h"
 #include "util/u_memory.h"
 #include "util/ralloc.h"
 
@@ -329,10 +330,19 @@ vc4_bo_open_handle(struct vc4_screen *screen,
uint32_t winsys_stride,
uint32_t handle, uint32_t size)
 {
-struct vc4_bo *bo = CALLOC_STRUCT(vc4_bo);
+struct vc4_bo *bo;
 
 assert(size);
 
+pipe_mutex_lock(screen->bo_handles_mutex);
+
+bo = util_hash_table_get(screen->bo_handles, (void*)(uintptr_t)handle);
+if (bo) {
+pipe_reference(NULL, &bo->reference);
+goto done;
+}
+
+bo = CALLOC_STRUCT(vc4_bo);
 pipe_reference_init(&bo->reference, 1);
 bo->screen = screen;
 bo->handle = handle;
@@ -347,6 +357,10 @@ vc4_bo_open_handle(struct vc4_screen *screen,
 bo->map = malloc(bo->size);
 #endif
 
+util_hash_table_set(screen->bo_handles, (void *)(uintptr_t)handle, bo);
+
+done:
+pipe_mutex_unlock(screen->bo_handles_mutex);
 return bo;
 }
 
@@ -399,7 +413,11 @@ vc4_bo_get_dmabuf(struct vc4_bo *bo)
 bo->handle);
 return -1;
 }
+
+pipe_mutex_lock(bo->screen->bo_handles_mutex);
 bo->private = false;
+util_hash_table_set(bo->screen->bo_handles, (void 
*)(uintptr_t)bo->handle, bo);
+pipe_mutex_unlock(bo->screen->bo_handles_mutex);
 
 return fd;
 }
diff --git a/src/gallium/drivers/vc4/vc4_bufmgr.h 
b/src/gallium/drivers/vc4/vc4_bufmgr.h
index b77506e..c84b850 100644
--- a/src/gallium/drivers/vc4/vc4_bufmgr.h
+++ b/src/gallium/drivers/vc4/vc4_bufmgr.h
@@ -25,6 +25,7 @@
 #define VC4_BUFMGR_H
 
 #include 
+#include "util/u_hash_table.h"
 #include "util/u_inlines.h"
 #include "vc4_qir.h"
 
@@ -87,11 +88,27 @@ vc4_bo_reference(struct vc4_bo *bo)
 static inline void
 vc4_bo_unreference(struct vc4_bo **bo)
 {
+struct vc4_screen *screen;
 if (!*bo)
 return;
 
-if (pipe_reference(&(*bo)->reference, NULL))
+if ((*bo)->private) {
+/* Avoid the mutex for private BOs */
+if (pipe_reference(&(*bo)->reference, NULL))
 vc4_bo_last_unreference(*bo);
+} else {
+screen = (*bo)->screen;
+pipe_mutex_lock(screen->bo_handles_mutex);
+
+if (pipe_reference(&(*bo)->reference, NULL)) {
+util_hash_table_remove(screen->bo_handles,
+   (void *)(uintptr_t)(*bo)->handle);
+vc4_bo_last_unreference(*bo);
+}
+
+pipe_mutex_unlock(screen->bo_handles_mutex);
+}
+
 *bo = NULL;
 }
 
diff --git a/src/gallium/drivers/vc4/vc4_screen.c 
b/src/gallium/drivers/vc4/vc4_screen.c
index 29c0f94..82544e0 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -30,6 +30,7 @@
 #include "util/u_debug.h"
 #include "util/u_memory.h"
 #include "util/u_format.h"
+#include "util/u_hash_table.h"
 #include "util/ralloc.h"
 
 #include "vc4_screen.h"
@@ -496,6 +497,18 @@ vc4_screen_is_format_supported(struct pipe_screen *pscreen,
 return retval == usage;
 }
 
+#define PTR_TO_UINT(x) ((unsigned)((intptr_t)(x)))
+
+static unsigned handle_hash(void *key)
+{
+return PTR_TO_UINT(key);
+}
+
+static int handle_compare(void *key1, void *key2)
+{
+return PTR_TO_UINT(key1) != PTR_TO_UINT(key2);
+}
+
 static bool
 vc4_supports_branches(struct vc4_screen *screen)
 {
@@ -523,6 +536,8 @@ vc4_screen_create(int fd)
 
 screen->fd = fd;
 list_inithead(&screen->bo_cache.time_list);
+pipe_mutex_init(screen->bo_handles_mutex);
+screen->bo_handles = util_hash_table_create(handle_hash, 
handle_compare);
 
 if (vc4_supports_branches(screen))
 screen->has_control_flow = true;
diff --git a/src/gallium/drivers/vc4/vc4_screen.h 
b/src/gallium/drivers/vc4/vc4_screen.h
index 6cecca6..16b3a6c 100644
--- a/src/gallium/

Re: [Mesa-dev] [PATCH] nir: Lower interp_var_at_* like a normal load_var for flat inputs.

2016-07-22 Thread Anuj Phogat

On Fri, Jul 22, 2016 at 11:04 AM, Kenneth Graunke  wrote:
> "flat centroid" and "flat sample" both just mean "flat", so we should
> ignore interpolateAtCentroid/Sample and just return the flat value.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97032
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/nir/nir_lower_io.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
> index bf8296f..b36836f 100644
> --- a/src/compiler/nir/nir_lower_io.c
> +++ b/src/compiler/nir/nir_lower_io.c
> @@ -340,6 +340,10 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct 
> lower_io_state *state,
>
> assert(var->data.mode == nir_var_shader_in);
>
> +   /* Ignore interpolateAt() for flat variables - flat is flat. */
> +   if (var->data.interpolation == INTERP_MODE_FLAT)
> +  return lower_load(intrin, state, NULL, offset);
> +
> nir_intrinsic_op bary_op;
> switch (intrin->intrinsic) {
> case nir_intrinsic_interp_var_at_centroid:
> --
> 2.9.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] nir: Lower interp_var_at_* like a normal load_var for flat inputs.

2016-07-22 Thread Jason Ekstrand

Seems reasonable.  Reviewed-by: Jason Ekstrand

On Jul 22, 2016 11:04 AM, "Kenneth Graunke"  wrote:

> "flat centroid" and "flat sample" both just mean "flat", so we should
> ignore interpolateAtCentroid/Sample and just return the flat value.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97032
> Signed-off-by: Kenneth Graunke 
> ---
>  src/compiler/nir/nir_lower_io.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/compiler/nir/nir_lower_io.c
> b/src/compiler/nir/nir_lower_io.c
> index bf8296f..b36836f 100644
> --- a/src/compiler/nir/nir_lower_io.c
> +++ b/src/compiler/nir/nir_lower_io.c
> @@ -340,6 +340,10 @@ lower_interpolate_at(nir_intrinsic_instr *intrin,
> struct lower_io_state *state,
>
> assert(var->data.mode == nir_var_shader_in);
>
> +   /* Ignore interpolateAt() for flat variables - flat is flat. */
> +   if (var->data.interpolation == INTERP_MODE_FLAT)
> +  return lower_load(intrin, state, NULL, offset);
> +
> nir_intrinsic_op bary_op;
> switch (intrin->intrinsic) {
> case nir_intrinsic_interp_var_at_centroid:
> --
> 2.9.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Matt Turner

On Thu, Jul 21, 2016 at 10:48 AM, Vedran Miletić  wrote:
> LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
> general practice, we should avoid using such generic names when it is
> possible to do so.
>
> This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
> and removes workarounds previously used to enable building Mesa with
> LLVM (pop_macro() and push_macro() function calls).

I wonder if we shouldn't change

  #ifdef DEBUG

to

   #ifndef NDEBUG

to match the behavior of assert(). We already have many uses of #ifndef NDEBUG.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [Mesa-stable] [PATCH] mesa: Don't call GenerateMipmap if Width or Height == 0.

2016-07-22 Thread Anuj Phogat

On Fri, Jul 22, 2016 at 11:04 AM, Kenneth Graunke  wrote:
>
> One of the WebGL 2.0 conformance tests is trying to call
> glGenerateMipmaps with a width and height of 0.  With the meta
> implementation, this generates a "framebuffer attachment incomplete"
> status, and falls back to the CPU path, calling MapTextureImage.
>
> Except that there's no actual texture to map, and we assert fail.
>
> There's no work to do in this case.  The test expects it to succeed,
> so just return early with no error and avoid hassling the driver.
>
> Cc: mesa-sta...@lists.freedesktop.org
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96911
> Signed-off-by: Kenneth Graunke 
> ---
>  src/mesa/main/genmipmap.c | 5 +
>  1 file changed, 5 insertions(+)
>
> diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
> index c952c4f..97d3c62 100644
> --- a/src/mesa/main/genmipmap.c
> +++ b/src/mesa/main/genmipmap.c
> @@ -149,6 +149,11 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
>return;
> }
>
> +   if (srcImage->Width == 0 || srcImage->Height == 0) {
> +  _mesa_unlock_texture(ctx, texObj);
> +  return;
> +   }
> +
> if (target == GL_TEXTURE_CUBE_MAP) {
>GLuint face;
>for (face = 0; face < 6; face++) {
> --
> 2.9.0
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable


Reviewed-by: Anuj Phogat 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 03/11] gallium: add common pipe_screen reference counting functions

2016-07-22 Thread Ilia Mirkin

On Fri, Jul 22, 2016 at 2:01 PM, Rob Herring  wrote:
> On Fri, Jul 22, 2016 at 11:46 AM, Ilia Mirkin  wrote:
>> On Fri, Jul 22, 2016 at 12:22 PM, Rob Herring  wrote:
>>> In order to prevent multiple pipe_screens being created in the same
>>> process, lookup of the DRM FD and reference counting of the pipe_screen
>>> are needed. Several implementations of this exist in various gallium
>>> drivers/winsys already. This creates a common version which is opt-in
>>> for winsys implementations.
>>>
>>> Signed-off-by: Rob Herring 
>>> ---
>>>  src/gallium/auxiliary/Makefile.sources |   2 +
>>>  src/gallium/auxiliary/util/u_screen.c  | 114 
>>> +
>>>  src/gallium/auxiliary/util/u_screen.h  |  32 +
>>>  src/gallium/include/pipe/p_screen.h|   3 +
>>>  4 files changed, 151 insertions(+)
>>>  create mode 100644 src/gallium/auxiliary/util/u_screen.c
>>>  create mode 100644 src/gallium/auxiliary/util/u_screen.h
>>>
>>> diff --git a/src/gallium/auxiliary/Makefile.sources 
>>> b/src/gallium/auxiliary/Makefile.sources
>>> index e0311bf..197ed36 100644
>>> --- a/src/gallium/auxiliary/Makefile.sources
>>> +++ b/src/gallium/auxiliary/Makefile.sources
>>> @@ -284,6 +284,8 @@ C_SOURCES := \
>>> util/u_ringbuffer.h \
>>> util/u_sampler.c \
>>> util/u_sampler.h \
>>> +   util/u_screen.c \
>>> +   util/u_screen.h \
>>> util/u_simple_shaders.c \
>>> util/u_simple_shaders.h \
>>> util/u_slab.c \
>>> diff --git a/src/gallium/auxiliary/util/u_screen.c 
>>> b/src/gallium/auxiliary/util/u_screen.c
>>> new file mode 100644
>>> index 000..47bad11
>>> --- /dev/null
>>> +++ b/src/gallium/auxiliary/util/u_screen.c
>>> @@ -0,0 +1,114 @@
>>> +/*
>>> + * Copyright 2016 Linaro, Ltd., Rob Herring 
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a
>>> + * copy of this software and associated documentation files (the
>>> + * "Software"), to deal in the Software without restriction, including
>>> + * without limitation the rights to use, copy, modify, merge, publish,
>>> + * distribute, sub license, and/or sell copies of the Software, and to
>>> + * permit persons to whom the Software is furnished to do so, subject to
>>> + * the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice (including the
>>> + * next paragraph) shall be included in all copies or substantial portions
>>> + * of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
>>> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
>>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>>> + */
>>> +
>>> +/**
>>> + * Functions for managing pipe_screen's
>>> + */
>>> +
>>> +#include 
>>> +
>>> +#include "os/os_thread.h"
>>> +
>>> +#include "pipe/p_screen.h"
>>> +#include "util/u_hash_table.h"
>>> +#include "util/u_inlines.h"
>>> +#include "util/u_pointer.h"
>>> +#include "util/u_screen.h"
>>> +
>>> +static struct util_hash_table *fd_tab = NULL;
>>> +pipe_static_mutex(fd_tab_mutex);
>>> +
>>> +static unsigned hash_fd(void *key)
>>> +{
>>> +   int fd = pointer_to_intptr(key);
>>> +   struct stat stat;
>>> +   fstat(fd, &stat);
>>> +
>>> +   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
>>> +}
>>> +
>>> +static int compare_fd(void *key1, void *key2)
>>> +{
>>> +   int fd1 = pointer_to_intptr(key1);
>>> +   int fd2 = pointer_to_intptr(key2);
>>> +   struct stat stat1, stat2;
>>> +   fstat(fd1, &stat1);
>>> +   fstat(fd2, &stat2);
>>> +
>>> +   return stat1.st_dev != stat2.st_dev ||
>>> + stat1.st_ino != stat2.st_ino ||
>>> + stat1.st_rdev != stat2.st_rdev;
>>> +}
>>> +
>>> +struct pipe_screen *
>>> +pipe_screen_reference(int fd)
>>> +{
>>> +   struct pipe_screen *pscreen;
>>> +
>>> +   if (!fd_tab) {
>>> +  fd_tab = util_hash_table_create(hash_fd, compare_fd);
>>
>> Do you need to grab the fd_tab_mutex around this? What if two
>> pipe_screen_reference() calls race to be the first ones?
>
> No, but only because the loader_mutex serializes things. That's not
> obvious though so putting fd_tab_mutex around it would make this
> function more robust.
>
>>> +  return NULL;
>>> +   }
>>> +
>>> +   pipe_mutex_lock(fd_tab_mutex);
>>> +   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
>>> +   if (pscreen)
>>> +  pipe_reference(NULL, &pscreen->reference);
>>> +   pipe_mutex_unlock(fd_tab_mutex);
>>> +
>>> +   return pscreen;
>>> +}
>>> +
>>> +boolean
>>> +pipe_screen_unreference(struct pipe_screen *pscreen)
>>> +{
>>> +   boolean destroy;
>>> +
>>> +   if (!pscreen)
>>> +  return FALSE;
>>> +
>>> +   /* Work-around

[Mesa-dev] [PATCH] nir: Lower interp_var_at_* like a normal load_var for flat inputs.

2016-07-22 Thread Kenneth Graunke

"flat centroid" and "flat sample" both just mean "flat", so we should
ignore interpolateAtCentroid/Sample and just return the flat value.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=97032
Signed-off-by: Kenneth Graunke 
---
 src/compiler/nir/nir_lower_io.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index bf8296f..b36836f 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -340,6 +340,10 @@ lower_interpolate_at(nir_intrinsic_instr *intrin, struct 
lower_io_state *state,
 
assert(var->data.mode == nir_var_shader_in);
 
+   /* Ignore interpolateAt() for flat variables - flat is flat. */
+   if (var->data.interpolation == INTERP_MODE_FLAT)
+  return lower_load(intrin, state, NULL, offset);
+
nir_intrinsic_op bary_op;
switch (intrin->intrinsic) {
case nir_intrinsic_interp_var_at_centroid:
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH] mesa: Don't call GenerateMipmap if Width or Height == 0.

2016-07-22 Thread Kenneth Graunke

One of the WebGL 2.0 conformance tests calls glGenerateMipmap on a
texture whose width and height are 0.  With the meta implementation,
this generates a "framebuffer attachment incomplete" status, and falls
back to the CPU path, calling MapTextureImage.

Except that there's no actual texture to map, so we hit an assertion failure.

There's no work to do in this case.  The test expects it to succeed,
so just return early with no error and avoid hassling the driver.

Cc: mesa-sta...@lists.freedesktop.org
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=96911
Signed-off-by: Kenneth Graunke 
---
 src/mesa/main/genmipmap.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/mesa/main/genmipmap.c b/src/mesa/main/genmipmap.c
index c952c4f..97d3c62 100644
--- a/src/mesa/main/genmipmap.c
+++ b/src/mesa/main/genmipmap.c
@@ -149,6 +149,11 @@ _mesa_generate_texture_mipmap(struct gl_context *ctx,
   return;
}
 
+   if (srcImage->Width == 0 || srcImage->Height == 0) {
+  _mesa_unlock_texture(ctx, texObj);
+  return;
+   }
+
if (target == GL_TEXTURE_CUBE_MAP) {
   GLuint face;
   for (face = 0; face < 6; face++) {
-- 
2.9.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 03/11] gallium: add common pipe_screen reference counting functions

2016-07-22 Thread Rob Herring

On Fri, Jul 22, 2016 at 11:46 AM, Ilia Mirkin  wrote:
> On Fri, Jul 22, 2016 at 12:22 PM, Rob Herring  wrote:
>> In order to prevent multiple pipe_screens being created in the same
>> process, lookup of the DRM FD and reference counting of the pipe_screen
>> are needed. Several implementations of this exist in various gallium
>> drivers/winsys already. This creates a common version which is opt-in
>> for winsys implementations.
>>
>> Signed-off-by: Rob Herring 
>> ---
>>  src/gallium/auxiliary/Makefile.sources |   2 +
>>  src/gallium/auxiliary/util/u_screen.c  | 114 
>> +
>>  src/gallium/auxiliary/util/u_screen.h  |  32 +
>>  src/gallium/include/pipe/p_screen.h|   3 +
>>  4 files changed, 151 insertions(+)
>>  create mode 100644 src/gallium/auxiliary/util/u_screen.c
>>  create mode 100644 src/gallium/auxiliary/util/u_screen.h
>>
>> diff --git a/src/gallium/auxiliary/Makefile.sources 
>> b/src/gallium/auxiliary/Makefile.sources
>> index e0311bf..197ed36 100644
>> --- a/src/gallium/auxiliary/Makefile.sources
>> +++ b/src/gallium/auxiliary/Makefile.sources
>> @@ -284,6 +284,8 @@ C_SOURCES := \
>> util/u_ringbuffer.h \
>> util/u_sampler.c \
>> util/u_sampler.h \
>> +   util/u_screen.c \
>> +   util/u_screen.h \
>> util/u_simple_shaders.c \
>> util/u_simple_shaders.h \
>> util/u_slab.c \
>> diff --git a/src/gallium/auxiliary/util/u_screen.c 
>> b/src/gallium/auxiliary/util/u_screen.c
>> new file mode 100644
>> index 000..47bad11
>> --- /dev/null
>> +++ b/src/gallium/auxiliary/util/u_screen.c
>> @@ -0,0 +1,114 @@
>> +/*
>> + * Copyright 2016 Linaro, Ltd., Rob Herring 
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the
>> + * "Software"), to deal in the Software without restriction, including
>> + * without limitation the rights to use, copy, modify, merge, publish,
>> + * distribute, sub license, and/or sell copies of the Software, and to
>> + * permit persons to whom the Software is furnished to do so, subject to
>> + * the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the
>> + * next paragraph) shall be included in all copies or substantial portions
>> + * of the Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
>> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
>> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
>> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
>> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
>> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
>> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
>> + */
>> +
>> +/**
>> + * Functions for managing pipe_screen's
>> + */
>> +
>> +#include 
>> +
>> +#include "os/os_thread.h"
>> +
>> +#include "pipe/p_screen.h"
>> +#include "util/u_hash_table.h"
>> +#include "util/u_inlines.h"
>> +#include "util/u_pointer.h"
>> +#include "util/u_screen.h"
>> +
>> +static struct util_hash_table *fd_tab = NULL;
>> +pipe_static_mutex(fd_tab_mutex);
>> +
>> +static unsigned hash_fd(void *key)
>> +{
>> +   int fd = pointer_to_intptr(key);
>> +   struct stat stat;
>> +   fstat(fd, &stat);
>> +
>> +   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
>> +}
>> +
>> +static int compare_fd(void *key1, void *key2)
>> +{
>> +   int fd1 = pointer_to_intptr(key1);
>> +   int fd2 = pointer_to_intptr(key2);
>> +   struct stat stat1, stat2;
>> +   fstat(fd1, &stat1);
>> +   fstat(fd2, &stat2);
>> +
>> +   return stat1.st_dev != stat2.st_dev ||
>> + stat1.st_ino != stat2.st_ino ||
>> + stat1.st_rdev != stat2.st_rdev;
>> +}
>> +
>> +struct pipe_screen *
>> +pipe_screen_reference(int fd)
>> +{
>> +   struct pipe_screen *pscreen;
>> +
>> +   if (!fd_tab) {
>> +  fd_tab = util_hash_table_create(hash_fd, compare_fd);
>
> Do you need to grab the fd_tab_mutex around this? What if two
> pipe_screen_reference() calls race to be the first ones?

No, but only because the loader_mutex already serializes the callers.
That's not obvious, though, so taking fd_tab_mutex around the table
creation would make this function more robust.

>> +  return NULL;
>> +   }
>> +
>> +   pipe_mutex_lock(fd_tab_mutex);
>> +   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
>> +   if (pscreen)
>> +  pipe_reference(NULL, &pscreen->reference);
>> +   pipe_mutex_unlock(fd_tab_mutex);
>> +
>> +   return pscreen;
>> +}
>> +
>> +boolean
>> +pipe_screen_unreference(struct pipe_screen *pscreen)
>> +{
>> +   boolean destroy;
>> +
>> +   if (!pscreen)
>> +  return FALSE;
>> +
>> +   /* Work-around until all pipe_screens have ref counting */
>> +   if (!pipe_is_referenced(&pscreen->reference)) {
>> +  pscreen->destroy(pscreen);
>> +  return TRUE;
>> +   }
>> +
>> +   pipe_mutex_lock

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Jose Fonseca


(2nd try. 1st email is being held due to size.)

On 21/07/16 18:48, Vedran Miletić wrote:

LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
general practice, we should avoid using such generic names when it is
possible to do so.

This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
and removes the workarounds previously needed to build Mesa with
LLVM (the push_macro()/pop_macro() pragmas).

Please let me know if I missed any.

Signed-off-by: Vedran Miletić 
---
  configure.ac   |  2 +-
  src/compiler/glsl/ir_validate.cpp  |  4 +-
  src/compiler/nir/nir.h |  6 +-
  src/compiler/nir/nir_metadata.c|  4 +-
  src/compiler/nir/nir_validate.c|  5 +-
  src/egl/drivers/haiku/egl_haiku.cpp|  6 +-
  src/egl/main/eglconfig.c   |  6 +-
  src/gallium/auxiliary/draw/draw_cliptest_tmp.h |  4 +-
  src/gallium/auxiliary/gallivm/lp_bld_debug.h   | 12 ++--
  src/gallium/auxiliary/gallivm/lp_bld_init.c| 16 +++---
  src/gallium/auxiliary/gallivm/lp_bld_misc.cpp  | 23 ++--
  src/gallium/auxiliary/gallivm/lp_bld_struct.c  | 16 +++---
  src/gallium/auxiliary/os/os_memory.h   |  6 +-
  src/gallium/auxiliary/os/os_misc.c |  4 +-
  src/gallium/auxiliary/os/os_misc.h |  6 +-
  .../auxiliary/pipebuffer/pb_buffer_fenced.c| 10 ++--
  src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c |  6 +-
  src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 +++---
  src/gallium/auxiliary/tgsi/tgsi_ureg.c |  8 +--
  src/gallium/auxiliary/util/u_cache.c   | 16 +++---
  src/gallium/auxiliary/util/u_cpu_detect.c  |  8 +--
  src/gallium/auxiliary/util/u_debug.c   | 18 +++---
  src/gallium/auxiliary/util/u_debug.h   | 66 +++---
  src/gallium/auxiliary/util/u_debug_flush.c |  4 +-
  src/gallium/auxiliary/util/u_debug_flush.h |  6 +-
  src/gallium/auxiliary/util/u_debug_image.c |  4 +-
  src/gallium/auxiliary/util/u_debug_image.h |  8 +--
  src/gallium/drivers/freedreno/ir3/ir3.c| 16 +++---
  src/gallium/drivers/freedreno/ir3/ir3.h| 18 +++---
  src/gallium/drivers/freedreno/ir3/ir3_print.c  |  4 +-
  src/gallium/drivers/freedreno/ir3/ir3_ra.c |  4 +-
  src/gallium/drivers/i915/i915_debug.c  |  6 +-
  src/gallium/drivers/i915/i915_debug.h  |  6 +-
  src/gallium/drivers/ilo/core/ilo_debug.h   | 17 +++---
  src/gallium/drivers/llvmpipe/lp_debug.h|  6 +-
  src/gallium/drivers/llvmpipe/lp_perf.h |  6 +-
  src/gallium/drivers/llvmpipe/lp_rast.c |  4 +-
  src/gallium/drivers/llvmpipe/lp_rast.h |  4 +-
  src/gallium/drivers/llvmpipe/lp_rast_priv.h|  6 +-
  src/gallium/drivers/llvmpipe/lp_scene.c|  4 +-
  src/gallium/drivers/llvmpipe/lp_screen.c   |  8 +--
  src/gallium/drivers/llvmpipe/lp_setup_line.c   |  4 +-
  src/gallium/drivers/llvmpipe/lp_setup_point.c  |  4 +-
  src/gallium/drivers/llvmpipe/lp_state_sampler.c|  4 +-
  src/gallium/drivers/llvmpipe/lp_test_main.c|  4 +-
  src/gallium/drivers/llvmpipe/lp_texture.c  | 24 
  src/gallium/drivers/llvmpipe/lp_texture.h  |  4 +-
  .../drivers/nouveau/codegen/nv50_ir_driver.h   |  6 +-
  .../drivers/nouveau/codegen/nv50_ir_inlines.h  |  4 +-
  src/gallium/drivers/nouveau/nouveau_screen.h   |  4 +-
  src/gallium/drivers/nouveau/nouveau_statebuf.h | 10 ++--
  src/gallium/drivers/nouveau/nv50/nv50_program.c|  6 +-
  src/gallium/drivers/nouveau/nvc0/nvc0_program.c| 14 ++---
  src/gallium/drivers/nouveau/nvc0/nve4_compute.c| 12 ++--
  src/gallium/drivers/r300/r300_cb.h |  6 +-
  src/gallium/drivers/r300/r300_context.c|  6 +-
  src/gallium/drivers/r300/r300_cs.h |  6 +-
  src/gallium/drivers/softpipe/sp_tex_sample.c   |  4 +-
  src/gallium/drivers/svga/svga_debug.h  |  8 +--
  src/gallium/drivers/svga/svga_draw.c   |  6 +-
  src/gallium/drivers/svga/svga_format.c |  6 +-
  src/gallium/drivers/svga/svga_pipe_draw.c  |  4 +-
  .../drivers/svga/svga_resource_buffer_upload.c |  4 +-
  src/gallium/drivers/svga/svga_screen.c | 18 +++---
  src/gallium/drivers/svga/svga_screen.h |  6 +-
  src/gallium/drivers/svga/svga_state.c  |  6 +-
  src/gallium/drivers/svga/svga_state_constants.c|  4 +-
  src/gallium/drivers/svga/svga_state_fs.c   | 10 ++--
  .../drivers/swr/rasterizer/jitter/JitManager.cpp   |  5 --
  .../drivers/swr/rasterizer/jitter/JitManager.h |  6 --
  src/gallium/drivers/swr/swr_shader.cpp |  4 --
  src/gallium/drivers/swr/swr_state.cpp  |  4 --
  src/g

Re: [Mesa-dev] [PATCH mesa] mapi: fix typo in macro name

2016-07-22 Thread Matt Turner

On Fri, Jul 22, 2016 at 2:24 AM, Eric Engestrom
 wrote:
> Fixes: 5ec140c17b54c2592009 ("mapi: Massage code to allow clang to compile.")
> Reported-by: Alexandre Demers 
> Signed-off-by: Eric Engestrom 

Doh!

Thanks. R-b and pushed.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH v4 03/11] gallium: add common pipe_screen reference counting functions

2016-07-22 Thread Ilia Mirkin

On Fri, Jul 22, 2016 at 12:22 PM, Rob Herring  wrote:
> In order to prevent multiple pipe_screens being created in the same
> process, lookup of the DRM FD and reference counting of the pipe_screen
> are needed. Several implementations of this exist in various gallium
> drivers/winsys already. This creates a common version which is opt-in
> for winsys implementations.
>
> Signed-off-by: Rob Herring 
> ---
>  src/gallium/auxiliary/Makefile.sources |   2 +
>  src/gallium/auxiliary/util/u_screen.c  | 114 
> +
>  src/gallium/auxiliary/util/u_screen.h  |  32 +
>  src/gallium/include/pipe/p_screen.h|   3 +
>  4 files changed, 151 insertions(+)
>  create mode 100644 src/gallium/auxiliary/util/u_screen.c
>  create mode 100644 src/gallium/auxiliary/util/u_screen.h
>
> diff --git a/src/gallium/auxiliary/Makefile.sources 
> b/src/gallium/auxiliary/Makefile.sources
> index e0311bf..197ed36 100644
> --- a/src/gallium/auxiliary/Makefile.sources
> +++ b/src/gallium/auxiliary/Makefile.sources
> @@ -284,6 +284,8 @@ C_SOURCES := \
> util/u_ringbuffer.h \
> util/u_sampler.c \
> util/u_sampler.h \
> +   util/u_screen.c \
> +   util/u_screen.h \
> util/u_simple_shaders.c \
> util/u_simple_shaders.h \
> util/u_slab.c \
> diff --git a/src/gallium/auxiliary/util/u_screen.c 
> b/src/gallium/auxiliary/util/u_screen.c
> new file mode 100644
> index 000..47bad11
> --- /dev/null
> +++ b/src/gallium/auxiliary/util/u_screen.c
> @@ -0,0 +1,114 @@
> +/*
> + * Copyright 2016 Linaro, Ltd., Rob Herring 
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the
> + * "Software"), to deal in the Software without restriction, including
> + * without limitation the rights to use, copy, modify, merge, publish,
> + * distribute, sub license, and/or sell copies of the Software, and to
> + * permit persons to whom the Software is furnished to do so, subject to
> + * the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the
> + * next paragraph) shall be included in all copies or substantial portions
> + * of the Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
> + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
> + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
> + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
> + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
> + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> + */
> +
> +/**
> + * Functions for managing pipe_screen's
> + */
> +
> +#include 
> +
> +#include "os/os_thread.h"
> +
> +#include "pipe/p_screen.h"
> +#include "util/u_hash_table.h"
> +#include "util/u_inlines.h"
> +#include "util/u_pointer.h"
> +#include "util/u_screen.h"
> +
> +static struct util_hash_table *fd_tab = NULL;
> +pipe_static_mutex(fd_tab_mutex);
> +
> +static unsigned hash_fd(void *key)
> +{
> +   int fd = pointer_to_intptr(key);
> +   struct stat stat;
> +   fstat(fd, &stat);
> +
> +   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
> +}
> +
> +static int compare_fd(void *key1, void *key2)
> +{
> +   int fd1 = pointer_to_intptr(key1);
> +   int fd2 = pointer_to_intptr(key2);
> +   struct stat stat1, stat2;
> +   fstat(fd1, &stat1);
> +   fstat(fd2, &stat2);
> +
> +   return stat1.st_dev != stat2.st_dev ||
> + stat1.st_ino != stat2.st_ino ||
> + stat1.st_rdev != stat2.st_rdev;
> +}
> +
> +struct pipe_screen *
> +pipe_screen_reference(int fd)
> +{
> +   struct pipe_screen *pscreen;
> +
> +   if (!fd_tab) {
> +  fd_tab = util_hash_table_create(hash_fd, compare_fd);

Do you need to grab the fd_tab_mutex around this? What if two
pipe_screen_reference() calls race to be the first ones?

> +  return NULL;
> +   }
> +
> +   pipe_mutex_lock(fd_tab_mutex);
> +   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
> +   if (pscreen)
> +  pipe_reference(NULL, &pscreen->reference);
> +   pipe_mutex_unlock(fd_tab_mutex);
> +
> +   return pscreen;
> +}
> +
> +boolean
> +pipe_screen_unreference(struct pipe_screen *pscreen)
> +{
> +   boolean destroy;
> +
> +   if (!pscreen)
> +  return FALSE;
> +
> +   /* Work-around until all pipe_screens have ref counting */
> +   if (!pipe_is_referenced(&pscreen->reference)) {
> +  pscreen->destroy(pscreen);
> +  return TRUE;
> +   }
> +
> +   pipe_mutex_lock(fd_tab_mutex);
> +   destroy = pipe_reference(&pscreen->reference, NULL);
> +   if (destroy) {
> +  pscreen->destroy(pscreen);
> +  util_hash_table_remove(fd_tab, intptr_to_pointer(pscreen->fd));
> +  close(pscreen->fd);

It seems a little odd that you're closing a fd that you didn't
open/dup in this library. It's a bit of asymmetry in t

[Mesa-dev] [PATCH v4 05/11] nouveau: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions. The
driver-local nouveau_screen_mutex can be dropped because the pipe loader
already protects the create_screen() calls.

Signed-off-by: Rob Herring 
Cc: Alexandre Courbot 
---
 src/gallium/drivers/nouveau/nouveau_screen.c   |  6 --
 src/gallium/drivers/nouveau/nouveau_screen.h   |  4 --
 src/gallium/drivers/nouveau/nv30/nv30_screen.c |  3 -
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |  3 -
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |  3 -
 .../winsys/nouveau/drm/nouveau_drm_winsys.c| 69 ++
 6 files changed, 5 insertions(+), 83 deletions(-)

diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c 
b/src/gallium/drivers/nouveau/nouveau_screen.c
index 2c421cc..41d4bef 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -159,12 +159,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct 
nouveau_device *dev)
screen->drm = nouveau_drm(&dev->object);
screen->device = dev;
 
-   /*
-* this is initialized to 1 in nouveau_drm_screen_create after screen
-* is fully constructed and added to the global screen list.
-*/
-   screen->refcount = -1;
-
if (dev->chipset < 0xc0) {
   data = &nv04_data;
   size = sizeof(nv04_data);
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h 
b/src/gallium/drivers/nouveau/nouveau_screen.h
index 28c4760..55156c3 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -23,8 +23,6 @@ struct nouveau_screen {
struct nouveau_client *client;
struct nouveau_pushbuf *pushbuf;
 
-   int refcount;
-
unsigned vidmem_bindings; /* PIPE_BIND_* where VRAM placement is desired */
unsigned sysmem_bindings; /* PIPE_BIND_* where GART placement is desired */
unsigned lowmem_bindings; /* PIPE_BIND_* that require an address < 4 GiB */
@@ -119,8 +117,6 @@ nouveau_screen(struct pipe_screen *pscreen)
return (struct nouveau_screen *)pscreen;
 }
 
-bool nouveau_drm_screen_unref(struct nouveau_screen *screen);
-
 bool
 nouveau_screen_bo_get_handle(struct pipe_screen *pscreen,
  struct nouveau_bo *bo,
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c 
b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 68d8317..591cf92 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -408,9 +408,6 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
 {
struct nv30_screen *screen = nv30_screen(pscreen);
 
-   if (!nouveau_drm_screen_unref(&screen->base))
-  return;
-
if (screen->base.fence.current) {
   struct nouveau_fence *current = NULL;
 
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c 
b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 303ecf1..7dbf66f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -428,9 +428,6 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
 {
struct nv50_screen *screen = nv50_screen(pscreen);
 
-   if (!nouveau_drm_screen_unref(&screen->base))
-  return;
-
if (screen->base.fence.current) {
   struct nouveau_fence *current = NULL;
 
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c 
b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index f681631..f789de4 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -485,9 +485,6 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
 {
struct nvc0_screen *screen = nvc0_screen(pscreen);
 
-   if (!nouveau_drm_screen_unref(&screen->base))
-  return;
-
if (screen->base.fence.current) {
   struct nouveau_fence *current = NULL;
 
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c 
b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index f90572f..d208d9c 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -1,12 +1,9 @@
-#include 
 #include 
 #include "pipe/p_context.h"
 #include "pipe/p_state.h"
 #include "util/u_format.h"
 #include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/u_hash_table.h"
-#include "os/os_thread.h"
+#include "util/u_screen.h"
 
 #include "nouveau_drm_public.h"
 
@@ -16,47 +13,6 @@
 #include 
 #include 
 
-static struct util_hash_table *fd_tab = NULL;
-
-pipe_static_mutex(nouveau_screen_mutex);
-
-bool nouveau_drm_screen_unref(struct nouveau_screen *screen)
-{
-   int ret;
-   if (screen->refcount == -1)
-   return true;
-
-   pipe_mutex_lock(nouveau_screen_mutex);
-   ret = --screen->refcount;
-   assert(ret >= 0);
-   if (ret == 0)
-   util_hash_table_remove(fd_tab, 
intptr_to_pointer(screen->drm->fd));
-   pipe_mutex_unlock(nouveau_screen_mutex);
-   return ret == 0;
-}
-
-static unsigned hash_fd(void *key)
-{
-

[Mesa-dev] [PATCH v4 08/11] radeon: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions. The
mutex can be dropped as the pipe loader protects the create_screen()
calls.

Signed-off-by: Rob Herring 
Cc: "Marek Olšák" 
Cc: Ilia Mirkin 
---
 src/gallium/drivers/r300/r300_screen.c|  3 -
 src/gallium/drivers/r600/r600_pipe.c  |  6 --
 src/gallium/drivers/radeon/radeon_winsys.h|  8 ---
 src/gallium/drivers/radeonsi/si_pipe.c|  6 --
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 80 ++-
 5 files changed, 6 insertions(+), 97 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_screen.c 
b/src/gallium/drivers/r300/r300_screen.c
index d47b70d..1340009 100644
--- a/src/gallium/drivers/r300/r300_screen.c
+++ b/src/gallium/drivers/r300/r300_screen.c
@@ -676,9 +676,6 @@ static void r300_destroy_screen(struct pipe_screen* pscreen)
 struct r300_screen* r300screen = r300_screen(pscreen);
 struct radeon_winsys *rws = radeon_winsys(pscreen);
 
-if (rws && !rws->unref(rws))
-  return;
-
 pipe_mutex_destroy(r300screen->cmask_mutex);
 
 if (rws)
diff --git a/src/gallium/drivers/r600/r600_pipe.c 
b/src/gallium/drivers/r600/r600_pipe.c
index f23daf9..c645295 100644
--- a/src/gallium/drivers/r600/r600_pipe.c
+++ b/src/gallium/drivers/r600/r600_pipe.c
@@ -570,12 +570,6 @@ static void r600_destroy_screen(struct pipe_screen* 
pscreen)
 {
struct r600_screen *rscreen = (struct r600_screen *)pscreen;
 
-   if (!rscreen)
-   return;
-
-   if (!rscreen->b.ws->unref(rscreen->b.ws))
-   return;
-
if (rscreen->global_pool) {
compute_memory_pool_delete(rscreen->global_pool);
}
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h 
b/src/gallium/drivers/radeon/radeon_winsys.h
index a9c9b9e..2ca19f4 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -426,14 +426,6 @@ struct radeon_winsys {
 struct pipe_screen *screen;
 
 /**
- * Decrement the winsys reference count.
- *
- * \param ws  The winsys this function is called for.
- * \returnTrue if the winsys and screen should be destroyed.
- */
-bool (*unref)(struct radeon_winsys *ws);
-
-/**
  * Destroy this winsys.
  *
  * \param wsThe winsys this function is called from.
diff --git a/src/gallium/drivers/radeonsi/si_pipe.c 
b/src/gallium/drivers/radeonsi/si_pipe.c
index ee97bcf..1c6920c 100644
--- a/src/gallium/drivers/radeonsi/si_pipe.c
+++ b/src/gallium/drivers/radeonsi/si_pipe.c
@@ -657,12 +657,6 @@ static void si_destroy_screen(struct pipe_screen* pscreen)
};
unsigned i;
 
-   if (!sscreen)
-   return;
-
-   if (!sscreen->b.ws->unref(sscreen->b.ws))
-   return;
-
if (util_queue_is_initialized(&sscreen->shader_compiler_queue))
util_queue_destroy(&sscreen->shader_compiler_queue);
 
diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 1b32c37..355197c 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -38,11 +38,11 @@
 #include "pipebuffer/pb_bufmgr.h"
 #include "util/u_memory.h"
 #include "util/u_hash_table.h"
+#include "util/u_screen.h"
 
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -63,9 +63,6 @@
 #define RADEON_INFO_GPU_RESET_COUNTER   0x26
 #endif
 
-static struct util_hash_table *fd_tab = NULL;
-pipe_static_mutex(fd_tab_mutex);
-
 /* Enable/disable feature access for one command stream.
  * If enable == true, return true on success.
  * Otherwise, return false.
@@ -558,9 +555,6 @@ static void radeon_winsys_destroy(struct radeon_winsys *rws)
 pipe_mutex_destroy(ws->bo_handles_mutex);
 pipe_mutex_destroy(ws->bo_va_mutex);
 
-if (ws->fd >= 0)
-close(ws->fd);
-
 FREE(rws);
 }
 
@@ -665,49 +659,8 @@ static bool radeon_read_registers(struct radeon_winsys 
*rws,
 return true;
 }
 
-static unsigned hash_fd(void *key)
-{
-int fd = pointer_to_intptr(key);
-struct stat stat;
-fstat(fd, &stat);
-
-return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
-}
-
-static int compare_fd(void *key1, void *key2)
-{
-int fd1 = pointer_to_intptr(key1);
-int fd2 = pointer_to_intptr(key2);
-struct stat stat1, stat2;
-fstat(fd1, &stat1);
-fstat(fd2, &stat2);
-
-return stat1.st_dev != stat2.st_dev ||
-   stat1.st_ino != stat2.st_ino ||
-   stat1.st_rdev != stat2.st_rdev;
-}
-
 DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
 
-static bool radeon_winsys_unref(struct radeon_winsys *ws)
-{
-struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
-bool destroy;
-
-/* When the reference counter drops to zero, remove the fd from the table.
- * This must happen while the mutex is locked, so that
- * radeon_drm_winsys_c

[Mesa-dev] [PATCH v4 11/11] vc4: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions for
vc4. This is necessary to create only a single pipe_screen per process
and to avoid multiple imports of the same prime fd, among other things
(probably).

Cc: Eric Anholt 
Signed-off-by: Rob Herring 
---
 src/gallium/winsys/vc4/drm/vc4_drm_winsys.c | 11 ++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c 
b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
index c5434ad..e0d9cc2 100644
--- a/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
+++ b/src/gallium/winsys/vc4/drm/vc4_drm_winsys.c
@@ -22,6 +22,7 @@
  */
 
 #include 
+#include "util/u_screen.h"
 
 #include "vc4_drm_public.h"
 
@@ -30,5 +31,13 @@
 struct pipe_screen *
 vc4_drm_screen_create(int fd)
 {
-   return vc4_screen_create(dup(fd));
+   int dupfd;
+   struct pipe_screen *pscreen = pipe_screen_reference(fd);
+   if (pscreen)
+   return pscreen;
+
+   dupfd = dup(fd);
+   pscreen = vc4_screen_create(dupfd);
+   pipe_screen_reference_init(pscreen, dupfd);
+   return pscreen;
 }
-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 04/11] pipe-loader-drm: use pipe_screen_unreference to destroy screen

2016-07-22 Thread Rob Herring

Use pipe_screen_unreference as it will call pipe_screen->destroy() when
the pipe_screen is no longer referenced.

The pipe_screen referencing is done within create_screen() functions
as drivers (like amdgpu) may have special needs for ref counting.

Signed-off-by: Rob Herring 
Cc: Emil Velikov 
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 554e59a..2edb291 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -45,6 +45,7 @@
 #include "util/u_memory.h"
 #include "util/u_dl.h"
 #include "util/u_debug.h"
+#include "util/u_screen.h"
 
 #define DRM_RENDER_NODE_DEV_NAME_FORMAT "%s/renderD%d"
 #define DRM_RENDER_NODE_MAX_NODES 63
@@ -275,7 +276,7 @@ pipe_loader_drm_release(struct pipe_loader_device **dev)
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev);
struct pipe_screen *pscreen = ddev->base.pscreen;
 
-   pscreen->destroy(pscreen);
+   pipe_screen_unreference(pscreen);
 
 #ifndef GALLIUM_STATIC_TARGETS
if (ddev->lib)
-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 03/11] gallium: add common pipe_screen reference counting functions

2016-07-22 Thread Rob Herring

In order to prevent multiple pipe_screens from being created in the same
process, lookup of the DRM FD and reference counting of the pipe_screen
are needed. Several implementations of this already exist in various
gallium drivers/winsys. This creates a common version which is opt-in
for winsys implementations.

Signed-off-by: Rob Herring 
---
 src/gallium/auxiliary/Makefile.sources |   2 +
 src/gallium/auxiliary/util/u_screen.c  | 114 +
 src/gallium/auxiliary/util/u_screen.h  |  32 +
 src/gallium/include/pipe/p_screen.h|   3 +
 4 files changed, 151 insertions(+)
 create mode 100644 src/gallium/auxiliary/util/u_screen.c
 create mode 100644 src/gallium/auxiliary/util/u_screen.h

diff --git a/src/gallium/auxiliary/Makefile.sources 
b/src/gallium/auxiliary/Makefile.sources
index e0311bf..197ed36 100644
--- a/src/gallium/auxiliary/Makefile.sources
+++ b/src/gallium/auxiliary/Makefile.sources
@@ -284,6 +284,8 @@ C_SOURCES := \
util/u_ringbuffer.h \
util/u_sampler.c \
util/u_sampler.h \
+   util/u_screen.c \
+   util/u_screen.h \
util/u_simple_shaders.c \
util/u_simple_shaders.h \
util/u_slab.c \
diff --git a/src/gallium/auxiliary/util/u_screen.c 
b/src/gallium/auxiliary/util/u_screen.c
new file mode 100644
index 000..47bad11
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_screen.c
@@ -0,0 +1,114 @@
+/*
+ * Copyright 2016 Linaro, Ltd., Rob Herring 
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/**
+ * Functions for managing pipe_screen's
+ */
+
+#include 
+
+#include "os/os_thread.h"
+
+#include "pipe/p_screen.h"
+#include "util/u_hash_table.h"
+#include "util/u_inlines.h"
+#include "util/u_pointer.h"
+#include "util/u_screen.h"
+
+static struct util_hash_table *fd_tab = NULL;
+pipe_static_mutex(fd_tab_mutex);
+
+static unsigned hash_fd(void *key)
+{
+   int fd = pointer_to_intptr(key);
+   struct stat stat;
+   fstat(fd, &stat);
+
+   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
+}
+
+static int compare_fd(void *key1, void *key2)
+{
+   int fd1 = pointer_to_intptr(key1);
+   int fd2 = pointer_to_intptr(key2);
+   struct stat stat1, stat2;
+   fstat(fd1, &stat1);
+   fstat(fd2, &stat2);
+
+   return stat1.st_dev != stat2.st_dev ||
+ stat1.st_ino != stat2.st_ino ||
+ stat1.st_rdev != stat2.st_rdev;
+}
+
+struct pipe_screen *
+pipe_screen_reference(int fd)
+{
+   struct pipe_screen *pscreen;
+
+   if (!fd_tab) {
+  fd_tab = util_hash_table_create(hash_fd, compare_fd);
+  return NULL;
+   }
+
+   pipe_mutex_lock(fd_tab_mutex);
+   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
+   if (pscreen)
+  pipe_reference(NULL, &pscreen->reference);
+   pipe_mutex_unlock(fd_tab_mutex);
+
+   return pscreen;
+}
+
+boolean
+pipe_screen_unreference(struct pipe_screen *pscreen)
+{
+   boolean destroy;
+
+   if (!pscreen)
+  return FALSE;
+
+   /* Work-around until all pipe_screens have ref counting */
+   if (!pipe_is_referenced(&pscreen->reference)) {
+  pscreen->destroy(pscreen);
+  return TRUE;
+   }
+
+   pipe_mutex_lock(fd_tab_mutex);
+   destroy = pipe_reference(&pscreen->reference, NULL);
+   if (destroy) {
+  pscreen->destroy(pscreen);
+  util_hash_table_remove(fd_tab, intptr_to_pointer(pscreen->fd));
+  close(pscreen->fd);
+   }
+   pipe_mutex_unlock(fd_tab_mutex);
+   return destroy;
+}
+
+
+void pipe_screen_reference_init(struct pipe_screen *pscreen, int fd)
+{
+   pscreen->fd = fd;
+   pipe_reference_init(&pscreen->reference, 1);
+   util_hash_table_set(fd_tab, intptr_to_pointer(pscreen->fd), pscreen);
+}
diff --git a/src/gallium/auxiliary/util/u_screen.h 
b/src/gallium/auxiliary/util/u_screen.h
new file mode 100644
index 000..fc91782
--- /dev/null
+++ b/src/gallium/auxiliary/util/u_screen.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright 2016 Linaro, Ltd., Rob Herring 
+ *
+ * Permission i

[Mesa-dev] [PATCH v4 10/11] virgl: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions. The
mutex can be dropped as the pipe loader protects the create_screen()
calls.

Signed-off-by: Rob Herring 
Cc: Dave Airlie 
---
 src/gallium/winsys/virgl/drm/virgl_drm_winsys.c | 88 +++--
 1 file changed, 10 insertions(+), 78 deletions(-)

diff --git a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c 
b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
index 81afa84..61b041a 100644
--- a/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
+++ b/src/gallium/winsys/virgl/drm/virgl_drm_winsys.c
@@ -25,7 +25,6 @@
 #include 
 #include 
 #include 
-#include 
 
 #include "os/os_mman.h"
 #include "os/os_time.h"
@@ -33,6 +32,7 @@
 #include "util/u_format.h"
 #include "util/u_hash_table.h"
 #include "util/u_inlines.h"
+#include "util/u_screen.h"
 #include "state_tracker/drm_driver.h"
 #include "virgl/virgl_screen.h"
 #include "virgl/virgl_public.h"
@@ -802,86 +802,18 @@ virgl_drm_winsys_create(int drmFD)
 
 }
 
-static struct util_hash_table *fd_tab = NULL;
-pipe_static_mutex(virgl_screen_mutex);
-
-static void
-virgl_drm_screen_destroy(struct pipe_screen *pscreen)
-{
-   struct virgl_screen *screen = virgl_screen(pscreen);
-   boolean destroy;
-
-   pipe_mutex_lock(virgl_screen_mutex);
-   destroy = --screen->refcnt == 0;
-   if (destroy) {
-  int fd = virgl_drm_winsys(screen->vws)->fd;
-  util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
-   }
-   pipe_mutex_unlock(virgl_screen_mutex);
-
-   if (destroy) {
-  pscreen->destroy = screen->winsys_priv;
-  pscreen->destroy(pscreen);
-   }
-}
-
-static unsigned hash_fd(void *key)
-{
-   int fd = pointer_to_intptr(key);
-   struct stat stat;
-   fstat(fd, &stat);
-
-   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
-}
-
-static int compare_fd(void *key1, void *key2)
-{
-   int fd1 = pointer_to_intptr(key1);
-   int fd2 = pointer_to_intptr(key2);
-   struct stat stat1, stat2;
-   fstat(fd1, &stat1);
-   fstat(fd2, &stat2);
-
-   return stat1.st_dev != stat2.st_dev ||
- stat1.st_ino != stat2.st_ino ||
- stat1.st_rdev != stat2.st_rdev;
-}
-
 struct pipe_screen *
 virgl_drm_screen_create(int fd)
 {
-   struct pipe_screen *pscreen = NULL;
-
-   pipe_mutex_lock(virgl_screen_mutex);
-   if (!fd_tab) {
-  fd_tab = util_hash_table_create(hash_fd, compare_fd);
-  if (!fd_tab)
- goto unlock;
-   }
-
-   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
-   if (pscreen) {
-  virgl_screen(pscreen)->refcnt++;
-   } else {
-  struct virgl_winsys *vws;
-  int dup_fd = dup(fd);
-
-  vws = virgl_drm_winsys_create(dup_fd);
-
-  pscreen = virgl_create_screen(vws);
-  if (pscreen) {
- util_hash_table_set(fd_tab, intptr_to_pointer(dup_fd), pscreen);
-
- /* Bit of a hack, to avoid circular linkage dependency,
-  * ie. pipe driver having to call in to winsys, we
-  * override the pipe drivers screen->destroy():
-  */
- virgl_screen(pscreen)->winsys_priv = pscreen->destroy;
- pscreen->destroy = virgl_drm_screen_destroy;
-  }
-   }
+   int dupfd;
+   struct virgl_winsys *vws;
+   struct pipe_screen *pscreen = pipe_screen_reference(fd);
+   if (pscreen)
+  return pscreen;
 
-unlock:
-   pipe_mutex_unlock(virgl_screen_mutex);
+   dupfd = dup(fd);
+   vws = virgl_drm_winsys_create(dupfd);
+   pscreen = virgl_create_screen(vws);
+   pipe_screen_reference_init(pscreen, dupfd);
return pscreen;
 }
-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 07/11] amdgpu: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref count. amdgpu is unique in that it hashes
the dev pointer rather than the fd, so the common fd hashing cannot be
used. However, the common reference count can be used instead of the
private one. The mutex can be dropped as the pipe loader protects the
create_screen() calls.

Signed-off-by: Rob Herring 
Cc: "Marek Olšák" 
Cc: Ilia Mirkin 
---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c | 45 ---
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h |  1 -
 2 files changed, 6 insertions(+), 40 deletions(-)

diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
index 9a04cbe..27293ac 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c
@@ -60,8 +60,6 @@
 #define CIK__PIPE_CONFIG__ADDR_SURF_P16_32X32_16X16  17
 
 static struct util_hash_table *dev_tab = NULL;
-pipe_static_mutex(dev_tab_mutex);
-
 static unsigned cik_get_num_tile_pipes(struct amdgpu_gpu_info *info)
 {
unsigned mode2d = info->gb_tile_mode[CIK_TILE_MODE_COLOR_2D];
@@ -329,6 +327,7 @@ static void amdgpu_winsys_destroy(struct radeon_winsys *rws)
pipe_mutex_destroy(ws->global_bo_list_lock);
AddrDestroy(ws->addrlib);
amdgpu_device_deinitialize(ws->dev);
+   util_hash_table_remove(dev_tab, ws->dev);
FREE(rws);
 }
 
@@ -410,26 +409,6 @@ static int compare_dev(void *key1, void *key2)
 
 DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
 
-static bool amdgpu_winsys_unref(struct radeon_winsys *rws)
-{
-   struct amdgpu_winsys *ws = (struct amdgpu_winsys*)rws;
-   bool destroy;
-
-   /* When the reference counter drops to zero, remove the device pointer
-* from the table.
-* This must happen while the mutex is locked, so that
-* amdgpu_winsys_create in another thread doesn't get the winsys
-* from the table when the counter drops to 0. */
-   pipe_mutex_lock(dev_tab_mutex);
-
-   destroy = pipe_reference(&ws->reference, NULL);
-   if (destroy && dev_tab)
-  util_hash_table_remove(dev_tab, ws->dev);
-
-   pipe_mutex_unlock(dev_tab_mutex);
-   return destroy;
-}
-
 PUBLIC struct radeon_winsys *
 amdgpu_winsys_create(int fd, radeon_screen_create_t screen_create)
 {
@@ -446,7 +425,6 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t 
screen_create)
drmFreeVersion(version);
 
/* Look up the winsys from the dev table. */
-   pipe_mutex_lock(dev_tab_mutex);
if (!dev_tab)
   dev_tab = util_hash_table_create(hash_dev, compare_dev);
 
@@ -454,7 +432,6 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t 
screen_create)
 * for the same fd. */
r = amdgpu_device_initialize(fd, &drm_major, &drm_minor, &dev);
if (r) {
-  pipe_mutex_unlock(dev_tab_mutex);
   fprintf(stderr, "amdgpu: amdgpu_device_initialize failed.\n");
   return NULL;
}
@@ -462,17 +439,14 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t 
screen_create)
/* Lookup a winsys if we have already created one for this device. */
ws = util_hash_table_get(dev_tab, dev);
if (ws) {
-  pipe_reference(NULL, &ws->reference);
-  pipe_mutex_unlock(dev_tab_mutex);
+  pipe_reference(NULL, &ws->base.screen->reference);
   return &ws->base;
}
 
/* Create a new winsys. */
ws = CALLOC_STRUCT(amdgpu_winsys);
-   if (!ws) {
-  pipe_mutex_unlock(dev_tab_mutex);
+   if (!ws)
   return NULL;
-   }
 
ws->dev = dev;
ws->info.drm_major = drm_major;
@@ -486,11 +460,7 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t 
screen_create)
  (ws->info.vram_size + ws->info.gart_size) / 8,
  amdgpu_bo_destroy, amdgpu_bo_can_reclaim);
 
-   /* init reference */
-   pipe_reference_init(&ws->reference, 1);
-
/* Set functions. */
-   ws->base.unref = amdgpu_winsys_unref;
ws->base.destroy = amdgpu_winsys_destroy;
ws->base.query_info = amdgpu_winsys_query_info;
ws->base.cs_request_feature = amdgpu_cs_request_feature;
@@ -516,21 +486,18 @@ amdgpu_winsys_create(int fd, radeon_screen_create_t 
screen_create)
ws->base.screen = screen_create(&ws->base);
if (!ws->base.screen) {
   amdgpu_winsys_destroy(&ws->base);
-  pipe_mutex_unlock(dev_tab_mutex);
   return NULL;
}
 
util_hash_table_set(dev_tab, dev, ws);
 
-   /* We must unlock the mutex once the winsys is fully initialized, so that
-* other threads attempting to create the winsys from the same fd will
-* get a fully initialized winsys and not just half-way initialized. */
-   pipe_mutex_unlock(dev_tab_mutex);
+   /* init reference */
+   pipe_reference_init(&ws->base.screen->reference, 1);
+   ws->base.screen->fd = -1;
 
return &ws->base;
 
 fail:
-   pipe_mutex_unlock(dev_tab_mutex);
pb_cache_deinit(&ws->bo_cache);
FREE(ws);
return NULL;
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h 
b/src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h
index 8489530..b45c2d7 100644
--- a/src/

[Mesa-dev] [PATCH v4 06/11] freedreno: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions. The
mutex can be dropped as the pipe loader protects the create_screen()
calls.

Signed-off-by: Rob Herring 
Cc: Rob Clark 
---
 src/gallium/drivers/freedreno/freedreno_screen.c   |  1 -
 src/gallium/drivers/freedreno/freedreno_screen.h   | 10 ---
 .../winsys/freedreno/drm/freedreno_drm_winsys.c| 98 ++
 3 files changed, 9 insertions(+), 100 deletions(-)

diff --git a/src/gallium/drivers/freedreno/freedreno_screen.c 
b/src/gallium/drivers/freedreno/freedreno_screen.c
index 5255c10..324f712 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.c
+++ b/src/gallium/drivers/freedreno/freedreno_screen.c
@@ -565,7 +565,6 @@ fd_screen_create(struct fd_device *dev)
pscreen = &screen->base;
 
screen->dev = dev;
-   screen->refcnt = 1;
 
// maybe this should be in context?
screen->pipe = fd_pipe_new(screen->dev, FD_PIPE_3D);
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h 
b/src/gallium/drivers/freedreno/freedreno_screen.h
index a81c778..8dcacca 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -42,16 +42,6 @@ struct fd_bo;
 struct fd_screen {
struct pipe_screen base;
 
-   /* it would be tempting to use pipe_reference here, but that
-* really doesn't work well if it isn't the first member of
-* the struct, so not quite so awesome to be adding refcnting
-* further down the inheritance hierarchy:
-*/
-   int refcnt;
-
-   /* place for winsys to stash it's own stuff: */
-   void *winsys_priv;
-
uint32_t gmemsize_bytes;
uint32_t device_id;
uint32_t gpu_id; /* 220, 305, etc */
diff --git a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c 
b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
index e4785f8..7d61ec9 100644
--- a/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
+++ b/src/gallium/winsys/freedreno/drm/freedreno_drm_winsys.c
@@ -26,102 +26,22 @@
  *Rob Clark 
  */
 
-#include 
-
-#include "pipe/p_context.h"
-#include "pipe/p_state.h"
-#include "util/u_format.h"
-#include "util/u_memory.h"
-#include "util/u_inlines.h"
-#include "util/u_hash_table.h"
-#include "os/os_thread.h"
+#include "util/u_screen.h"
 
 #include "freedreno_drm_public.h"
 
 #include "freedreno/freedreno_screen.h"
 
-static struct util_hash_table *fd_tab = NULL;
-
-pipe_static_mutex(fd_screen_mutex);
-
-static void
-fd_drm_screen_destroy(struct pipe_screen *pscreen)
-{
-   struct fd_screen *screen = fd_screen(pscreen);
-   boolean destroy;
-
-   pipe_mutex_lock(fd_screen_mutex);
-   destroy = --screen->refcnt == 0;
-   if (destroy) {
-   int fd = fd_device_fd(screen->dev);
-   util_hash_table_remove(fd_tab, intptr_to_pointer(fd));
-   }
-   pipe_mutex_unlock(fd_screen_mutex);
-
-   if (destroy) {
-   pscreen->destroy = screen->winsys_priv;
-   pscreen->destroy(pscreen);
-   }
-}
-
-static unsigned hash_fd(void *key)
-{
-   int fd = pointer_to_intptr(key);
-   struct stat stat;
-   fstat(fd, &stat);
-
-   return stat.st_dev ^ stat.st_ino ^ stat.st_rdev;
-}
-
-static int compare_fd(void *key1, void *key2)
-{
-   int fd1 = pointer_to_intptr(key1);
-   int fd2 = pointer_to_intptr(key2);
-   struct stat stat1, stat2;
-   fstat(fd1, &stat1);
-   fstat(fd2, &stat2);
-
-   return stat1.st_dev != stat2.st_dev ||
-   stat1.st_ino != stat2.st_ino ||
-   stat1.st_rdev != stat2.st_rdev;
-}
-
 struct pipe_screen *
 fd_drm_screen_create(int fd)
 {
-   struct pipe_screen *pscreen = NULL;
-
-   pipe_mutex_lock(fd_screen_mutex);
-   if (!fd_tab) {
-   fd_tab = util_hash_table_create(hash_fd, compare_fd);
-   if (!fd_tab)
-   goto unlock;
-   }
-
-   pscreen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
-   if (pscreen) {
-   fd_screen(pscreen)->refcnt++;
-   } else {
-   struct fd_device *dev = fd_device_new_dup(fd);
-   if (!dev)
-   goto unlock;
-
-   pscreen = fd_screen_create(dev);
-   if (pscreen) {
-   int fd = fd_device_fd(dev);
-
-   util_hash_table_set(fd_tab, intptr_to_pointer(fd), 
pscreen);
-
-   /* Bit of a hack, to avoid circular linkage dependency,
-* ie. pipe driver having to call in to winsys, we
-* override the pipe drivers screen->destroy():
-*/
-   fd_screen(pscreen)->winsys_priv = pscreen->destroy;
-   pscreen->destroy = fd_drm_screen_destroy;
-   }
-   }
-
-unlock:
-   pipe_mutex_unlock(fd_screen_mutex);
+   i

[Mesa-dev] [PATCH v4 01/11] gallium: move pipe_screen destroy into pipe-loader

2016-07-22 Thread Rob Herring

In preparation for adding reference counting of pipe_screen in the
pipe-loader, pipe_loader_release needs to destroy the pipe_screen instead
of leaving that to the state trackers.

Signed-off-by: Rob Herring 
Cc: Emil Velikov 
---
 src/gallium/auxiliary/pipe-loader/pipe_loader.h | 1 +
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 9 -
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c  | 6 ++
 src/gallium/auxiliary/vl/vl_winsys_dri.c| 1 -
 src/gallium/auxiliary/vl/vl_winsys_dri3.c   | 1 -
 src/gallium/auxiliary/vl/vl_winsys_drm.c| 1 -
 src/gallium/state_trackers/clover/core/device.cpp   | 4 +---
 src/gallium/state_trackers/dri/dri_screen.c | 3 ---
 src/gallium/state_trackers/xa/xa_tracker.c  | 2 --
 src/gallium/tests/trivial/compute.c | 1 -
 src/gallium/tests/trivial/quad-tex.c| 1 -
 src/gallium/tests/trivial/tri.c | 1 -
 12 files changed, 16 insertions(+), 15 deletions(-)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader.h 
b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
index 690d088..25cd4d1 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader.h
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader.h
@@ -65,6 +65,7 @@ struct pipe_loader_device {
 
char *driver_name;
const struct pipe_loader_ops *ops;
+   struct pipe_screen *pscreen;
 };
 
 /**
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 994a284..7bdd2ec 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -39,6 +39,7 @@
 #include "target-helpers/drm_helper_public.h"
 #include "state_tracker/drm_driver.h"
 #include "pipe_loader_priv.h"
+#include "pipe/p_screen.h"
 
 #include "util/u_memory.h"
 #include "util/u_dl.h"
@@ -269,6 +270,9 @@ static void
 pipe_loader_drm_release(struct pipe_loader_device **dev)
 {
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(*dev);
+   struct pipe_screen *pscreen = ddev->base.pscreen;
+
+   pscreen->destroy(pscreen);
 
 #ifndef GALLIUM_STATIC_TARGETS
if (ddev->lib)
@@ -297,8 +301,11 @@ static struct pipe_screen *
 pipe_loader_drm_create_screen(struct pipe_loader_device *dev)
 {
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
+   struct pipe_screen *pscreen;
 
-   return ddev->dd->create_screen(ddev->fd);
+   pscreen = ddev->dd->create_screen(ddev->fd);
+   ddev->base.pscreen = pscreen;
+   return pscreen;
 }
 
 static const struct pipe_loader_ops pipe_loader_drm_ops = {
diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
index e7fa974..ce5c2b3 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c
@@ -27,6 +27,7 @@
 
 #include "pipe_loader_priv.h"
 
+#include "pipe/p_screen.h"
 #include "util/u_memory.h"
 #include "util/u_dl.h"
 #include "sw/dri/dri_sw_winsys.h"
@@ -271,6 +272,9 @@ static void
 pipe_loader_sw_release(struct pipe_loader_device **dev)
 {
struct pipe_loader_sw_device *sdev = pipe_loader_sw_device(*dev);
+   struct pipe_screen *pscreen = sdev->base.pscreen;
+
+   pscreen->destroy(pscreen);
 
 #ifndef GALLIUM_STATIC_TARGETS
if (sdev->lib)
@@ -301,6 +305,8 @@ pipe_loader_sw_create_screen(struct pipe_loader_device *dev)
if (!screen)
   sdev->ws->destroy(sdev->ws);
 
+   sdev->base.pscreen = screen;
+
return screen;
 }
 
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri.c
index 9ecc216..db90c54 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri.c
@@ -461,7 +461,6 @@ vl_dri2_screen_destroy(struct vl_screen *vscreen)
}
 
vl_dri2_destroy_drawable(scrn);
-   scrn->base.pscreen->destroy(scrn->base.pscreen);
pipe_loader_release(&scrn->base.dev, 1);
FREE(scrn);
 }
diff --git a/src/gallium/auxiliary/vl/vl_winsys_dri3.c 
b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
index 493e645..c8c0198 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_dri3.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_dri3.c
@@ -611,7 +611,6 @@ vl_dri3_screen_destroy(struct vl_screen *vscreen)
 
if (scrn->special_event)
   xcb_unregister_for_special_event(scrn->conn, scrn->special_event);
-   scrn->base.pscreen->destroy(scrn->base.pscreen);
pipe_loader_release(&scrn->base.dev, 1);
FREE(scrn);
 
diff --git a/src/gallium/auxiliary/vl/vl_winsys_drm.c 
b/src/gallium/auxiliary/vl/vl_winsys_drm.c
index 6a759ae..aa690a2 100644
--- a/src/gallium/auxiliary/vl/vl_winsys_drm.c
+++ b/src/gallium/auxiliary/vl/vl_winsys_drm.c
@@ -80,7 +80,6 @@ vl_drm_screen_destroy(struct vl_screen *vscreen)
 {
assert(vscreen);
 
-   vscreen->pscreen->destroy(vscreen->pscreen);
pipe_loader_release(&vscreen->dev, 1);
FREE(vscreen);
 }
diff --git a/src/gallium/state_trackers/clover/core/devic

[Mesa-dev] [PATCH v4 02/11] pipe-loader-drm: protect create_screen() calls with a mutex

2016-07-22 Thread Rob Herring

Creating a screen needs to be serialized in order to support reusing an
existing screen. With this, driver-private mutexes in create_screen()
functions can be removed.

Signed-off-by: Rob Herring 
Cc: Emil Velikov 
---
 src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c 
b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
index 7bdd2ec..554e59a 100644
--- a/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
+++ b/src/gallium/auxiliary/pipe-loader/pipe_loader_drm.c
@@ -41,6 +41,7 @@
 #include "pipe_loader_priv.h"
 #include "pipe/p_screen.h"
 
+#include "os/os_thread.h"
 #include "util/u_memory.h"
 #include "util/u_dl.h"
 #include "util/u_debug.h"
@@ -63,6 +64,8 @@ struct pipe_loader_drm_device {
 
 static const struct pipe_loader_ops pipe_loader_drm_ops;
 
+pipe_static_mutex(loader_mutex);
+
 #ifdef GALLIUM_STATIC_TARGETS
 static const struct drm_conf_ret throttle_ret = {
DRM_CONF_INT,
@@ -303,8 +306,10 @@ pipe_loader_drm_create_screen(struct pipe_loader_device 
*dev)
struct pipe_loader_drm_device *ddev = pipe_loader_drm_device(dev);
struct pipe_screen *pscreen;
 
+   pipe_mutex_lock(loader_mutex);
pscreen = ddev->dd->create_screen(ddev->fd);
ddev->base.pscreen = pscreen;
+   pipe_mutex_unlock(loader_mutex);
return pscreen;
 }
 
-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH v4 09/11] vmwgfx: use common screen ref counting

2016-07-22 Thread Rob Herring

Use the common pipe_screen ref counting and fd hashing functions. The
mutex can be dropped as the pipe loader protects the create_screen()
calls.

Signed-off-by: Rob Herring 
---
 src/gallium/auxiliary/target-helpers/drm_helper.h |  2 +-
 src/gallium/drivers/svga/svga_public.h|  2 +-
 src/gallium/drivers/svga/svga_screen.c|  5 ++-
 src/gallium/targets/pipe-loader/pipe_vmwgfx.c |  2 +-
 src/gallium/winsys/svga/drm/vmw_screen.c  | 54 +--
 src/gallium/winsys/svga/drm/vmw_screen.h  |  6 ---
 6 files changed, 17 insertions(+), 54 deletions(-)

diff --git a/src/gallium/auxiliary/target-helpers/drm_helper.h 
b/src/gallium/auxiliary/target-helpers/drm_helper.h
index 90820d3..a042162 100644
--- a/src/gallium/auxiliary/target-helpers/drm_helper.h
+++ b/src/gallium/auxiliary/target-helpers/drm_helper.h
@@ -181,7 +181,7 @@ pipe_vmwgfx_create_screen(int fd)
if (!sws)
   return NULL;
 
-   screen = svga_screen_create(sws);
+   screen = svga_screen_create(sws, fd);
return screen ? debug_screen_wrap(screen) : NULL;
 }
 
diff --git a/src/gallium/drivers/svga/svga_public.h 
b/src/gallium/drivers/svga/svga_public.h
index ded2e24..5a95660 100644
--- a/src/gallium/drivers/svga/svga_public.h
+++ b/src/gallium/drivers/svga/svga_public.h
@@ -37,6 +37,6 @@ struct pipe_screen;
 struct svga_winsys_screen;
 
 struct pipe_screen *
-svga_screen_create(struct svga_winsys_screen *sws);
+svga_screen_create(struct svga_winsys_screen *sws, int fd);
 
 #endif /* SVGA_PUBLIC_H_ */
diff --git a/src/gallium/drivers/svga/svga_screen.c 
b/src/gallium/drivers/svga/svga_screen.c
index 5b4ac74..b353b92 100644
--- a/src/gallium/drivers/svga/svga_screen.c
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -26,6 +26,7 @@
 #include "util/u_format.h"
 #include "util/u_memory.h"
 #include "util/u_inlines.h"
+#include "util/u_screen.h"
 #include "util/u_string.h"
 #include "util/u_math.h"
 
@@ -906,7 +907,7 @@ svga_destroy_screen( struct pipe_screen *screen )
  * Create a new svga_screen object
  */
 struct pipe_screen *
-svga_screen_create(struct svga_winsys_screen *sws)
+svga_screen_create(struct svga_winsys_screen *sws, int fd)
 {
struct svga_screen *svgascreen;
struct pipe_screen *screen;
@@ -1081,6 +1082,8 @@ svga_screen_create(struct svga_winsys_screen *sws)
 
svga_screen_cache_init(svgascreen);
 
+   pipe_screen_reference_init(screen, dup(fd));
+
return screen;
 error2:
FREE(svgascreen);
diff --git a/src/gallium/targets/pipe-loader/pipe_vmwgfx.c 
b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
index 71015df..d246022 100644
--- a/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
+++ b/src/gallium/targets/pipe-loader/pipe_vmwgfx.c
@@ -14,7 +14,7 @@ create_screen(int fd)
if (!sws)
   return NULL;
 
-   screen = svga_screen_create(sws);
+   screen = svga_screen_create(sws, fd);
if (!screen)
   return NULL;
 
diff --git a/src/gallium/winsys/svga/drm/vmw_screen.c 
b/src/gallium/winsys/svga/drm/vmw_screen.c
index 7fcb6d2..e0fa763 100644
--- a/src/gallium/winsys/svga/drm/vmw_screen.c
+++ b/src/gallium/winsys/svga/drm/vmw_screen.c
@@ -29,25 +29,11 @@
 #include "vmw_context.h"
 
 #include "util/u_memory.h"
+#include "util/u_screen.h"
 #include "pipe/p_compiler.h"
-#include "util/u_hash_table.h"
 #include 
-#include 
 #include 
 
-static struct util_hash_table *dev_hash = NULL;
-
-static int vmw_dev_compare(void *key1, void *key2)
-{
-   return (major(*(dev_t *)key1) == major(*(dev_t *)key2) &&
-   minor(*(dev_t *)key1) == minor(*(dev_t *)key2)) ? 0 : 1;
-}
-
-static unsigned vmw_dev_hash(void *key)
-{
-   return (major(*(dev_t *) key) << 16) | minor(*(dev_t *) key);
-}
-
 /* Called from vmw_drm_create_screen(), creates and initializes the
  * vmw_winsys_screen structure, which is the main entity in this
  * module.
@@ -60,29 +46,15 @@ struct vmw_winsys_screen *
 vmw_winsys_create( int fd )
 {
struct vmw_winsys_screen *vws;
-   struct stat stat_buf;
-
-   if (dev_hash == NULL) {
-  dev_hash = util_hash_table_create(vmw_dev_hash, vmw_dev_compare);
-  if (dev_hash == NULL)
- return NULL;
-   }
+   struct pipe_screen *pscreen = pipe_screen_reference(fd);
 
-   if (fstat(fd, &stat_buf))
-  return NULL;
-
-   vws = util_hash_table_get(dev_hash, &stat_buf.st_rdev);
-   if (vws) {
-  vws->open_count++;
-  return vws;
-   }
+   if (pscreen)
+  return vmw_winsys_screen(svga_winsys_screen(pscreen));
 
vws = CALLOC_STRUCT(vmw_winsys_screen);
if (!vws)
   goto out_no_vws;
 
-   vws->device = stat_buf.st_rdev;
-   vws->open_count = 1;
vws->ioctl.drm_fd = dup(fd);
vws->base.have_gb_dma = TRUE;
vws->base.need_to_rebind_resources = FALSE;
@@ -100,11 +72,8 @@ vmw_winsys_create( int fd )
if (!vmw_winsys_screen_init_svga(vws))
   goto out_no_svga;
 
-   if (util_hash_table_set(dev_hash, &vws->device, vws) != PIPE_OK)
-  goto out_no_hash_insert;
-
return vws;
-out_no_hash_insert:
+
 out_no_svga

[Mesa-dev] [PATCH v4 00/11] Common pipe screen ref counting

2016-07-22 Thread Rob Herring

Another version of common pipe_screen reference counting. Please help 
test on AMD and Nouveau as those are the more complicated ones and I 
don't have h/w to test.

Changes in v4:
- Move fd dup() back into driver winsys create screen functions which 
  sometimes need the dup'ed fd before the pipe_screen is created.
- Update vmwgfx driver which I missed updating in v3
- Update vc4 commit msg to reflect this is a new feature.

Changes in v3:
- dup() fd and store in pipe_screen as the lifetime of the
pipe_loader_drm_device and pipe_screen are different.
- Fix leaking of pipe_loader_drm_device. Only the last one closed was
getting freed.
- Move mutex for fd hash table into u_screen.c

Rob


Rob Herring (11):
  gallium: move pipe_screen destroy into pipe-loader
  pipe-loader-drm: protect create_screen() calls with a mutex
  gallium: add common pipe_screen reference counting functions
  pipe-loader-drm: use pipe_screen_unreference to destroy screen
  nouveau: use common screen ref counting
  freedreno: use common screen ref counting
  amdgpu: use common screen ref counting
  radeon: use common screen ref counting
  vmwgfx: use common screen ref counting
  virgl: use common screen ref counting
  vc4: use common screen ref counting

 src/gallium/auxiliary/Makefile.sources |   2 +
 src/gallium/auxiliary/pipe-loader/pipe_loader.h|   1 +
 .../auxiliary/pipe-loader/pipe_loader_drm.c|  15 ++-
 src/gallium/auxiliary/pipe-loader/pipe_loader_sw.c |   6 ++
 src/gallium/auxiliary/target-helpers/drm_helper.h  |   2 +-
 src/gallium/auxiliary/util/u_screen.c  | 114 +
 src/gallium/auxiliary/util/u_screen.h  |  32 ++
 src/gallium/auxiliary/vl/vl_winsys_dri.c   |   1 -
 src/gallium/auxiliary/vl/vl_winsys_dri3.c  |   1 -
 src/gallium/auxiliary/vl/vl_winsys_drm.c   |   1 -
 src/gallium/drivers/freedreno/freedreno_screen.c   |   1 -
 src/gallium/drivers/freedreno/freedreno_screen.h   |  10 --
 src/gallium/drivers/nouveau/nouveau_screen.c   |   6 --
 src/gallium/drivers/nouveau/nouveau_screen.h   |   4 -
 src/gallium/drivers/nouveau/nv30/nv30_screen.c |   3 -
 src/gallium/drivers/nouveau/nv50/nv50_screen.c |   3 -
 src/gallium/drivers/nouveau/nvc0/nvc0_screen.c |   3 -
 src/gallium/drivers/r300/r300_screen.c |   3 -
 src/gallium/drivers/r600/r600_pipe.c   |   6 --
 src/gallium/drivers/radeon/radeon_winsys.h |   8 --
 src/gallium/drivers/radeonsi/si_pipe.c |   6 --
 src/gallium/drivers/svga/svga_public.h |   2 +-
 src/gallium/drivers/svga/svga_screen.c |   5 +-
 src/gallium/include/pipe/p_screen.h|   3 +
 src/gallium/state_trackers/clover/core/device.cpp  |   4 +-
 src/gallium/state_trackers/dri/dri_screen.c|   3 -
 src/gallium/state_trackers/xa/xa_tracker.c |   2 -
 src/gallium/targets/pipe-loader/pipe_vmwgfx.c  |   2 +-
 src/gallium/tests/trivial/compute.c|   1 -
 src/gallium/tests/trivial/quad-tex.c   |   1 -
 src/gallium/tests/trivial/tri.c|   1 -
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.c  |  45 ++--
 src/gallium/winsys/amdgpu/drm/amdgpu_winsys.h  |   1 -
 .../winsys/freedreno/drm/freedreno_drm_winsys.c|  98 ++
 .../winsys/nouveau/drm/nouveau_drm_winsys.c|  69 +
 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c  |  80 ++-
 src/gallium/winsys/svga/drm/vmw_screen.c   |  54 ++
 src/gallium/winsys/svga/drm/vmw_screen.h   |   6 --
 src/gallium/winsys/vc4/drm/vc4_drm_winsys.c|  11 +-
 src/gallium/winsys/virgl/drm/virgl_drm_winsys.c|  88 ++--
 40 files changed, 236 insertions(+), 468 deletions(-)
 create mode 100644 src/gallium/auxiliary/util/u_screen.c
 create mode 100644 src/gallium/auxiliary/util/u_screen.h

-- 
2.9.2

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Rob Clark

On Fri, Jul 22, 2016 at 4:39 AM, Vedran Miletić  wrote:
> On 07/22/2016 03:37 AM, Rob Clark wrote:
>>
>> On Thu, Jul 21, 2016 at 9:35 PM, Rob Clark  wrote:
>>>
>>> On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić 
>>> wrote:

 LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
 general practice, we should avoid using such generic names when it is
 possible to do so.

 This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
 and removes workarounds previously used to enable building Mesa with
 LLVM (pop_macro() and push_macro() function calls).

 Please let me know if I missed any.
>>>
>>>
>>> I guess at least some in-flight patches (at least my
>>> pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
>>> that it might affect others).. not sure if there is a better way to
>>> deal with that without things falling through the cracks..  maybe
>>> introduce MESA_DEBUG which is the same as DEBUG first, and then a
>>> later patch to remove DEBUG.  Or at least including sed/etc rule to
>>> re-do the mass-change on a later baseline in the commit msg?
>>>
>>> I don't mind rebasing my patch, just more worried about things falling
>>> through the cracks with other in-progress stuff, since it seems like
>>> the end result would be a silent fail to enable intended debug code..
>>
>>
>> btw, possibly tilting at windmills here, but afaik we don't export
>> DEBUG outside the mesa codebase.. so actually it should be llvm that
>> s/DEBUG/LLVM_DEBUG/
>>
>> BR,
>> -R
>
>
> Regarding in-flight patches, I did this change manually ("it can't be that
> hard, right, there's just a bunch of them") but I suppose it could be
> scripted and I would prefer this approach to having both macros at the same
> time.

well, I wouldn't expect both macros to exist at the same time
forever.. and it would let you avoid the flag-day patch.  At any rate,
if the patch could be easily regenerated via sed or whatever, I guess
I'd be less concerned about that.

My main concern is that we silently lose some debug code.. for example
when backporting to release branches or rebasing work-in-progress
stuff, etc.  Not sure there is a way to catch that other than
follow-up audits.

> Regarding s/DEBUG/LLVM_DEBUG/, I understand the reasoning and agree that
> ideally LLVM should rename the macro and not export macros with generic
> names. However, to avoid potential future conflicts, Mesa should use
> non-generic macro names anyhow.

yeah, hence the 'tilting at windmills' comment..  mostly just
grumbling about how llvm is kind of a pita as a dependency ;-)

BR,
-R

> Regards,
> Vedran
>
> --
> Vedran Miletić
> vedran.miletic.net
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Jose Fonseca


On 22/07/16 16:21, Emil Velikov wrote:

On 22 July 2016 at 09:42, Christian König  wrote:

Am 22.07.2016 um 03:37 schrieb Rob Clark:


On Thu, Jul 21, 2016 at 9:35 PM, Rob Clark  wrote:


On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić 
wrote:


LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
general practice, we should avoid using such generic names when it is
possible to do so.

This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
and removes workarounds previously used to enable building Mesa with
LLVM (pop_macro() and push_macro() function calls).

Please let me know if I missed any.


I guess at least some in-flight patches (at least my
pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
that it might affect others).. not sure if there is a better way to
deal with that without things falling through the cracks..  maybe
introduce MESA_DEBUG which is the same as DEBUG first, and then a
later patch to remove DEBUG.  Or at least including sed/etc rule to
re-do the mass-change on a later baseline in the commit msg?

I don't mind rebasing my patch, just more worried about things falling
through the cracks with other in-progress stuff, since it seems like
the end result would be a silent fail to enable intended debug code..


btw, possibly tilting at windmills here, but afaik we don't export
DEBUG outside the mesa codebase.. so actually it should be llvm that
s/DEBUG/LLVM_DEBUG/



I already had the same issue with other libraries/headers as well which
define DEBUG as something.


Out of curiosity: can you give some examples ?


I clearly agree that those libraries shouldn't do that with such a common
name, but renaming the Mesa DEBUG define to something more library specific
would still be a good idea to avoid such problems in the future.

So general approach is Acked-by: Christian König 


Note that doing this will likely break things for the VMWare people
since (IIRC) on Windows/MSVC DEBUG is commonly used/set by the
compiler.

Jose can you confirm/dismiss if this will cause issues ?



We used to rely on DEBUG for Windows XP kernel drivers, but that's long 
gone (thankfully.)  It should be fine.



Jose
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 1/7] glsl: Separate overlapping sentinel nodes in exec_list.

2016-07-22 Thread Ian Romanick

There are a couple nits and suggestions for small follow-ups below.
With the couple nits (all in ast_to_hir.cpp) fixed, this patch is

Reviewed-by: Ian Romanick 

On 07/08/2016 03:18 PM, Matt Turner wrote:
> I do appreciate the cleverness, but unfortunately it prevents a lot more
> cleverness in the form of additional compiler optimizations brought on
> by -fstrict-aliasing.
> 
> No difference in OglBatch7 (n=20).
> 
> Co-authored-by: Davin McCall 
> ---
> I took Ian's suggestion to add get_head_raw() and get_tail_raw() methods
> and use them in place of head_sentinel.next and tail_sentinel.prev.
> 
>  src/compiler/glsl/ast.h|   4 +-
>  src/compiler/glsl/ast_function.cpp |  22 +--
>  src/compiler/glsl/ast_to_hir.cpp   |   6 +-
>  src/compiler/glsl/ast_type.cpp |   2 +-
>  src/compiler/glsl/glsl_parser_extras.cpp   |   6 +-
>  src/compiler/glsl/ir.cpp   |   8 +-
>  src/compiler/glsl/ir_clone.cpp |   2 +-
>  src/compiler/glsl/ir_constant_expression.cpp   |   2 +-
>  src/compiler/glsl/ir_function.cpp  |  14 +-
>  src/compiler/glsl/ir_reader.cpp|   4 +-
>  src/compiler/glsl/ir_validate.cpp  |   4 +-
>  src/compiler/glsl/list.h   | 184 
> -
>  src/compiler/glsl/lower_distance.cpp   |   4 +-
>  src/compiler/glsl/lower_jumps.cpp  |   2 +-
>  src/compiler/glsl/lower_packed_varyings.cpp|   8 +-
>  src/compiler/glsl/lower_tess_level.cpp |   4 +-
>  src/compiler/glsl/opt_conditional_discard.cpp  |   6 +-
>  src/compiler/glsl/opt_dead_builtin_varyings.cpp|   2 +-
>  src/compiler/glsl/opt_dead_code.cpp|   2 +-
>  src/compiler/glsl/opt_flatten_nested_if_blocks.cpp |   2 +-
>  src/compiler/nir/nir.h |   4 +-
>  src/compiler/nir/nir_opt_gcm.c |   2 +-
>  src/mesa/drivers/dri/i965/brw_cfg.h|   2 +-
>  src/mesa/drivers/dri/i965/brw_fs_builder.h |   2 +-
>  src/mesa/drivers/dri/i965/brw_vec4_builder.h   |   2 +-
>  25 files changed, 164 insertions(+), 136 deletions(-)
> 
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index 06c7b03..fa5a731 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -346,8 +346,8 @@ public:
>  
> bool is_single_dimension() const
> {
> -  return this->array_dimensions.tail_pred->prev != NULL &&
> - this->array_dimensions.tail_pred->prev->is_head_sentinel();
> +  return this->array_dimensions.get_tail_raw()->prev != NULL &&
> + this->array_dimensions.get_tail_raw()->is_head_sentinel();
> }
>  
> virtual void print(void) const;
> diff --git a/src/compiler/glsl/ast_function.cpp 
> b/src/compiler/glsl/ast_function.cpp
> index f74394f..9dcec50 100644
> --- a/src/compiler/glsl/ast_function.cpp
> +++ b/src/compiler/glsl/ast_function.cpp
> @@ -186,8 +186,8 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
>  exec_list &actual_ir_parameters,
>  exec_list &actual_ast_parameters)
>  {
> -   exec_node *actual_ir_node  = actual_ir_parameters.head;
> -   exec_node *actual_ast_node = actual_ast_parameters.head;
> +   exec_node *actual_ir_node  = actual_ir_parameters.get_head_raw();
> +   exec_node *actual_ast_node = actual_ast_parameters.get_head_raw();
>  
> foreach_in_list(const ir_variable, formal, &sig->parameters) {
>/* The lists must be the same length. */
> @@ -318,10 +318,12 @@ verify_parameter_modes(_mesa_glsl_parse_state *state,
> const char *func_name = sig->function_name();
> bool is_atomic = is_atomic_function(func_name);
> if (is_atomic) {
> -  const ir_rvalue *const actual = (ir_rvalue *) 
> actual_ir_parameters.head;
> +  const ir_rvalue *const actual =
> + (ir_rvalue *) actual_ir_parameters.get_head_raw();
>  
>const ast_expression *const actual_ast =
> - exec_node_data(ast_expression, actual_ast_parameters.head, link);
> + exec_node_data(ast_expression,
> +actual_ast_parameters.get_head_raw(), link);
>YYLTYPE loc = actual_ast->get_location();
>  
>if (!verify_first_atomic_parameter(&loc, state,
> @@ -1176,7 +1178,7 @@ constant_record_constructor(const glsl_type 
> *constructor_type,
>  bool
>  single_scalar_parameter(exec_list *parameters)
>  {
> -   const ir_rvalue *const p = (ir_rvalue *) parameters->head;
> +   const ir_rvalue *const p = (ir_rvalue *) parameters->get_head_raw();
> assert(((ir_rvalue *)p)->as_rvalue() != NULL);
>  
> return (p->type->is_scalar() && p->next->is_tail_sentinel());
> @@ -1220,7 +1222,7 @@ emit_inline_vector_constructor(const glsl_type *type,
>  */
> const unsigned lhs_components = type->components();
> if (single_scalar_parameter(parame

Re: [Mesa-dev] [PATCH] main/shaderimage: image unit invalid if texture is incomplete, independently of the level

2016-07-22 Thread Alejandro Piñeiro

Ping. Also including Ian Romanick on the conversation, as right now the
main question is about the spec.

The thread is somewhat messy, so in order to be clear, this is the last
patch I sent to the list (and the one I'm proposing):

https://patchwork.freedesktop.org/patch/98877/

On 18/07/16 14:44, Alejandro Piñeiro wrote:
> Hi,
>
> On 15/07/16 22:46, Francisco Jerez wrote:
>> Alejandro Piñeiro  writes:
>>
>>> On 14/07/16 21:24, Francisco Jerez wrote:
 Alejandro Piñeiro  writes:

> Without this commit, an image is considered valid if the level of the
> texture bound to the image is complete, something we can check as mesa
> saves independently whether it is "base incomplete" or "mipmap incomplete".
>
> But, from the OpenGL 4.3 Core Specification, section 8.25 ("Texture
> Image Loads and Stores"):
>
>   "An access is considered invalid if:
> the texture bound to the selected image unit is incomplete;"
>
> This implies that the access to the image unit is invalid if the
> texture is incomplete, regardless of the details of the specific texture
> level bound to the image.
>
> This fixes:
> GL44-CTS.shader_image_load_store.incomplete_textures
> ---
>
> The current piglit test does not test what this commit tries to fix. I
> will send a patch to piglit shortly.
>
>  src/mesa/main/shaderimage.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
>
> diff --git a/src/mesa/main/shaderimage.c b/src/mesa/main/shaderimage.c
> index 90643c4..d20cd90 100644
> --- a/src/mesa/main/shaderimage.c
> +++ b/src/mesa/main/shaderimage.c
> @@ -469,10 +469,18 @@ _mesa_is_image_unit_valid(struct gl_context *ctx, 
> struct gl_image_unit *u)
> if (!t->_BaseComplete && !t->_MipmapComplete)
> _mesa_test_texobj_completeness(ctx, t);
>  
> +   /* From the OpenGL 4.3 Core Specification, Chapter 8.25, Texture Image
> +* Loads and Stores:
> +*
> +*  "An access is considered invalid if:
> +*the texture bound to the selected image unit is incomplete;"
> +*/
> +   if (!t->_BaseComplete ||
> +   !t->_MipmapComplete)
> +  return GL_FALSE;
 I don't think this is correct, AFAIUI a texture having _MipmapComplete
 equal to false doesn't imply that the texture as a whole would be
 considered incomplete according to the GL's definition of completeness.
 Whether or not it's considered complete usually depends on the sampler
 state while you're doing regular texture sampling: If the sampler a
 texture object is used with has any of the mipmap filtering modes
 enabled you need to check _MipmapComplete, otherwise you need to check
 _BaseComplete.  The problem when you attempt to carry over this
 definition to shader images (as the spec implies) is that image units
 have no sampler state as such, and that they can only ever access one
 specified level of the texture at a time (potentially a texture level
 other than the base).  This patch makes image units behave like a
 sampler unit with mipmap filtering enabled for the purpose of texture
 completeness validation, which is almost definitely too strong.
>>> Yes, I didn't realize that _BaseComplete and _MipmapComplete were not
>>> checking the state at all. Thanks for pointing it.
>>>
 An alternative would be to do something along the lines of:

 | if (!_mesa_is_texture_complete(t, &t->Sampler))
 |return GL_FALSE;
>>> Yes, that is what I wanted, to return false if the texture is incomplete.
>>>
 The problem is that you would then run into problems when some of the
 non-base mipmap levels are missing but the sampler state baked into the
 gl_texture_object says that you aren't mipmapping, so the GL spec would
 normally consider the texture to be complete and
 _mesa_is_texture_complete would return true accordingly, but still you
 wouldn't be able to use any of the missing texture levels as shader
 image if the application tried to bind them to an image unit (that's the
 reason for the u->Level vs t->BaseLevel checks below you're removing).
>>> Ok, then if I understand correctly, the solution is not about replacing
>>> the level checks for _mesa_is_texture_complete, but keeping current
>>> checks, and add a _mesa_is_texture_complete check. Just checked and
>>> everything seems to work fine (except that now the behaviour is more
>>> strict, see below). I will send a patch shortly.
>>>
>> Yeah, that would likely work and get the CTS test to pass, but it would
>> still be more strict than the spec says and consider cases that are OK
>> according to the spec to be incomplete, so I was reluctant to call it a
>> solution.
>>
>> I think the ideal solution would be for the state of an image unit to be
>> independent from the filtering and sampling state, and depend on the
>> complet

Re: [Mesa-dev] [PATCH] Rename the DEBUG macro to MESA_DEBUG

2016-07-22 Thread Emil Velikov

On 22 July 2016 at 09:42, Christian König  wrote:
> Am 22.07.2016 um 03:37 schrieb Rob Clark:
>>
>> On Thu, Jul 21, 2016 at 9:35 PM, Rob Clark  wrote:
>>>
>>> On Thu, Jul 21, 2016 at 1:48 PM, Vedran Miletić 
>>> wrote:

 LLVM and Mesa both define the DEBUG macro in incompatible ways. As a
 general practice, we should avoid using such generic names when it is
 possible to do so.

 This patch renames all occurrences of the DEBUG macro to MESA_DEBUG,
 and removes workarounds previously used to enable building Mesa with
 LLVM (pop_macro() and push_macro() function calls).

 Please let me know if I missed any.
>>>
>>> I guess at least some in-flight patches (at least my
>>> pipe_mutex_assert_locked() patch, but I guess DEBUG is common enough
>>> that it might affect others).. not sure if there is a better way to
>>> deal with that without things falling through the cracks..  maybe
>>> introduce MESA_DEBUG which is the same as DEBUG first, and then a
>>> later patch to remove DEBUG.  Or at least including sed/etc rule to
>>> re-do the mass-change on a later baseline in the commit msg?
>>>
>>> I don't mind rebasing my patch, just more worried about things falling
>>> through the cracks with other in-progress stuff, since it seems like
>>> the end result would be a silent fail to enable intended debug code..
>>
>> btw, possibly tilting at windmills here, but afaik we don't export
>> DEBUG outside the mesa codebase.. so actually it should be llvm that
>> s/DEBUG/LLVM_DEBUG/
>
>
> I already had the same issue with other libraries/headers as well which
> define DEBUG as something.
>
Out of curiosity: can you give some examples ?

> I clearly agree that those libraries shouldn't do that with such a common
> name, but renaming the Mesa DEBUG define to something more library specific
> would still be a good idea to avoid such problems in the future.
>
> So general approach is Acked-by: Christian König 
>
Note that doing this will likely break things for the VMWare people
since (IIRC) on Windows/MSVC DEBUG is commonly used/set by the
compiler.

Jose can you confirm/dismiss if this will cause issues ?

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 2/9] vl/util: add copy func for yv12image to nv12surface

2016-07-22 Thread Emil Velikov

Hi Christian,

A couple of small suggestions. Feel free to amend before pushing or
ignore if they're too picky.

On 22 July 2016 at 15:14, Christian König  wrote:
> From: Boyuan Zhang 
>
> Add function to copy from yv12 image to nv12 surface for VAAPI putimage call. 
> We need this function in VaPutImage call where copying from yv12 image to 
> nv12 surface for encoding. Existing function can't be used because it only 
> works for copying from yv12 surface to nv12 image in Vaapi.
>
Please wrap this a bit, just like you've done for the other patches.

> Signed-off-by: Boyuan Zhang 
> ---
>  src/gallium/auxiliary/util/u_video.h | 37 
> 
>  1 file changed, 37 insertions(+)
>
> diff --git a/src/gallium/auxiliary/util/u_video.h 
> b/src/gallium/auxiliary/util/u_video.h
> index 9196afc..86a7adc 100644
> --- a/src/gallium/auxiliary/util/u_video.h
> +++ b/src/gallium/auxiliary/util/u_video.h
> @@ -130,6 +130,43 @@ u_copy_yv12_to_nv12(void *const *destination_data,
>  }
>
>  static inline void
> +u_copy_yv12_img_to_nv12_surf(ubyte *const *src,
> + ubyte *dst,
> + unsigned width,
> + unsigned height,
> + unsigned src_stride,
> + unsigned dst_stride,
> + int field)
> +{
> +   if (field == 0) {
> +  ubyte *src_0 = src[field];
> +  for (int i = 0; i < height ; i++) {
> + memcpy(dst, src_0, width);
> + dst += dst_stride;
> + src_0 += src_stride;
> +  }
> +   } else if (field == 1) {
> +  ubyte *src_1 = src[field];
> +  ubyte *src_2 = src[field+1];
> +  bool odd = false;
> +  for (int i = 0; i < height ; i++) {
> + for (int j = 0; j < width*2 ; j++) {
Please make the i, j unsigned and constify src_X.

Thanks
Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [RFC mesa] Introduce .editorconfig

2016-07-22 Thread Emil Velikov

On 18 July 2016 at 00:46, Eric Engestrom  wrote:
> A few weeks ago, Jose Fonseca suggested [0] we use .editorconfig files
> to try and enforce the formatting of the code, to which Michel Dänzer
> suggested [1] we start by importing the existing .dir-locals.el
> settings.
>
> This is a first draft, with settings from the mentioned emacs files,
> augmented with what I could see was the current practice in various
> submodules.
> Note that it might not reflects the desired formatting. Please say so if
> this is the case. I expect several of these .editorconfig files are in
> this situation, so don't hesitate to just tell me "the current formatting
> for X is wrong, we want to eventually match the project-wide formatting,
> so don't create this .editorconfig".
>
From a quick skim I believe all of those are fine. We might need
tab_width in the odd place (r600/radeonsi/fdno/haiku), but those are
to be added by people who know the correct value.


> --- /dev/null
> +++ b/.editorconfig
> @@ -0,0 +1,40 @@
> +# To use this config on your editor, follow the instructions on:
> +# http://editorconfig.org
> +
> +root = true
> +
> +[*]
> +charset = utf-8
> +end_of_line = lf
Agree with Jose on this one. While we don't have any \r\n instances
in-tree, this will cause some annoying churn for out-of-tree code
which depend on \r\n.


> +[{Makefile*,*.mk}]
> +indent_style = tab
> +
> +[*.py,SCons*]
Do we want to use {} here, just like the Makefile(s) a couple of lines above ?
Side note: we have a few python scripts which don't end with .py that
we might(?) want to rename.

> +[*.{dsp,dsw,sln,vcproj}]
We don't have any such files in-tree. Considering the above comment
can we drop this ?


> --- /dev/null
> +++ b/src/mesa/drivers/dri/radeon/.editorconfig
> @@ -0,0 +1,3 @@
> +# Coin toss?  Leaving this one empty (ie. global rules) for now because
> +# there are files with 3-spaces, 4-spaces and tab indentations.
> +# Tell me what you'd prefer if not the 3-space style of the rest of Mesa.
IMHO having something is always better than none. So fwiw I'd just
keep this file.

Thanks for going through and adding all of these :-)

-Emil
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

Re: [Mesa-dev] [PATCH 07/16] nir/lower_tex: Add support for lowering coordinate offsets

2016-07-22 Thread Jason Ekstrand

On Fri, Jul 22, 2016 at 1:11 AM, Pohjolainen, Topi <
topi.pohjolai...@intel.com> wrote:

> On Thu, Jul 21, 2016 at 09:21:53PM -0700, Jason Ekstrand wrote:
> > On i965, we can't support coordinate offsets for texelFetch or rectangle
> > textures.  Previously, we were doing this with a GLSL pass but we need to
> > do it in NIR if we want those workarounds for SPIR-V.
> >
> > Signed-off-by: Jason Ekstrand 
> > Cc: "12.0" 
> > ---
> >  src/compiler/nir/nir.h   | 10 
> >  src/compiler/nir/nir_lower_tex.c | 54
> 
> >  2 files changed, 64 insertions(+)
> >
> > diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> > index d0f52b0..45f758c 100644
> > --- a/src/compiler/nir/nir.h
> > +++ b/src/compiler/nir/nir.h
> > @@ -2405,6 +2405,16 @@ typedef struct nir_lower_tex_options {
> > unsigned lower_txp;
> >
> > /**
> > +* If true, lower away nir_tex_src_offset for all texelfetch
> instructions.
> > +*/
> > +   bool lower_txf_offset;
> > +
> > +   /**
> > +* If true, lower away nir_tex_src_offset for all rect textures.
> > +*/
> > +   bool lower_rect_offset;
> > +
> > +   /**
> >  * If true, lower rect textures to 2D, using txs to fetch the
> >  * texture dimensions and dividing the texture coords by the
> >  * texture dims to normalize.
> > diff --git a/src/compiler/nir/nir_lower_tex.c
> b/src/compiler/nir/nir_lower_tex.c
> > index 0cf1071..a1280e1 100644
> > --- a/src/compiler/nir/nir_lower_tex.c
> > +++ b/src/compiler/nir/nir_lower_tex.c
> > @@ -128,6 +128,54 @@ project_src(nir_builder *b, nir_tex_instr *tex)
> > tex_instr_remove_src(tex, proj_index);
> >  }
> >
> > +static bool
> > +lower_offset(nir_builder *b, nir_tex_instr *tex)
> > +{
> > +   int offset_index = tex_instr_find_src(tex, nir_tex_src_offset);
>
> Could be 'const'.
>
> > +   if (offset_index < 0)
> > +  return false;
> > +
> > +   int coord_index = tex_instr_find_src(tex, nir_tex_src_coord);
>
> Same here.
>
> > +   assert(coord_index >= 0);
> > +
> > +   assert(tex->src[offset_index].src.is_ssa);
> > +   assert(tex->src[coord_index].src.is_ssa);
> > +   nir_ssa_def *offset = tex->src[offset_index].src.ssa;
> > +   nir_ssa_def *coord = tex->src[coord_index].src.ssa;
>
> In principle, it looks like these could be declared constants as well:
>
>   const nir_ssa_def * const offset = tex->src[offset_index].src.ssa;
>   const nir_ssa_def * const coord = tex->src[coord_index].src.ssa;
>
> But further digging tells me that they can be only:
>
>   nir_ssa_def * const offset = tex->src[offset_index].src.ssa;
>   nir_ssa_def * const coord = tex->src[coord_index].src.ssa;
>
>
> Quite a few of the helpers in nir declare their inputs as read-write even
> though they only read the contents...
>

I have tried on multiple occasions to make NIR more const-safe.
Unfortunately, every attempt only seems to lead to frustration.  Some of
this is due to limitations in C (which are fixed in C11 which we can't use)
that make it very hard to make constructs that can handle either const or
non-const.  This, in turn, makes it very hard to constify things.  There is
also the fact that we use a lot of embedded linked lists, which force a bunch
of things that look like they should be const to be non-const.  I've given up.
It's better to not claim any sort of constness than to try and claim it and
have it all be lies.


>
> > +
> > +   b->cursor = nir_before_instr(&tex->instr);
> > +
> > +   nir_ssa_def *offset_coord;
> > +   if (nir_tex_instr_src_type(tex, coord_index) == nir_type_float) {
> > +  assert(tex->sampler_dim == GLSL_SAMPLER_DIM_RECT);
> > +  offset_coord = nir_fadd(b, coord, nir_i2f(b, offset));
> > +   } else {
> > +  offset_coord = nir_iadd(b, coord, offset);
> > +   }
> > +
> > +   if (tex->is_array) {
> > +  /* The offset is not applied to the array index */
> > +  if (tex->coord_components == 2) {
> > + offset_coord = nir_vec2(b, nir_channel(b, offset_coord, 0),
> > +nir_channel(b, coord, 1));
> > +  } else if (tex->coord_components == 3) {
> > + offset_coord = nir_vec3(b, nir_channel(b, offset_coord, 0),
> > +nir_channel(b, offset_coord, 1),
> > +nir_channel(b, coord, 2));
> > +  } else {
> > + unreachable("Invalid number of components");
> > +  }
> > +   }
> > +
> > +   nir_instr_rewrite_src(&tex->instr, &tex->src[coord_index].src,
> > + nir_src_for_ssa(offset_coord));
> > +
> > +   tex_instr_remove_src(tex, offset_index);
> > +
> > +   return true;
> > +}
> > +
> > +
> >  static nir_ssa_def *
> >  get_texture_size(nir_builder *b, nir_tex_instr *tex)
> >  {
> > @@ -458,6 +506,12 @@ nir_lower_tex_block(nir_block *block, nir_builder
> *b,
> >   progress = true;
> >}
> >
> > +  if ((tex->op == nir_texop_txf && options->lower_txf_offset) ||
> > +

Re: [Mesa-dev] [PATCH] nvc0: add support for BGRA8 images

2016-07-22 Thread Samuel Pitoiset




On 07/22/2016 02:14 PM, Ilia Mirkin wrote:

You said earlier that pbo-readpixels-small worked with this patch on
Fermi :( Oh well.



It worked, but maybe another issue has come up in the meantime :/



On Jul 22, 2016 5:39 AM, "Samuel Pitoiset" <samuel.pitoi...@gmail.com> wrote:

This patch introduces the following regressions:

bin/fcc-read-to-pbo-after-clear -auto
bin/gl-2.1-pbo -auto -fbo
bin/fbo-pbo-readpixels-small -auto
bin/pbo-read-argb -auto
bin/pbo-readpixels-small -auto
bin/mesa_pack_invert-readpixels -auto -fbo

Tested on Fermi (GF119).

On 07/16/2016 09:09 PM, Ilia Mirkin wrote:

This is useful for pbo downloads, which are now accelerated with
images.
BGRA8 is a moderately common format to do that in.

Signed-off-by: Ilia Mirkin <imir...@alum.mit.edu>
---

This needs testing on SM20 and SM30. I've tested it on SM35 and

bin/pbo-readpixels-small -auto

worked fine. (Didn't until I properly fixed the various items.)

 src/gallium/drivers/nouveau/codegen/nv50_ir.cpp   |
2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir.h |
3 +++
 src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp |
2 ++
 src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp |
4 
 src/gallium/drivers/nouveau/nv50/g80_defs.xml.h   | 1 +
 src/gallium/drivers/nouveau/nv50/nv50_formats.c   |
3 ++-
 src/gallium/drivers/nouveau/nvc0/nvc0_tex.c   |
2 ++
 7 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
index 2caebe8..179ad0b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
@@ -1012,6 +1012,8 @@ const struct TexInstruction::ImgFormatDesc
TexInstruction::formatTable[] =
{ "RG8_SNORM",2, {  8,  8,  0,  0 }, SNORM },
{ "R16_SNORM",1, { 16,  0,  0,  0 }, SNORM },
{ "R8_SNORM", 1, {  8,  0,  0,  0 }, SNORM },
+
+   { "BGRA8",4, {  8,  8,  8,  8 }, UNORM, true },
 };

 void
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
index 41804b6..6d2ee8b 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
@@ -412,6 +412,8 @@ enum ImgFormat
FMT_R16_SNORM,
FMT_R8_SNORM,

+   FMT_BGRA8,
+
IMG_FORMAT_COUNT,
 };

@@ -967,6 +969,7 @@ public:
   uint8_t components;
   uint8_t bits[4];
   ImgType type;
+  bool bgra;
};

static const struct ImgFormatDesc formatTable[IMG_FORMAT_COUNT];
diff --git
a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 61eb7f5..7dff08a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -508,6 +508,8 @@ static nv50_ir::ImgFormat
translateImgFormat(uint format)
FMT_CASE(R8G8_SNORM, RG8_SNORM);
FMT_CASE(R16_SNORM, R16_SNORM);
FMT_CASE(R8_SNORM, R8_SNORM);
+
+   FMT_CASE(B8G8R8A8_UNORM, BGRA8);
}

assert(!"Unexpected format");
diff --git
a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 18955eb..92bc0bb 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -1940,6 +1940,10 @@
NVC0LoweringPass::convertSurfaceFormat(TexInstruction *su)
  bld.mkCvt(OP_CVT, TYPE_F32, typedDst[i], TYPE_F16,
typedDst[i]);
   }
}
+
+   if (format->bgra) {
+  std::swap(typedDst[0], typedDst[2]);
+   }
 }

 void
diff --git a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
index 5d40624..49bf860 100644
--- a/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
+++ b/src/gallium/drivers/nouveau/nv50/g80_defs.xml.h
@@ -177,6 +177,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
 #define GK104_IMAGE_FORMAT_RG32_FLOAT
0x000d
 #define GK104_IMAGE_FORMAT_RG32_SIN

Re: [Mesa-dev] [PATCH 06/11] vl/util: add copy func for yv12image to nv12surface

2016-07-22 Thread Andy Furniss


Zhang, Boyuan wrote:

Hi Andy,

I just submitted another patch set.

1. Fixed previously reported regression when using ffmpeg to encode.
2. Fixed I420 "width=720,height=480" garbage output issue.

Please give it a try on your side.


Hi,

The width 720 is OK now. Colors are still wrong. It seems from more
testing that it's not just I420, inputting YV12 is also wrong, which
makes me think that the conversions could be producing nv21 rather
than nv12.

I updated my gstreamer to current git just in case it changed something
but it didn't, though they decided to make vaapi264enc default to high, 
so now I have to add ! video/x-h264,profile=baseline ! everywhere.


ffmpeg/avconv no longer hit the division by zero.

There is something a bit strange about their rate control though, it's
like there is a max bitrate per frame limit - maybe they are sending
a min qp as well as bitrate or something, but I think it may be more
complicated than that.

Below this limit, bitrate control works as expected with varying
framerates, e.g. 2560x1440 24mbit at fps 50 and 25 = OK; start going higher
and things start getting limited for the lower fps. The source is not
limiting; qp is in the high 20s at this rate/fps.

Of course this may just be an ffmpeg/avconv issue. When the patches go
in I will ask the author of the vaapi code (who IIRC has a Bonaire, so
will be able to test).

Another ffmpeg-specific feature is that, though the files made play OK,
the JM ref decoder doesn't like them, as shown below. It was always like
this, though I can't recall the frame numbering being in 4s; IIRC it used
to be paired like gstreamer.
gstreamer doesn't have the POC issue, example output below.

ffmpeg/avconv -

 ./ldecod.exe -i /mnt/ramdisk/av-2425.264 -o /dev/null
Setting Default Parameters...
Parsing Configfile decoder.cfg
...

- JM 19.0 (FRExt) -
--
 Input H.264 bitstream  : /mnt/ramdisk/av-2425.264
 Output decoded YUV : /dev/null
 Input reference file   : test_rec.yuv
--
 Input reference file   : test_rec.yuv does not exist
  SNR values are not available
Profile IDC  : 66
Image Format : 2560x1440 (2560x1440)
Color Format : 4:2:0 (8:8:8)
--
POC must = frame# or field# for SNRs to be correct
--
  Frame  POC  Pic#   QPSnrY SnrU SnrV   Y:U:V Time(ms)
--
0(IDR)0 031 4:2:0 238
0( P )0 131 4:2:0  93
0( P )1 229 4:2:0  84
0( P )1 329 4:2:0  84
1( P )2 429 4:2:0  86
1( P )2 529 4:2:0  86
1( P )3 629 4:2:0  88
1( P )3 729 4:2:0  89
2( P )4 829 4:2:0 165
2( P )4 929 4:2:0  88
2( P )51028 4:2:0 189
2( P )51128 4:2:0 100
3( P )61228 4:2:0 189
3( P )61328 4:2:0 102
output POC must be in ascending order
output POC must be in ascending order
output POC must be in ascending order
output POC must be in ascending order
output POC must be in ascending order
output POC must be in ascending order
output POC must be in ascending order


Gstreamer -

./ldecod.exe -i /mnt/ramdisk/gs-2425.264 -o /dev/null
Setting Default Parameters...
Parsing Configfile decoder.cfg
...

- JM 19.0 (FRExt) -
--
 Input H.264 bitstream  : /mnt/ramdisk/gs-2425.264
 Output decoded YUV : /dev/null
 Input reference file   : test_rec.yuv
--
 Input reference file   : test_rec.yuv does not exist
  SNR values are not available
Profile IDC  : 66
Image Format : 2560x1440 (2560x1440)
Color Format : 4:2:0 (8:8:8)
--
POC must = frame# or field# for SNRs to be correct
---

Re: [Mesa-dev] [PATCH] glsl: subroutine types cannot be compared

2016-07-22 Thread Andres Gomez

On Fri, 2016-07-22 at 06:07 +1000, Dave Airlie wrote:
> > 
> > On Mon, 2016-07-18 at 16:39 +0300, Andres Gomez wrote:
> > > subroutine variables are to be used just in the way functions are
> > > called. Although the spec doesn't say it explicitly, this means that
> > > these variables are not to be used in any other way than those left
> > > for function calls. Therefore, a comparison between 2 subroutine
> > > variables should also cause a compilation error.
> > > 
> > > From The OpenGL® Shading Language 4.40, page 117:
> > > 
> > >   "  To use subroutines, a subroutine type is declared, one or more
> > >  functions are associated with that subroutine type, and a
> > >  subroutine variable of that type is declared. The function
> > >  currently assigned to the variable function is then called by
> > >  using function calling syntax replacing a function name with the
> > >  name of the subroutine variable. Subroutine variables are
> > >  uniforms, and are assigned to specific functions only through
> > >  commands (UniformSubroutinesuiv) in the OpenGL API."
> > > 
> > > From The OpenGL® Shading Language 4.40, page 118:
> > > 
> > >   "  Subroutine uniform variables are called the same way functions
> > >  are called. When a subroutine variable (or an element of a
> > >  subroutine variable array) is associated with a particular
> > >  function, all function calls through that variable will call that
> > >  particular function."
> > > 
> > > Fixes 
> > > GL44-CTS.shader_subroutine.subroutines_cannot_be_assigned_float_int_values_or_be_compared
> > > 
> > > Signed-off-by: Andres Gomez 
> 
> I prefer this one, please push it with Ian's r-b, and consider my one 
> withdrawn.
> 
> Reviewed-by: Dave Airlie 

Thanks, Dave.

Already pushed!
-- 

Br,

Andres
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 7/9] st/va: add environmental variable to disable interlace

2016-07-22 Thread Christian König

From: Boyuan Zhang 

Add an environment variable to disable interlace mode. At the VAAPI decoding
stage, the driver cannot distinguish between the pure decoding case and the
transcoding case. Since interlaced encoding is not supported, we have to
disable interlace for the transcoding case. The temporary solution is to use
an environment variable (VAAPI_DISABLE_INTERLACE) to disable interlace mode.

Signed-off-by: Boyuan Zhang 
---
 src/gallium/state_trackers/va/surface.c | 4 
 1 file changed, 4 insertions(+)

diff --git a/src/gallium/state_trackers/va/surface.c 
b/src/gallium/state_trackers/va/surface.c
index 8ce4143..63727b6 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -43,6 +43,8 @@
 
 #include "va_private.h"
 
+DEBUG_GET_ONCE_BOOL_OPTION(nointerlace, "VAAPI_DISABLE_INTERLACE", FALSE);
+
 #include 
 
 static const enum pipe_format vpp_surface_formats[] = {
@@ -620,6 +622,8 @@ vlVaCreateSurfaces2(VADriverContextP ctx, unsigned int 
format,
 
templat.width = width;
templat.height = height;
+   if (debug_get_option_nointerlace())
+  templat.interlaced = false;
 
memset(surfaces, VA_INVALID_ID, num_surfaces * sizeof(VASurfaceID));
 
-- 
2.5.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev

[Mesa-dev] [PATCH 1/9] st/va: add encode entrypoint v2

2016-07-22 Thread Christian König

From: Boyuan Zhang 

VAAPI passes PIPE_VIDEO_ENTRYPOINT_ENCODE as the entry point for the encoding
case. We save this encode entry point in the config. Previously, config_id was
used directly as the profile. Now the config has both a profile and an
entrypoint field, and config_id is used to look up the config object. Later on,
we pass this entrypoint to context->templat.entrypoint instead of always
hardcoding PIPE_VIDEO_ENTRYPOINT_BITSTREAM as was done for the decoding case.
The encode entrypoint is not accepted by the driver until we enable VAAPI
encode in a later patch.

v2 (chk): fix commit message to match 80 chars, use switch instead of ifs,
  fix memory leaks in the error path, implement
  vlVaQueryConfigEntrypoints as well, drop VAEntrypointEncPicture
  (only used for JPEG).

Signed-off-by: Boyuan Zhang 
Signed-off-by: Christian König 
---
 src/gallium/state_trackers/va/config.c | 111 +
 src/gallium/state_trackers/va/context.c|  59 ---
 src/gallium/state_trackers/va/surface.c|  14 +++-
 src/gallium/state_trackers/va/va_private.h |   5 ++
 4 files changed, 150 insertions(+), 39 deletions(-)

diff --git a/src/gallium/state_trackers/va/config.c 
b/src/gallium/state_trackers/va/config.c
index 9ca0aa8..7742087 100644
--- a/src/gallium/state_trackers/va/config.c
+++ b/src/gallium/state_trackers/va/config.c
@@ -34,6 +34,8 @@
 
 #include "va_private.h"
 
+#include "util/u_handle_table.h"
+
 DEBUG_GET_ONCE_BOOL_OPTION(mpeg4, "VAAPI_MPEG4_ENABLED", false)
 
 VAStatus
@@ -88,10 +90,18 @@ vlVaQueryConfigEntrypoints(VADriverContextP ctx, VAProfile 
profile,
   return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
pscreen = VL_VA_PSCREEN(ctx);
-   if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, 
PIPE_VIDEO_CAP_SUPPORTED))
-  return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+   if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+   PIPE_VIDEO_CAP_SUPPORTED))
+  entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD;
 
-   entrypoint_list[(*num_entrypoints)++] = VAEntrypointVLD;
+#if 0
+   if (pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE,
+   PIPE_VIDEO_CAP_SUPPORTED))
+  entrypoint_list[(*num_entrypoints)++] = VAEntrypointEncSlice;
+#endif
+
+   if (num_entrypoints == 0)
+  return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
return VA_STATUS_SUCCESS;
 }
@@ -128,29 +138,73 @@ VAStatus
 vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, VAEntrypoint 
entrypoint,
  VAConfigAttrib *attrib_list, int num_attribs, VAConfigID 
*config_id)
 {
+   vlVaDriver *drv;
+   vlVaConfig *config;
struct pipe_screen *pscreen;
enum pipe_video_profile p;
 
if (!ctx)
   return VA_STATUS_ERROR_INVALID_CONTEXT;
 
+   drv = VL_VA_DRIVER(ctx);
+
+   if (!drv)
+  return VA_STATUS_ERROR_INVALID_CONTEXT;
+
+   config = CALLOC(1, sizeof(vlVaConfig));
+   if (!config)
+  return VA_STATUS_ERROR_ALLOCATION_FAILED;
+
if (profile == VAProfileNone && entrypoint == VAEntrypointVideoProc) {
-  *config_id = PIPE_VIDEO_PROFILE_UNKNOWN;
+  config->entrypoint = VAEntrypointVideoProc;
+  config->profile = PIPE_VIDEO_PROFILE_UNKNOWN;
+  pipe_mutex_lock(drv->mutex);
+  *config_id = handle_table_add(drv->htab, config);
+  pipe_mutex_unlock(drv->mutex);
   return VA_STATUS_SUCCESS;
}
 
p = ProfileToPipe(profile);
-   if (p == PIPE_VIDEO_PROFILE_UNKNOWN)
+   if (p == PIPE_VIDEO_PROFILE_UNKNOWN) {
+  FREE(config);
   return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+   }
 
pscreen = VL_VA_PSCREEN(ctx);
-   if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_BITSTREAM, 
PIPE_VIDEO_CAP_SUPPORTED))
-  return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
 
-   if (entrypoint != VAEntrypointVLD)
+   switch (entrypoint) {
+   case VAEntrypointVLD:
+  if (!pscreen->get_video_param(pscreen, p, 
PIPE_VIDEO_ENTRYPOINT_BITSTREAM,
+   PIPE_VIDEO_CAP_SUPPORTED)) {
+ FREE(config);
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+  }
+
+  config->entrypoint = PIPE_VIDEO_ENTRYPOINT_BITSTREAM;
+  break;
+
+#if 0
+   case VAEntrypointEncSlice:
+  if (!pscreen->get_video_param(pscreen, p, PIPE_VIDEO_ENTRYPOINT_ENCODE,
+   PIPE_VIDEO_CAP_SUPPORTED)) {
+ FREE(config);
+ return VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
+  }
+
+  config->entrypoint = PIPE_VIDEO_ENTRYPOINT_ENCODE;
+  break;
+#endif
+
+   default:
+  FREE(config);
   return VA_STATUS_ERROR_UNSUPPORTED_ENTRYPOINT;
+   }
 
-   *config_id = p;
+   config->profile = p;
+
+   pipe_mutex_lock(drv->mutex);
+   *config_id = handle_table_add(drv->htab, config);
+   pipe_mutex_unlock(drv->mutex);
 
return VA_STATUS_SUCCESS;
 }
@@ -158,9 +212,27 @@ vlVaCreateConfig(VADriverContextP ctx, VAProfile profile, 
VAEntrypoint entrypoin
 VAStatus
 vlVaDes

1 2 >

1 - 100 of 131 matches

Mail list logo