Re: [Mesa-dev] [PATCH 14/14] ac/gpu_info: add has_read_registers_query

2018-05-08 Thread Nicolai Hähnle

For the series:

Reviewed-by: Nicolai Hähnle 


On 03.05.2018 02:19, Marek Olšák wrote:

From: Marek Olšák 

---
  src/amd/common/ac_gpu_info.c  | 2 ++
  src/amd/common/ac_gpu_info.h  | 1 +
  src/gallium/drivers/radeonsi/si_debug.c   | 5 ++---
  src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 1 +
  4 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/src/amd/common/ac_gpu_info.c b/src/amd/common/ac_gpu_info.c
index 99f1996b414..a02fb4e4dc4 100644
--- a/src/amd/common/ac_gpu_info.c
+++ b/src/amd/common/ac_gpu_info.c
@@ -331,20 +331,21 @@ bool ac_query_gpu_info(int fd, amdgpu_device_handle dev,
/* SI doesn't support unaligned loads. */
info->has_unaligned_shader_loads = info->chip_class != SI;
/* Disable sparse mappings on SI due to VM faults in CP DMA. Enable 
them once
 * these faults are mitigated in software.
 * Disable sparse mappings on GFX9 due to hangs.
 */
info->has_sparse_vm_mappings =
info->chip_class >= CIK && info->chip_class <= VI &&
info->drm_minor >= 13;
info->has_2d_tiling = true;
+   info->has_read_registers_query = true;
  
  	info->num_render_backends = amdinfo->rb_pipes;

/* The value returned by the kernel driver was wrong. */
if (info->family == CHIP_KAVERI)
info->num_render_backends = 2;
  
  	info->clock_crystal_freq = amdinfo->gpu_counter_freq;

if (!info->clock_crystal_freq) {
fprintf(stderr, "amdgpu: clock crystal frequency is 0, timestamps 
will be wrong\n");
info->clock_crystal_freq = 1;
@@ -491,20 +492,21 @@ void ac_print_gpu_info(struct radeon_info *info)
printf("has_bo_metadata = %u\n", info->has_bo_metadata);
printf("has_gpu_reset_status_query = %u\n", 
info->has_gpu_reset_status_query);
printf("has_gpu_reset_counter_query = %u\n", 
info->has_gpu_reset_counter_query);
printf("has_eqaa_surface_allocator = %u\n", 
info->has_eqaa_surface_allocator);
printf("has_format_bc1_through_bc7 = %u\n", 
info->has_format_bc1_through_bc7);
printf("kernel_flushes_tc_l2_after_ib = %u\n", 
info->kernel_flushes_tc_l2_after_ib);
printf("has_indirect_compute_dispatch = %u\n", 
info->has_indirect_compute_dispatch);
printf("has_unaligned_shader_loads = %u\n", 
info->has_unaligned_shader_loads);
printf("has_sparse_vm_mappings = %u\n", 
info->has_sparse_vm_mappings);
printf("has_2d_tiling = %u\n", info->has_2d_tiling);
+   printf("has_read_registers_query = %u\n", 
info->has_read_registers_query);
  
  	printf("Shader core info:\n");

printf("max_shader_clock = %i\n", info->max_shader_clock);
printf("num_good_compute_units = %i\n", 
info->num_good_compute_units);
printf("max_se = %i\n", info->max_se);
printf("max_sh_per_se = %i\n", info->max_sh_per_se);
  
  	printf("Render backend info:\n");

printf("num_render_backends = %i\n", info->num_render_backends);
printf("num_tile_pipes = %i\n", info->num_tile_pipes);
diff --git a/src/amd/common/ac_gpu_info.h b/src/amd/common/ac_gpu_info.h
index fb44f7c8af4..1201d811361 100644
--- a/src/amd/common/ac_gpu_info.h
+++ b/src/amd/common/ac_gpu_info.h
@@ -102,20 +102,21 @@ struct radeon_info {
bool has_bo_metadata;
bool has_gpu_reset_status_query;
bool has_gpu_reset_counter_query;
bool has_eqaa_surface_allocator;
bool has_format_bc1_through_bc7;
bool kernel_flushes_tc_l2_after_ib;
bool has_indirect_compute_dispatch;
bool has_unaligned_shader_loads;
bool has_sparse_vm_mappings;
bool has_2d_tiling;
+   bool has_read_registers_query;
  
  	/* Shader cores. */

uint32_t r600_max_quad_pipes; /* wave size / 16 */
uint32_t max_shader_clock;
uint32_t num_good_compute_units;
uint32_t max_se; /* shader engines */
uint32_t max_sh_per_se; /* shader arrays per shader 
engine */
  
  	/* Render backends (color + depth blocks). */

uint32_tr300_num_gb_pipes;
diff --git a/src/gallium/drivers/radeonsi/si_debug.c 
b/src/gallium/drivers/radeonsi/si_debug.c
index b7d40db21cb..36cbb8866ed 100644
--- a/src/gallium/drivers/radeonsi/si_debug.c
+++ b/src/gallium/drivers/radeonsi/si_debug.c
@@ -287,23 +287,22 @@ static void si_dump_mmapped_reg(struct si_context *sctx, 
FILE *f,
  {
struct radeon_winsys *ws = sctx->ws;
uint32_t value;
  
  	if (ws->read_re

Re: [Mesa-dev] [PATCH 08/16] ac/surface/gfx6: don't overallocate mipmapped HTILE

2018-05-08 Thread Nicolai Hähnle

On 02.05.2018 06:00, Marek Olšák wrote:

From: Marek Olšák 

---
  src/amd/common/ac_surface.c | 9 +++--
  1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index b2af1f70b69..341a7854fe5 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -841,22 +841,27 @@ static int gfx6_compute_surface(ADDR_HANDLE addrlib,
 *
 * "dcc_alignment * 4" was determined by trial and error.
 */
surf->dcc_size = align64(surf->surf_size >> 8,
 surf->dcc_alignment * 4);
}
  
  	/* Make sure HTILE covers the whole miptree, because the shader reads

 * TC-compatible HTILE even for levels where it's disabled by DB.
 */
-   if (surf->htile_size && config->info.levels > 1)
-   surf->htile_size *= 2;
+   if (surf->htile_size && config->info.levels > 1 &&
+   surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
+   surf->htile_size =
+   surf->surf_size * 4 / (8 * 8 * surf->bpe *
+  MAX2(1, config->info.samples));


Can you explain this formula? In particular where the 4 comes from?

Thanks,
Nicolai



+   surf->htile_size = align64(surf->htile_size, 
surf->htile_alignment);
+   }
  
  	surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;

surf->is_displayable = surf->is_linear ||
   surf->micro_tile_mode == 
RADEON_MICRO_MODE_DISPLAY ||
   surf->micro_tile_mode == 
RADEON_MICRO_MODE_ROTATED;
return 0;
  }
  
  /* This is only called when expecting a tiled layout. */

  static int




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] virgl: clear render state before submitting clear command

2018-05-08 Thread Gert Wollny
Am Dienstag, den 08.05.2018, 16:38 +1000 schrieb Dave Airlie:
> 
> Care to elaborate here? Nothing in gallium or virgl is really set in
> stone we should be able to engineer new interfaces if needed, they
> just might take some time.

Taking Marek's comment [1] into account, I wouldn't want to try doing
this in gallium directly. Instead, I thought about some forwarding
state tracker that builds on top of gallium (like a derived class in C++
terms), that is used by virgl, and that overrides what in gallium is
the st_Clear function, e.g. with a function "stfw_Clear" which would
then look more or less like this:

void stfw_Clear(struct gl_context *ctx, GLbitfield mask)
{
st_validate_state(st, ST_PIPELINE_CLEAR);
st_validate_state(st, ST_PIPELINE_RENDER);
st->pipe->glclear(st->pipe, mask); 
}

glclear would replace clear in virgl (rename to not mix with gallium
names) and send a (new) VIRGL_GLCLEAR command with "mask" to the host
which then directly executes glClear(mask).

Since the states are properly updated, no further handling would be
needed and all this decoding the mask and putting it back together on
the host would also go away.

[1] https://lists.freedesktop.org/archives/mesa-dev/2018-May/194343.html

> > Anyway, If you confirm that we should do it on the host side, then
> > I will retract this patch and prepare something for virglrenderer.
> 
> I think since we've fixed similar on the host side we should
> continue to do so.
I was afraid someone would say this ;) Okay, I'm on it.

best, 
Gert 

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] i965: add {X, A}BGR2101010 to 'intel_image_formats'

2018-05-08 Thread Miguel Casas
This patch adds {X,A}BGR2101010 entries to the list of supported
'intel_image_formats'.

Bug: https://crbug.com/776093
---
 src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 409f763b64..d3488b9f29 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -190,6 +190,12 @@ static const struct intel_image_format 
intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } },
 
+   { __DRI_IMAGE_FOURCC_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } },
+
{ __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
 
-- 
2.17.0.441.gb46fe60e1d-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vulkan/wsi: Only use LINEAR modifier for prime if supported.

2018-05-08 Thread Abel García Dorta
This patch has been

Tested-by: Abel Garcia Dorta 

on top of master
and on top of mesa-18.1.0_rc2 provided by gentoo.

Thank you for fixing it!


2018-05-05 15:34 GMT+02:00 Bas Nieuwenhuizen :
> This was setting the LINEAR modifier if neither the
> X server nor the driver supported modifiers.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106180
> Fixes: c80c08e226 "vulkan/wsi/x11: Add support for DRI3 v1.2"
> CC: 18.1 
> CC: Abel Garcia Dorta 
> CC: Daniel Stone 
> ---
>  src/vulkan/wsi/wsi_common.c | 3 ++-
>  src/vulkan/wsi/wsi_common_private.h | 1 +
>  src/vulkan/wsi/wsi_common_x11.c | 3 ++-
>  3 files changed, 5 insertions(+), 2 deletions(-)
>
> diff --git a/src/vulkan/wsi/wsi_common.c b/src/vulkan/wsi/wsi_common.c
> index fe262b4968d..87e508ddf85 100644
> --- a/src/vulkan/wsi/wsi_common.c
> +++ b/src/vulkan/wsi/wsi_common.c
> @@ -442,6 +442,7 @@ fail:
>  VkResult
>  wsi_create_prime_image(const struct wsi_swapchain *chain,
> const VkSwapchainCreateInfoKHR *pCreateInfo,
> +   bool use_modifier,
> struct wsi_image *image)
>  {
> const struct wsi_device *wsi = chain->wsi;
> @@ -626,7 +627,7 @@ wsi_create_prime_image(const struct wsi_swapchain *chain,
> if (result != VK_SUCCESS)
>goto fail;
>
> -   image->drm_modifier = DRM_FORMAT_MOD_LINEAR;
> +   image->drm_modifier = use_modifier ? DRM_FORMAT_MOD_LINEAR : 
> DRM_FORMAT_MOD_INVALID;
> image->num_planes = 1;
> image->sizes[0] = linear_size;
> image->row_pitches[0] = linear_stride;
> diff --git a/src/vulkan/wsi/wsi_common_private.h 
> b/src/vulkan/wsi/wsi_common_private.h
> index b608119b969..90941c8201b 100644
> --- a/src/vulkan/wsi/wsi_common_private.h
> +++ b/src/vulkan/wsi/wsi_common_private.h
> @@ -89,6 +89,7 @@ wsi_create_native_image(const struct wsi_swapchain *chain,
>  VkResult
>  wsi_create_prime_image(const struct wsi_swapchain *chain,
> const VkSwapchainCreateInfoKHR *pCreateInfo,
> +   bool use_modifier,
> struct wsi_image *image);
>
>  void
> diff --git a/src/vulkan/wsi/wsi_common_x11.c b/src/vulkan/wsi/wsi_common_x11.c
> index 3a00caddfb9..62739b99125 100644
> --- a/src/vulkan/wsi/wsi_common_x11.c
> +++ b/src/vulkan/wsi/wsi_common_x11.c
> @@ -1043,7 +1043,8 @@ x11_image_init(VkDevice device_h, struct x11_swapchain 
> *chain,
> uint32_t bpp = 32;
>
> if (chain->base.use_prime_blit) {
> -  result = wsi_create_prime_image(&chain->base, pCreateInfo, 
> &image->base);
> +  bool use_modifier = num_tranches > 0;
> +  result = wsi_create_prime_image(&chain->base, pCreateInfo, 
> use_modifier, &image->base);
> } else {
>result = wsi_create_native_image(&chain->base, pCreateInfo,
> num_tranches, num_modifiers, 
> modifiers,
> --
> 2.17.0
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] dri_util: Add R10G10B10{A, X}2 translation between DRI and mesa_format.

2018-05-08 Thread Miguel Casas
Add R10G10B10{A,X}2 translation between mesa_format and DRI format
to driGLFormatToImageFormat() and driImageFormatToGLFormat().

Bug: https://crbug.com/776093
---
 src/mesa/drivers/dri/common/dri_util.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/src/mesa/drivers/dri/common/dri_util.c 
b/src/mesa/drivers/dri/common/dri_util.c
index 7cb6248b13..78c6bbf234 100644
--- a/src/mesa/drivers/dri/common/dri_util.c
+++ b/src/mesa/drivers/dri/common/dri_util.c
@@ -886,6 +886,10 @@ driGLFormatToImageFormat(mesa_format format)
   return __DRI_IMAGE_FORMAT_ARGB2101010;
case MESA_FORMAT_B10G10R10X2_UNORM:
   return __DRI_IMAGE_FORMAT_XRGB2101010;
+   case MESA_FORMAT_R10G10B10A2_UNORM:
+  return __DRI_IMAGE_FORMAT_ABGR2101010;
+   case MESA_FORMAT_R10G10B10X2_UNORM:
+  return __DRI_IMAGE_FORMAT_XBGR2101010;
case MESA_FORMAT_B8G8R8A8_UNORM:
   return __DRI_IMAGE_FORMAT_ARGB8888;
case MESA_FORMAT_R8G8B8A8_UNORM:
@@ -923,6 +927,10 @@ driImageFormatToGLFormat(uint32_t image_format)
   return MESA_FORMAT_B10G10R10A2_UNORM;
case __DRI_IMAGE_FORMAT_XRGB2101010:
   return MESA_FORMAT_B10G10R10X2_UNORM;
+   case __DRI_IMAGE_FORMAT_ABGR2101010:
+  return MESA_FORMAT_R10G10B10A2_UNORM;
+   case __DRI_IMAGE_FORMAT_XBGR2101010:
+  return MESA_FORMAT_R10G10B10X2_UNORM;
case __DRI_IMAGE_FORMAT_ARGB8888:
   return MESA_FORMAT_B8G8R8A8_UNORM;
case __DRI_IMAGE_FORMAT_ABGR8888:
-- 
2.17.0.441.gb46fe60e1d-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] i965: add {X, A}BGR2101010 to 'intel_image_formats'

2018-05-08 Thread Miguel Casas
This patch adds {X,A}BGR2101010 entries to the list of supported
'intel_image_formats'.

Bug: https://crbug.com/776093
---
 src/mesa/drivers/dri/i965/intel_screen.c | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/src/mesa/drivers/dri/i965/intel_screen.c 
b/src/mesa/drivers/dri/i965/intel_screen.c
index 409f763b64..d3488b9f29 100644
--- a/src/mesa/drivers/dri/i965/intel_screen.c
+++ b/src/mesa/drivers/dri/i965/intel_screen.c
@@ -190,6 +190,12 @@ static const struct intel_image_format 
intel_image_formats[] = {
{ __DRI_IMAGE_FOURCC_XRGB2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_XRGB2101010, 4 } } },
 
+   { __DRI_IMAGE_FOURCC_ABGR2101010, __DRI_IMAGE_COMPONENTS_RGBA, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_ABGR2101010, 4 } } },
+
+   { __DRI_IMAGE_FOURCC_XBGR2101010, __DRI_IMAGE_COMPONENTS_RGB, 1,
+ { { 0, 0, 0, __DRI_IMAGE_FORMAT_XBGR2101010, 4 } } },
+
{ __DRI_IMAGE_FOURCC_ARGB8888, __DRI_IMAGE_COMPONENTS_RGBA, 1,
  { { 0, 0, 0, __DRI_IMAGE_FORMAT_ARGB8888, 4 } } },
 
-- 
2.17.0.441.gb46fe60e1d-goog

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 00/16] RadeonSI: A bunch of random changes

2018-05-08 Thread Nicolai Hähnle

I don't quite understand patch 8.

All the rest (v2 of patch 2) are:

Reviewed-by: Nicolai Hähnle 


On 02.05.2018 06:00, Marek Olšák wrote:

Hi,

These are pretty random. Please review.

Thanks,
Marek

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev




--
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 8/8] i965/miptree: Remove redundant fields from intel_miptree_aux_buffer

2018-05-08 Thread Kenneth Graunke
On Monday, May 7, 2018 12:49:39 PM PDT Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 19 +++
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.h | 25 -
>  2 files changed, 7 insertions(+), 37 deletions(-)

Nice to see this done with ISL finally, and good riddance to multiple
atoms, vtable entries, prototypes, and piles of manual OUT_BATCH.
Thanks for taking care of this!

Series is:

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/8] i965: Re-order depth/stencil/hiz/clear packets to match ISL

2018-05-08 Thread Pohjolainen, Topi
On Mon, May 07, 2018 at 12:49:33PM -0700, Jason Ekstrand wrote:
> ---
>  src/mesa/drivers/dri/i965/gen6_depth_state.c | 34 
> ++--
>  src/mesa/drivers/dri/i965/gen7_misc_state.c  | 32 +-
>  src/mesa/drivers/dri/i965/gen8_depth_state.c | 28 +++
>  3 files changed, 47 insertions(+), 47 deletions(-)

I have locally re-ordered blorp packets in order to ease comparison of state
dumps between blorp and non-blorp paths. Thanks for doing this properly.

Reviewed-by: Topi Pohjolainen 

> 
> diff --git a/src/mesa/drivers/dri/i965/gen6_depth_state.c 
> b/src/mesa/drivers/dri/i965/gen6_depth_state.c
> index 8a1d580..bca956e 100644
> --- a/src/mesa/drivers/dri/i965/gen6_depth_state.c
> +++ b/src/mesa/drivers/dri/i965/gen6_depth_state.c
> @@ -155,44 +155,44 @@ gen6_emit_depth_stencil_hiz(struct brw_context *brw,
> * failure to do so causes hangs on gen5 and a stall on gen6.
> */
>  
> -  /* Emit hiz buffer. */
> -  if (hiz) {
> - assert(depth_mt);
> +  /* Emit stencil buffer. */
> +  if (separate_stencil) {
> + assert(stencil_mt->format == MESA_FORMAT_S_UINT8);
> + assert(stencil_mt->surf.size > 0);
>  
>   uint32_t offset;
> - isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf,
> + isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf,
>   lod, 0, 0, &offset, NULL, NULL);
>  
>BEGIN_BATCH(3);
> -  OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
> -  OUT_BATCH(depth_mt->aux_buf->surf.row_pitch - 1);
> -  OUT_RELOC(depth_mt->aux_buf->bo, RELOC_WRITE, offset);
> +  OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
> +  OUT_BATCH(stencil_mt->surf.row_pitch - 1);
> +  OUT_RELOC(stencil_mt->bo, RELOC_WRITE, offset);
>ADVANCE_BATCH();
>} else {
>BEGIN_BATCH(3);
> -  OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
> +  OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
>OUT_BATCH(0);
>OUT_BATCH(0);
>ADVANCE_BATCH();
>}
>  
> -  /* Emit stencil buffer. */
> -  if (separate_stencil) {
> - assert(stencil_mt->format == MESA_FORMAT_S_UINT8);
> - assert(stencil_mt->surf.size > 0);
> +  /* Emit hiz buffer. */
> +  if (hiz) {
> + assert(depth_mt);
>  
>   uint32_t offset;
> - isl_surf_get_image_offset_B_tile_sa(&stencil_mt->surf,
> + isl_surf_get_image_offset_B_tile_sa(&depth_mt->aux_buf->surf,
>   lod, 0, 0, &offset, NULL, NULL);
>  
>BEGIN_BATCH(3);
> -  OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
> -  OUT_BATCH(stencil_mt->surf.row_pitch - 1);
> -  OUT_RELOC(stencil_mt->bo, RELOC_WRITE, offset);
> +  OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
> +  OUT_BATCH(depth_mt->aux_buf->surf.row_pitch - 1);
> +  OUT_RELOC(depth_mt->aux_buf->bo, RELOC_WRITE, offset);
>ADVANCE_BATCH();
>} else {
>BEGIN_BATCH(3);
> -  OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
> +  OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
>OUT_BATCH(0);
>OUT_BATCH(0);
>ADVANCE_BATCH();
> diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c 
> b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> index 1508473..e3a355f 100644
> --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> @@ -137,39 +137,39 @@ gen7_emit_depth_stencil_hiz(struct brw_context *brw,
> OUT_BATCH((depth - 1) << 21);
> ADVANCE_BATCH();
>  
> -   if (!hiz) {
> +   if (stencil_mt == NULL) {
>BEGIN_BATCH(3);
> -  OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
> +  OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
>OUT_BATCH(0);
>OUT_BATCH(0);
>ADVANCE_BATCH();
> } else {
> -  assert(depth_mt);
> +  stencil_mt->r8stencil_needs_update = true;
> +  const int enabled = devinfo->is_haswell ? HSW_STENCIL_ENABLED : 0;
>  
>BEGIN_BATCH(3);
> -  OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
> -  OUT_BATCH((mocs << 25) |
> -(depth_mt->aux_buf->pitch - 1));
> -  OUT_RELOC(depth_mt->aux_buf->bo, RELOC_WRITE, 0);
> +  OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
> +  OUT_BATCH(enabled |
> +mocs << 25 |
> + (stencil_mt->surf.row_pitch - 1));
> +  OUT_RELOC(stencil_mt->bo, RELOC_WRITE, 0);
>ADVANCE_BATCH();
> }
>  
> -   if (stencil_mt == NULL) {
> +   if (!hiz) {
>BEGIN_BATCH(3);
> -  OUT_BATCH(GEN7_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
> +  OUT_BATCH(GEN7_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
>OUT_BATCH(0);
>OUT_BATCH(0);
>ADVANCE_BATCH();
> } else {
> -  stencil_mt->r8stenc

Re: [Mesa-dev] [PATCH 1/2] intel/isl: Several UNORM formats support typed writes on gen11+

2018-05-08 Thread Kenneth Graunke
On Monday, May 7, 2018 2:56:40 PM PDT Jason Ekstrand wrote:
> ---
>  src/intel/isl/isl_format.c | 26 +-
>  1 file changed, 13 insertions(+), 13 deletions(-)

Awesome, thanks for taking care of this.

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] virgl: clear render state before submitting clear command

2018-05-08 Thread Gert Wollny
Am Montag, den 07.05.2018, 18:25 -0700 schrieb Gurchetan Singh:
> In vrend_clear, we already save and restore colormasks and stencils:
> 
> https://cgit.freedesktop.org/virglrenderer/commit/?id=b75e0a1dabdfbda44c310a69026a9dbd7d980294
> https://cgit.freedesktop.org/virglrenderer/commit/?id=252b00d77c30ce39608c1a9de18523cbdcaca623
> 
> It would be nice if we can put everything on the host side.  Wouldn't
> the following code also solve the RASTERIZER_DISCARD problem?
> 
> if (ctx->sub->hw_rs_state.rasterizer_discard)
> glDisable(GL_RASTERIZER_DISCARD)
> 
> ...
> 
> glClear(..)
> 
> ...
> 
> if (ctx->sub->hw_rs_state.rasterizer_discard)
> glEnable(GL_RASTERIZER_DISCARD)

Thinking of it, re-enabling this should not be needed, because if
gallium clear is called from the guest this state should be disabled
anyway, the state just didn't get transmitted to the host.

For now I'll add it but leave a comment. 




___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gcc bug / crash in ast_type_qualifier::validate_in_qualifier()?

2018-05-08 Thread Eero Tamminen

Hi,

On 08.05.2018 06:45, Matt Turner wrote:

On Mon, May 7, 2018 at 8:02 PM, Brian Paul  wrote:


I don't know when this started happening (I'll try bisecting tomorrow) but
we're seeing a crash in ast_type_qualifier::validate_in_qualifier() in -O3
builds with gcc 5.4.0 on Ubuntu 16.04.

Specifically, at ast_type.cpp:654:

if ((this->flags.i & ~valid_in_mask.flags.i) != 0) {

It seems to be the ~ operator/function which is implemented with an SSE pxor
instruction.

I found that this patch avoids the issue:

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index a1ec0d5..2e518ce 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -474,7 +474,7 @@ enum {

  struct ast_type_qualifier {
 DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
-   DECLARE_BITSET_T(bitset_t, 128);
+   DECLARE_BITSET_T(bitset_t, 96);

 union flags {
struct {

This probably prevents use of xmm instructions, but I haven't inspected the
code.

Is anyone else seeing this?


Yes, it's https://bugs.freedesktop.org/show_bug.cgi?id=105497

I was surprised that we decided it's not worth working around.


By making above part perform worse for everybody using -O3, or by
disabling vectorization optimization (enabled by -O3) just for
the buggy GCC version?

(If that GCC version gets it wrong in this place, it may get it
wrong also elsewhere, so better turn that particular -O3 optimization
off completely.)

Is there an upstream GCC bug report about that, which would tell
which GCC versions are affected?


- Eero
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] anv: move canonical_address calculation into a separate function

2018-05-08 Thread Chris Wilson
Quoting Scott D Phillips (2018-05-08 01:30:45)
> A later patch will make use of this in other places. Also, remove
> dependency on undefined behavior of left-shifting a signed value.

Can it find a home in src/intel/common/gen_gtt.h  (or gen_vma.h,
gen_address.h)?
-Chris
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] loader_dri3: Variant 2: Wait for pending swaps to complete before drawable_fini.

2018-05-08 Thread Michel Dänzer
On 2018-05-05 06:25 AM, Mario Kleiner wrote:
> On Sat, May 5, 2018 at 4:08 AM, Mike Lothian  wrote:
>> I definitely saw the steam bug with patch 1 but not with plasmashell,
>> I started seeing it with patch 2 but it seemed to fix itself
> 
> I had two hangs of kwin_x11 within the last 6 hours when alt-tabbing
> between windows, where it got stuck in the
> loader_dri3_swapbuffer_barrier() from patch 1/2. Not sure how that is
> possible, or if the stacktrace was misleading, because i had to VT
> switch to a text console to attach the debugger and this might be just
> a side effect of that. But if it is true, then patch 1/2 would not be
> it. Also 1/2 has a potential performance impact, whereas 2/2 doesn't.
> However 2/2 would also need more work, as i can think of more complex
> scenarios where it would filter the wrong events, although not in the
> case of plasmashell or steam. Probably we'd need to sacrifice a few
> sbc bits in the Present events serial field to transport a unique tag
> for each incarnation of the loader_dri3_drawable, like a mini-hash of
> the draw->eid. Ugly ugly...

How about the below?

Idle notify events shouldn't need special treatment, since the pixmap
XIDs of the buffers will be different between loader_dri3_drawable
incarnations, aren't they?


This still leaves the issue that the SBC moves backwards, which could
theoretically result in hangs with apps using glXWaitForSbcOML. Fixing
that would probably require changing the loader_dri3_drawable lifetime
cycle, which would probably be very invasive, if feasible at all. Maybe
we don't need to care about that for the time being, until there's a
real world app running into it.


diff --git a/src/loader/loader_dri3_helper.c b/src/loader/loader_dri3_helper.c
index 6db8303d26d..f0ff2f07bde 100644
--- a/src/loader/loader_dri3_helper.c
+++ b/src/loader/loader_dri3_helper.c
@@ -370,9 +370,17 @@ dri3_handle_present_event(struct loader_dri3_drawable 
*draw,
* checking for wrap.
*/
   if (ce->kind == XCB_PRESENT_COMPLETE_KIND_PIXMAP) {
- draw->recv_sbc = (draw->send_sbc & 0xffffffff00000000LL) | ce->serial;
- if (draw->recv_sbc > draw->send_sbc)
-draw->recv_sbc -= 0x100000000;
+ uint64_t recv_sbc = (draw->send_sbc & 0xffffffff00000000LL) | ce->serial;
+
+ /* Only assume wraparound if that results in exactly the previous
+  * SBC + 1, otherwise ignore received SBC > sent SBC (those are
+  * probably from a previous loader_dri3_drawable instance) to avoid
+  * calculating bogus target MSC values in loader_dri3_swap_buffers_msc
+  */
+ if (recv_sbc <= draw->send_sbc)
+draw->recv_sbc = recv_sbc;
+ else if (recv_sbc == (draw->recv_sbc + 0x100000001ULL))
+draw->recv_sbc = recv_sbc - 0x100000000ULL;

  /* When moving from flip to copy, we assume that we can allocate in
   * a more optimal way if we don't need to cater for the display


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] vulkan/wsi: Only use LINEAR modifier for prime if supported.

2018-05-08 Thread Daniel Stone
On 5 May 2018 at 14:34, Bas Nieuwenhuizen  wrote:
> This was setting the LINEAR modifier if neither the
> X server nor the driver supported modifiers.
>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106180
> Fixes: c80c08e226 "vulkan/wsi/x11: Add support for DRI3 v1.2"
> CC: 18.1 
> CC: Abel Garcia Dorta 
> CC: Daniel Stone 

Acked-by: Daniel Stone 
Reviewed-by: Jason Ekstrand 

Cheers,
Daniel
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106283] Shader replacements works only for limited use cases

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106283

--- Comment #7 from i...@yahoo.com ---
(In reply to Tapani Pälli from comment #4)
> (In reply to iive from comment #3)
> OK I see .. it looks like it should be pretty straightforward to implement
> this support by sharing the dumping code. Will take a look at this later.

Your words made this seem trivial and something that you could make straight
away.
Yet it's been more than a week without any new development (afaik).

I hope you haven't forgotten about it and you could work on it in reasonable
time frame.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106283] Shader replacements works only for limited use cases

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106283

Tapani Pälli  changed:

   What|Removed |Added

 CC||lem...@gmail.com

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106283] Shader replacements works only for limited use cases

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106283

--- Comment #8 from Tapani Pälli  ---
(In reply to iive from comment #7)
> (In reply to Tapani Pälli from comment #4)
> > (In reply to iive from comment #3)
> > OK I see .. it looks like it should be pretty straightforward to implement
> > this support by sharing the dumping code. Will take a look at this later.
> 
> Your words made this seem trivial and something that you could make
> straight away.
> Yet it's been more than a week without any new development (afaik).
> 
> I hope you haven't forgotten about it and you could work on it in reasonable
> time frame.

I'm working on something different ATM and would not like to context switch but
will look at this later. If changes from b...@besd.de make sense, it would be
good to land those first. I'm not sure what implications his set has on using
shader-db so I'm hoping some active user of shader-db to comment/review his
series.

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] st/mesa: add support for ARB_sample_locations

2018-05-08 Thread Rhys Perry
I can't remember why I memset them to 0x88. I don't think it's
necessary, so I'll remove it.

I think I will memset the pipe_sample_locations_state to zero though
so cso_set_sample_locations() works better.

On Mon, May 7, 2018 at 4:30 PM, Brian Paul  wrote:
> More nit-picks below.
>
>
>
> On 05/04/2018 06:09 AM, Rhys Perry wrote:
>>
>> Signed-off-by: Rhys Perry 
>> ---
>>   src/mesa/state_tracker/st_atom_framebuffer.c | 64
>> 
>>   src/mesa/state_tracker/st_cb_msaa.c  | 22 ++
>>   src/mesa/state_tracker/st_extensions.c   |  1 +
>>   3 files changed, 87 insertions(+)
>>
>> diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c
>> b/src/mesa/state_tracker/st_atom_framebuffer.c
>> index 3ef3ff34a9..bb5f02125f 100644
>> --- a/src/mesa/state_tracker/st_atom_framebuffer.c
>> +++ b/src/mesa/state_tracker/st_atom_framebuffer.c
>> @@ -102,6 +102,68 @@ framebuffer_quantize_num_samples(struct st_context
>> *st, unsigned num_samples)
>>  return quantized_samples;
>>   }
>>   +/**
>> + * Update the pipe_context's sample location state
>> + */
>> +static void
>> +update_sample_locations(struct st_context *st,
>> +const struct pipe_framebuffer_state *fb_state)
>> +{
>> +   struct pipe_sample_locations_state locations;
>> +   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
>> +
>> +   if (!st->ctx->Extensions.ARB_sample_locations)
>> +  return;
>> +
>> +   locations.enabled = fb->ProgrammableSampleLocations;
>> +   if (locations.enabled) {
>> +  unsigned grid_width, grid_height;
>> +  int samples = _mesa_geometric_samples(fb);
>> +  int pixel, sample_index;
>> +  bool sample_location_pixel_grid = fb->SampleLocationPixelGrid;
>> +
>> +  st->pipe->get_sample_pixel_grid(st->pipe, samples, &grid_width,
>> &grid_height);
>> +
>> +  /**
>> +   * when a dimension is greater than MAX_SAMPLE_LOCATION_GRID_SIZE,
>> +   * st->ctx->Driver.GetSamplePixelGrid() returns 1 for both
>> dimensions.
>> +   */
>> +  if (grid_width>MAX_SAMPLE_LOCATION_GRID_SIZE ||
>> +  grid_height>MAX_SAMPLE_LOCATION_GRID_SIZE)
>
>
> Space before/after >
>
>
>> + sample_location_pixel_grid = false;
>> +
>> +  for (pixel = 0; pixel < grid_width * grid_height; pixel++) {
>> + for (sample_index = 0; sample_index < samples; sample_index++) {
>> +int table_index = sample_index;
>> +float x = 0.5f, y = 0.5f;
>> +uint8_t loc;
>> +if (sample_location_pixel_grid)
>> +   table_index = pixel * samples + sample_index;
>> +if (fb->SampleLocationTable) {
>> +   x = fb->SampleLocationTable[table_index*2];
>> +   y = fb->SampleLocationTable[table_index*2+1];
>> +}
>> +if (st->state.fb_orientation == Y_0_BOTTOM)
>> +   y = 1.0 - y;
>> +
>> +loc = roundf(CLAMP(x*16.0f, 0.0f, 15.0f));
>> +loc |= (int)roundf(CLAMP(y*16.0f, 0.0f, 15.0f)) << 4;
>
>
> spaces before/after *
>
>
>> +locations.locations[pixel*samples+sample_index] = loc;
>
>
> Spaces before/after *, +
>
>
>> + }
>> +  }
>> +
>> +  util_sample_locations_flip_y(st->pipe, &locations, fb_state);
>> +   } else {
>> +  /**
>> +   * util_sample_locations_flip_y() initializes unused data to 0x88,
>> so
>> +   * this memset is not useful when locations.enabled is true.
>> +   */
>> +  memset(locations.locations, 0x88, sizeof(locations.locations));
>
>
> OK, what's the significance of 0x88 here and in the previous patch?
>
>
>
>> +   }
>> +
>> +   cso_set_sample_locations(st->cso_context, &locations);
>> +}
>> +
>>   /**
>>* Update framebuffer state (color, depth, stencil, etc. buffers)
>>*/
>> @@ -209,4 +271,6 @@ st_update_framebuffer_state( struct st_context *st )
>>  st->state.fb_num_samples =
>> util_framebuffer_get_num_samples(&framebuffer);
>>  st->state.fb_num_layers =
>> util_framebuffer_get_num_layers(&framebuffer);
>>  st->state.fb_num_cb = framebuffer.nr_cbufs;
>> +
>> +   update_sample_locations(st, &framebuffer);
>>   }
>> diff --git a/src/mesa/state_tracker/st_cb_msaa.c
>> b/src/mesa/state_tracker/st_cb_msaa.c
>> index 7f1b4fde91..092e74d28e 100644
>> --- a/src/mesa/state_tracker/st_cb_msaa.c
>> +++ b/src/mesa/state_tracker/st_cb_msaa.c
>> @@ -56,8 +56,30 @@ st_GetSamplePosition(struct gl_context *ctx,
>>   }
>> +static void
>> +st_GetProgrammableSampleCaps(struct gl_context *ctx, struct
>> gl_framebuffer *fb,
>> + GLuint *outBits, GLuint *outWidth, GLuint
>> *outHeight)
>> +{
>> +   struct st_context *st = st_context(ctx);
>> +
>> +   st_validate_state(st, ST_PIPELINE_UPDATE_FRAMEBUFFER);
>> +
>> +   if (st->pipe->get_sample_pixel_grid)
>> +  st->pipe->get_sample_pixel_grid(st->pipe,
>> _mesa_geometric_samples(fb),
>> +  outWidth, outHeight);
>> +   *outBits = 4;
>> +
>> 

[Mesa-dev] [Bug 103852] Rendering errors when running dolphin-emu with Vulkan backend, radv (Super Smash Bros. Melee)

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=103852

--- Comment #19 from Ben Clapp  ---
Hello all, I have some insight and fixes for some of the issues described in
this ticket:

First, regarding the "black screen when cropping is turned on issue", this can
be worked around with the following pull request:
https://github.com/dolphin-emu/dolphin/pull/6786

In theory, there shouldn't be anything wrong with negative Y in the viewport,
and you can still see black screen flickering when adjusting the window size,
but with this change to dolphin's code made, the screen will never remain black
after a resize (only flicker for a moment).
So this issue is probably still worth investigating on the mesa side at some
point.

Regarding the strange stuttering issues I was experiencing, this is a CPU-side
issue that has nothing to do with mesa.
The TR 1950X is essentially two Ryzen chips glued together.
The TR 1950X has two memory controllers, and each memory controller is owned by
one of the two Ryzen chips.
So, for example, I have two 16GB RAM cards plugged into the two memory
controllers on my system, and when running "numactl -H", I can see that 16GB of
RAM are assigned to each of the two NUMA nodes.
It seems that the memory allocator (or maybe the scheduler?) in Linux wasn't
properly allocating memory (or maybe processes) to just one of the two physical
chips/just one of the RAM cards, and this resulted in stuttering (perhaps due
to needing to transfer some memory from one RAM card to the other for use by
another process on the other Ryzen chip?)
The stuttering can be prevented by using numactl like this:
numactl --cpunodebind=0 --membind=0 ./dolphin-emu

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gcc bug / crash in ast_type_qualifier::validate_in_qualifier()?

2018-05-08 Thread Brian Paul

On 05/07/2018 09:45 PM, Matt Turner wrote:

On Mon, May 7, 2018 at 8:02 PM, Brian Paul  wrote:


I don't know when this started happening (I'll try bisecting tomorrow) but
we're seeing a crash in ast_type_qualifier::validate_in_qualifier() in -O3
builds with gcc 5.4.0 on Ubuntu 16.04.

Specifically, at ast_type.cpp:654:

if ((this->flags.i & ~valid_in_mask.flags.i) != 0) {

It seems to be the ~ operator/function which is implemented with an SSE pxor
instruction.

I found that this patch avoids the issue:

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index a1ec0d5..2e518ce 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -474,7 +474,7 @@ enum {

  struct ast_type_qualifier {
 DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
-   DECLARE_BITSET_T(bitset_t, 128);
+   DECLARE_BITSET_T(bitset_t, 96);

 union flags {
struct {

This probably prevents use of xmm instructions, but I haven't inspected the
code.

Is anyone else seeing this?


Yes, it's 
https://urldefense.proofpoint.com/v2/url?u=https-3A__bugs.freedesktop.org_show-5Fbug.cgi-3Fid-3D105497&d=DwIBaQ&c=uilaK90D4TOVoH58JNXRgQ&r=Ie7_encNUsqxbSRbqbNgofw0ITcfE8JKfaUjIQhncGA&m=XjpUGeYdJc6dOmww18kR-siiyCXP1ik8Kmbf8WCZBTw&s=-k8yjJcyWTLYwu0GYXEVsgySdL1AgIW_SvYTKCbPNF8&e=

I was surprised that we decided it's not worth working around.


Yeah.  Thanks for pointing me at that, Matt.

I think this is something we should deal with.  I can't imagine not 
using SSE in this one corner of the compiler would make a measurable 
difference overall.  AFAICT, the code is only hit once per shader input.


I'll post a patch soon.

-Brian
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] glsl: change ast_type_qualifier bitset size to work around GCC 5.4 bug

2018-05-08 Thread Brian Paul
Change the size of the bitset from 128 bits to 96.  This works around an
apparent GCC 5.4 bug in which bad SSE code is generated, leading to a
crash in ast_type_qualifier::validate_in_qualifier() (ast_type.cpp:654).

This can be repro'd with the Piglit test tests/spec/glsl-1.50/execution/
varying-struct-basic-gs-fs.shader_test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105497
Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/glsl/ast.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index a1ec0d5..9b88ff5 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -474,7 +474,13 @@ enum {
 
 struct ast_type_qualifier {
DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
-   DECLARE_BITSET_T(bitset_t, 128);
+   /* Note: this bitset needs to have at least as many bits as the 'q'
+* struct has flags, below.  Previously, the size was 128 instead of 96.
+* But an apparent bug in GCC 5.4.0 causes bad SSE code generation
+* elsewhere, leading to a crash.  96 bits works around the issue.
+* See https://bugs.freedesktop.org/show_bug.cgi?id=105497
+*/
+   DECLARE_BITSET_T(bitset_t, 96);
 
union flags {
   struct {
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] i965: Re-emit depth/stencil/hiz on BRW_NEW_AUX_STATE

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 11:44 PM, Kenneth Graunke 
wrote:

> On Monday, May 7, 2018 12:49:32 PM PDT Jason Ekstrand wrote:
> > ---
> >  src/mesa/drivers/dri/i965/gen7_misc_state.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > index 1ce7658..1508473 100644
> > --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > @@ -195,7 +195,8 @@ const struct brw_tracked_state gen7_depthbuffer = {
> >.mesa = _NEW_BUFFERS |
> >_NEW_DEPTH |
> >_NEW_STENCIL,
> > -  .brw = BRW_NEW_BATCH |
> > +  .brw = BRW_NEW_AUX_STATE |
> > + BRW_NEW_BATCH |
> >   BRW_NEW_BLORP,
> > },
> > .emit = brw_emit_depthbuffer,
> >
>
> Changes like this warrant an explanation - is this fixing any known
> issues?  Found by inspection?
>
> I was surprised to see this, as BRW_NEW_AUX_STATE is usually about color
> surfaces - CCS_E compression, CCS_D fast clears...not HiZ.  But I see
> that intel_miptree_make_shareable might deal with HiZ...as does depth
> clear values...as does set_aux_state...so it certainly seems plausible.
>
> Still, I'm curious to know if you were thinking of anything specific.
>

No, I don't know of anything specific.  I found it while I was working on
the rest of this series and thought it seemed off.  In particular, if we do
a trivial depth clear (which doesn't actually clear but just changes
the clear color), we may not get 3DSTATE_CLEAR_PARAMS updated.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] anv: ignore pColorBlendState if all color attachments of the subpass are unused

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 11:42 PM, Samuel Iglesias Gonsálvez <
sigles...@igalia.com> wrote:

> On 07/05/18 16:46, Jason Ekstrand wrote:
>
> Reviewed-by: Jason Ekstrand 
>
>
> Thanks.
>
> Have you audited to ensure that we don't actually use it in this case?
>
>
> I checked its usage in the driver. There is no problem except in one call
> to has_color_buffer_write_enabled() that could be problematic. If you
> agree, I will add this hunk to this patch to avoid any problem:
>
> diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_
> pipeline.c
> index d3af9304ba3..6016d257584 100644
> --- a/src/intel/vulkan/genX_pipeline.c
> +++ b/src/intel/vulkan/genX_pipeline.c
> @@ -1361,7 +1361,7 @@ has_color_buffer_write_enabled(const struct
> anv_pipeline *pipeline,
>if (binding->index == UINT32_MAX)
>   continue;
>
> -  if (blend->pAttachments[binding->index].colorWriteMask != 0)
> +  if (blend && blend->pAttachments[binding->index].colorWriteMask !=
> 0)
>   return true;
> }
>
> What do you think?
>

Sounds good.

--Jason


> Sam
>
>
>
> On Mon, May 7, 2018 at 1:01 AM, Samuel Iglesias Gonsálvez <
> sigles...@igalia.com> wrote:
>
>> According to Vulkan spec:
>>
>>   "pColorBlendState is a pointer to an instance of the
>>VkPipelineColorBlendStateCreateInfo structure, and is ignored if the
>>pipeline has rasterization disabled or if the subpass of the render
>> pass the
>>pipeline is created against does not use any color attachments."
>>
>> Fixes tests from CL#2505:
>>
>>dEQP-VK.renderpass.*.simple.color_unused_omit_blend_state
>>
>> Signed-off-by: Samuel Iglesias Gonsálvez 
>> ---
>>  src/intel/vulkan/anv_pipeline.c | 14 --
>>  1 file changed, 12 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/intel/vulkan/anv_pipeline.c
>> b/src/intel/vulkan/anv_pipeline.c
>> index 87788de10a5..8f30136b100 100644
>> --- a/src/intel/vulkan/anv_pipeline.c
>> +++ b/src/intel/vulkan/anv_pipeline.c
>> @@ -1247,8 +1247,18 @@ anv_pipeline_validate_create_info(const
>> VkGraphicsPipelineCreateInfo *info)
>>if (subpass && subpass->depth_stencil_attachment.attachment !=
>> VK_ATTACHMENT_UNUSED)
>>   assert(info->pDepthStencilState);
>>
>> -  if (subpass && subpass->color_count > 0)
>> - assert(info->pColorBlendState);
>> +  if (subpass && subpass->color_count > 0) {
>> + bool all_color_unused = true;
>> + for (int i = 0; i < subpass->color_count; i++) {
>> +if (subpass->color_attachments[i].attachment !=
>> VK_ATTACHMENT_UNUSED)
>> +   all_color_unused = false;
>> + }
>> + /* pColorBlendState is ignored if the pipeline has rasterization
>> +  * disabled or if the subpass of the render pass the pipeline is
>> +  * created against does not use any color attachments.
>> +  */
>> + assert(info->pColorBlendState || all_color_unused);
>> +  }
>> }
>>
>> for (uint32_t i = 0; i < info->stageCount; ++i) {
>> --
>> 2.17.0
>>
>> ___
>> mesa-dev mailing list
>> mesa-dev@lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>>
>
>
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] i965: Re-emit depth/stencil/hiz on BRW_NEW_AUX_STATE

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 8:07:36 AM PDT Jason Ekstrand wrote:
> On Mon, May 7, 2018 at 11:44 PM, Kenneth Graunke 
> wrote:
> 
> > On Monday, May 7, 2018 12:49:32 PM PDT Jason Ekstrand wrote:
> > > ---
> > >  src/mesa/drivers/dri/i965/gen7_misc_state.c | 3 ++-
> > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > index 1ce7658..1508473 100644
> > > --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > @@ -195,7 +195,8 @@ const struct brw_tracked_state gen7_depthbuffer = {
> > >.mesa = _NEW_BUFFERS |
> > >_NEW_DEPTH |
> > >_NEW_STENCIL,
> > > -  .brw = BRW_NEW_BATCH |
> > > +  .brw = BRW_NEW_AUX_STATE |
> > > + BRW_NEW_BATCH |
> > >   BRW_NEW_BLORP,
> > > },
> > > .emit = brw_emit_depthbuffer,
> > >
> >
> > Changes like this warrant an explanation - is this fixing any known
> > issues?  Found by inspection?
> >
> > I was surprised to see this, as BRW_NEW_AUX_STATE is usually about color
> > surfaces - CCS_E compression, CCS_D fast clears...not HiZ.  But I see
> > that intel_miptree_make_shareable might deal with HiZ...as does depth
> > clear values...as does set_aux_state...so it certainly seems plausible.
> >
> > Still, I'm curious to know if you were thinking of anything specific.
> >
> 
> No, I don't know of anything specific.  I found it while I was working on
> the rest of this series and thought it seemed off.  In particular, if we do
> a trivial depth clear (which doesn't actually clear but just changes
> the clear color), we may not get 3DSTATE_CLEAR_PARAMS updated.
> 

Ahh, right, that includes 3DSTATE_CLEAR_PARAMS...we probably do want
this then.  Maybe record that and some hand waving in the commit
message.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] i965: Re-emit depth/stencil/hiz on BRW_NEW_AUX_STATE

2018-05-08 Thread Jason Ekstrand
On Tue, May 8, 2018 at 8:19 AM, Kenneth Graunke 
wrote:

> On Tuesday, May 8, 2018 8:07:36 AM PDT Jason Ekstrand wrote:
> > On Mon, May 7, 2018 at 11:44 PM, Kenneth Graunke 
> > wrote:
> >
> > > On Monday, May 7, 2018 12:49:32 PM PDT Jason Ekstrand wrote:
> > > > ---
> > > >  src/mesa/drivers/dri/i965/gen7_misc_state.c | 3 ++-
> > > >  1 file changed, 2 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > > index 1ce7658..1508473 100644
> > > > --- a/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > > +++ b/src/mesa/drivers/dri/i965/gen7_misc_state.c
> > > > @@ -195,7 +195,8 @@ const struct brw_tracked_state gen7_depthbuffer
> = {
> > > >.mesa = _NEW_BUFFERS |
> > > >_NEW_DEPTH |
> > > >_NEW_STENCIL,
> > > > -  .brw = BRW_NEW_BATCH |
> > > > +  .brw = BRW_NEW_AUX_STATE |
> > > > + BRW_NEW_BATCH |
> > > >   BRW_NEW_BLORP,
> > > > },
> > > > .emit = brw_emit_depthbuffer,
> > > >
> > >
> > > Changes like this warrant an explanation - is this fixing any known
> > > issues?  Found by inspection?
> > >
> > > I was surprised to see this, as BRW_NEW_AUX_STATE is usually about
> color
> > > surfaces - CCS_E compression, CCS_D fast clears...not HiZ.  But I see
> > > that intel_miptree_make_shareable might deal with HiZ...as does depth
> > > clear values...as does set_aux_state...so it certainly seems plausible.
> > >
> > > Still, I'm curious to know if you were thinking of anything specific.
> > >
> >
> > No, I don't know of anything specific.  I found it while I was working on
> > the rest of this series and thought it seemed off.  In particular, if we
> do
> > a trivial depth clear (which doesn't actually clear but just
> changes
> > the clear color), we may not get 3DSTATE_CLEAR_PARAMS updated.
> >
>
> Ahh, right, that includes 3DSTATE_CLEAR_PARAMS...we probably do want
> this then.  Maybe record that and some hand waving in the commit
> message.
>

I've added the following:

Certain things can change the aux usage or fast clear color of a depth
surface and we want to re-emit if that happens.  For instance, if you do
a fast depth clear of an already clear depth surface, we will just set
the clear color and not do anything else.  In that case, we could fail
to re-emit 3DSTATE_CLEAR_PARAMS and not get the new fast-clear color.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106157] [Tracker] Mesa 18.1 release tracker

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106157
Bug 106157 depends on bug 105938, which changed state.

Bug 105938 Summary: Incorrect colors since "i965: Use blorp instead of meta for 
PBO texture downloads"
https://bugs.freedesktop.org/show_bug.cgi?id=105938

   What|Removed |Added

 Status|NEEDINFO|RESOLVED
 Resolution|--- |WORKSFORME

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH] glsl: change ast_type_qualifier bitset size to work around GCC 5.4 bug

2018-05-08 Thread Charmaine Lee

Looks good.
Also verified that the workaround fixes the crashes that are seen with VMware 
svga driver due to this bug.

Reviewed-by: Charmaine Lee 


From: Brian Paul 
Sent: Tuesday, May 8, 2018 7:43:49 AM
To: mesa-dev@lists.freedesktop.org
Cc: Matt Turner; Kenneth Graunke; Charmaine Lee; 
mesa-sta...@lists.freedesktop.org
Subject: [PATCH] glsl: change ast_type_qualifier bitset size to work around GCC 
5.4 bug

Change the size of the bitset from 128 bits to 96.  This works around an
apparent GCC 5.4 bug in which bad SSE code is generated, leading to a
crash in ast_type_qualifier::validate_in_qualifier() (ast_type.cpp:654).

This can be repro'd with the Piglit test tests/spec/glsl-1.50/execution/
varying-struct-basic-gs-fs.shader_test

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=105497
Cc: mesa-sta...@lists.freedesktop.org
---
 src/compiler/glsl/ast.h | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
index a1ec0d5..9b88ff5 100644
--- a/src/compiler/glsl/ast.h
+++ b/src/compiler/glsl/ast.h
@@ -474,7 +474,13 @@ enum {

 struct ast_type_qualifier {
DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
-   DECLARE_BITSET_T(bitset_t, 128);
+   /* Note: this bitset needs to have at least as many bits as the 'q'
+* struct has flags, below.  Previously, the size was 128 instead of 96.
+* But an apparent bug in GCC 5.4.0 causes bad SSE code generation
+* elsewhere, leading to a crash.  96 bits works around the issue.
+* See https://bugs.freedesktop.org/show_bug.cgi?id=105497
+*/
+   DECLARE_BITSET_T(bitset_t, 96);

union flags {
   struct {
--
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] util: Make vma.c support non-power-of-two alignments.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> I want to use this in a bucketing allocator for i965.

Reviewed-by: Scott D Phillips 

> ---
>  src/util/vma.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/src/util/vma.c b/src/util/vma.c
> index 3d61f6969ed..d6ee05988ef 100644
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/10] i965: Dump validation list on INTEL_DEBUG=bat, submit.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> This is really useful when debugging any sort of buffer management
> issues, so just printing it during INTEL_DEBUG=bat,submit seems
> reasonable.  With bat, we're already spamming so much output that
> it doesn't really hurt.  With submit, it's still easy to grep for
> the older information, and the new information is nice too.

Reviewed-by: Scott D Phillips 

> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 4 +++-
>  1 file changed, 3 insertions(+), 1 deletion(-)
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] i965: Introduce a "memory zone" concept on BO allocation.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> We're planning to start managing the PPGTT in userspace in the near
> future, rather than relying on the kernel to assign addresses.  While
> most buffers can go anywhere, some need to be restricted to within 4GB
> of a base address.
>
> This commit adds a "memory zone" parameter to the BO allocation
> functions, which lets the caller specify which base address the BO will
> be associated with, or BRW_MEMZONE_OTHER for the full 48-bit VMA.

As an aside, in anv I implemented OTHER to be something more like ANY,
where it allocates from either the high or low vma ranges. That's only
relevant though because I apply a size restriction to the 'high'
range. There's no such restriction here, so no need to have high
allocations try the low range if they fail.

Reviewed-by: Scott D Phillips 

> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c |  3 +-
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 20 ++
>  src/mesa/drivers/dri/i965/brw_bufmgr.h| 40 ++-
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  .../drivers/dri/i965/brw_performance_query.c  |  5 ++-
>  src/mesa/drivers/dri/i965/brw_pipe_control.c  |  3 +-
>  src/mesa/drivers/dri/i965/brw_program.c   |  8 ++--
>  src/mesa/drivers/dri/i965/brw_program_cache.c |  6 ++-
>  src/mesa/drivers/dri/i965/brw_queryobj.c  |  8 ++--
>  src/mesa/drivers/dri/i965/gen6_queryobj.c |  3 +-
>  src/mesa/drivers/dri/i965/gen6_sol.c  |  6 ++-
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 ---
>  .../drivers/dri/i965/intel_buffer_objects.c   |  8 ++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  7 +++-
>  src/mesa/drivers/dri/i965/intel_screen.c  |  8 ++--
>  src/mesa/drivers/dri/i965/intel_upload.c  |  3 +-
>  16 files changed, 107 insertions(+), 37 deletions(-)
>
> For what it's worth, I have a prototype that has separate memzones
> for Instruction Base Address, Surface State Base Address, Dynamic State
> Base Address, and "other" for the rest of the VMA.  It's worked out very
> nicely, so I feel pretty confident that this is a good approach.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Mark Janes
Hi Jan,


Jan Vesely  writes:

> CC: 
> Signed-off-by: Jan Vesely 
> ---
>  src/gallium/drivers/r600/evergreen_compute.c | 1 +
>  1 file changed, 1 insertion(+)
>
> diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> b/src/gallium/drivers/r600/evergreen_compute.c
> index 027930b586..5070243914 100644
> --- a/src/gallium/drivers/r600/evergreen_compute.c
> +++ b/src/gallium/drivers/r600/evergreen_compute.c
> @@ -463,6 +463,7 @@ static void evergreen_delete_compute_state(struct 
> pipe_context *ctx, void *state
>  #ifdef HAVE_OPENCL
>   radeon_shader_binary_clean(&shader->binary);
>   pipe_resource_reference(&shader->code_bo, NULL);

The stable branches do not have this ^^^ pipe_resource_reference call,
so the patch does not apply.  Can you make a proper backport of the fix
to clarify your intentions for stable?

> + pipe_resource_reference(&shader->kernel_param, NULL);
>  #endif
>   r600_destroy_shader(&shader->bc);
>   }
> -- 
> 2.17.0
>
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106157] [Tracker] Mesa 18.1 release tracker

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106157
Bug 106157 depends on bug 106180, which changed state.

Bug 106180 Summary: [bisected] radv vulkan smoke test black screen (Add support 
for DRI3 v1.2)
https://bugs.freedesktop.org/show_bug.cgi?id=106180

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

-- 
You are receiving this mail because:
You are the assignee for the bug.
You are the QA Contact for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [Bug 106180] [bisected] radv vulkan smoke test black screen (Add support for DRI3 v1.2)

2018-05-08 Thread bugzilla-daemon
https://bugs.freedesktop.org/show_bug.cgi?id=106180

Jason Ekstrand  changed:

   What|Removed |Added

 Status|NEW |RESOLVED
 Resolution|--- |FIXED

--- Comment #18 from Jason Ekstrand  ---
This should be fixed by the following commit on master:

commit b17cfb08a3fc9a599eff64fffe48daba398a672f
Author: Bas Nieuwenhuizen 
Date:   Sat May 5 15:34:44 2018 +0200

vulkan/wsi: Only use LINEAR modifier for prime if supported.

This was setting the LINEAR modifier if neither the
X server nor the driver supported modifiers.

Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106180
Fixes: c80c08e226 "vulkan/wsi/x11: Add support for DRI3 v1.2"
CC: 18.1 
Tested-by: Abel Garcia Dorta 
Acked-by: Daniel Stone 
Reviewed-by: Jason Ekstrand 

-- 
You are receiving this mail because:
You are the assignee for the bug.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [RFC] Fix attempt for Mesa + X-Server 1.20 + modesetting-ddx hangs on KDE5.

2018-05-08 Thread Michel Dänzer
On 2018-05-08 06:41 PM, Adam Jackson wrote:
> On Fri, 2018-05-04 at 15:45 +0200, Mario Kleiner wrote:
> 
>> The real problem, if i understand it correctly, is the way the life-time
>> of dri3_drawables and loader_dri3_drawables is managed atm. by Mesa's
>> bindContext() functions. Whenever glXMakeCurrent() etc. are called to
>> assign new/different GLXDrawables to the same context (ie. one context
>> reused for drawing into many different drawables, as opposed to using
>> one dedicated context for each drawable), we destroy the underlying
>> DRIDrawables/dri3_drawables_loader_dri3_drawables and they lose all
>> state wrt. pending bufferswaps, msc, sbc, ust.
> 
> That's utterly, utterly, utterly broken.
> 
>> Therefore one of these patches is either a good enough fix for the KDE
>> hang problems atm. or a diagnosis of the problem as a starting point for
>> brighter people to deal with the root cause ;-)
> 
> I'll see what I can come up with. I'm not sure there's a great fix for
> this that doesn't involve a few more roundtrips at MakeCurrent time,
> since we can lose drawables asynchronously, but such is life.

I had an idea, at least for SBC:

In dri3_destroy_drawable, store the drawable's send_sbc value in a hash
table (keyed on the XID) in struct dri3_screen. Then in
dri3_create_drawable, if there's an entry for the drawable's XID in the
hash table, initialize send_sbc and recv_sbc to that.

If nobody beats me to it, I'll try this tomorrow.


-- 
Earthling Michel Dänzer   |   http://www.amd.com
Libre software enthusiast | Mesa and X developer
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/10] i965: Add virtual memory allocator infrastructure to brw_bufmgr.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> This introduces a new fast virtual memory allocator integrated with our
> BO cache bucketing.  For larger objects, it falls back to the simple
> free-list allocator (util_vma).
>
> This puts the allocators in place but doesn't enable softpin yet.
> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 291 -
>  src/mesa/drivers/dri/i965/brw_bufmgr.h |   2 +
>  2 files changed, 292 insertions(+), 1 deletion(-)
>
> I'm happy to write more comments here.  It's a pretty simple system, but
> not necessarily the most intuitive.  Feel free to ask questions.
>
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 66828f319be..07c0d2f7633 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -60,6 +60,8 @@
>  #include "util/macros.h"
>  #include "util/hash_table.h"
>  #include "util/list.h"
> +#include "util/u_dynarray.h"
> +#include "util/vma.h"
>  #include "brw_bufmgr.h"
>  #include "brw_context.h"
>  #include "string.h"
> @@ -98,9 +100,40 @@ atomic_add_unless(int *v, int add, int unless)
> return c == unless;
>  }
>  
> +/**
> + * i965 fixed-size bucketing VMA allocator.
> + *
> + * The BO cache maintains "cache buckets" for buffers of various sizes.
> + * All buffers in a given bucket are identically sized - when allocating,
> + * we always round up to the bucket size.  This means that virtually all
> + * allocations are fixed-size; only buffers which are too large to fit in
> + * a bucket can be variably-sized.
> + *
> + * We create an allocator for each bucket.  Each contains a free-list, where
> + * each node contains a <starting address, 64-bit bitmap> pair.  Each bit
> + * represents a bucket-sized block of memory.  (At the first level, each
> + * bit corresponds to a page.  For the second bucket, bits correspond to
> + * two pages, and so on.)  1 means a block is free, and 0 means it's in-use.

maybe add in "the lowest bit in the bitmap is for the first block"

> + *
> + * This makes allocations cheap - any bit of any node will do.  We can pick
> + * the head of the list and use ffs() to find a free block.  If there are
> + * none, we allocate 64 blocks from a larger allocator - either a bigger
> + * bucketing allocator, or a fallback top-level allocator for large objects.
> + */
> +struct vma_bucket_node {
> +   uint64_t start_address;
> +   uint64_t bitmap;
> +};
> +
>  struct bo_cache_bucket {
> +   /** List of cached BOs. */
> struct list_head head;
> +
> +   /** Size of this bucket, in bytes. */
> uint64_t size;
> +
> +   /** List of vma_bucket_nodes. */
> +   struct util_dynarray vma_list[BRW_MEMZONE_COUNT];
>  };
>  
>  struct brw_bufmgr {
> @@ -116,6 +149,8 @@ struct brw_bufmgr {
> struct hash_table *name_table;
> struct hash_table *handle_table;
>  
> +   struct util_vma_heap vma_allocator[BRW_MEMZONE_COUNT];
> +
> bool has_llc:1;
> bool has_mmap_wc:1;
> bool bo_reuse:1;
> @@ -128,6 +163,10 @@ static int bo_set_tiling_internal(struct brw_bo *bo, 
> uint32_t tiling_mode,
>  
>  static void bo_free(struct brw_bo *bo);
>  
> +static uint64_t __vma_alloc(struct brw_bufmgr *bufmgr,
> +enum brw_memory_zone memzone,
> +uint64_t size, uint64_t alignment);
> +
>  static uint32_t
>  key_hash_uint(const void *key)
>  {
> @@ -222,6 +261,198 @@ bucket_for_size(struct brw_bufmgr *bufmgr, uint64_t 
> size)
>&bufmgr->cache_bucket[index] : NULL;
>  }
>  
> +static enum brw_memory_zone
> +memzone_for_address(uint64_t address)
> +{
> +   const uint64_t _4GB = 1ull << 32;
> +
> +   if (address >= _4GB)
> +  return BRW_MEMZONE_OTHER;
> +
> +   return BRW_MEMZONE_LOW_4G;
> +}
> +
> +static uint64_t
> +bucket_vma_alloc(struct brw_bufmgr *bufmgr,
> + struct bo_cache_bucket *bucket,
> + enum brw_memory_zone memzone)
> +{
> +   struct util_dynarray *vma_list = &bucket->vma_list[memzone];
> +   struct vma_bucket_node *node;
> +
> +   if (vma_list->size == 0) {
> +  /* This bucket allocator is out of space - allocate a new block of
> +   * memory for 64 blocks from a larger allocator (either a larger
> +   * bucket or util_vma).
> +   *
> +   * We align the address to the node size (64 blocks) so that
> +   * bucket_vma_free can easily compute the starting address of this
> +   * block by rounding any address we return down to the node size.
> +   *
> +   * Set the first bit used, and return the start address.
> +   */
> +  uint64_t node_size = 64ull * bucket->size;
> +  node = util_dynarray_grow(vma_list, sizeof(struct vma_bucket_node));
> +  node->start_address = __vma_alloc(bufmgr, memzone, node_size, 
> node_size);
> +  node->bitmap = ~1ull;
> +  return node->start_address;
> +   }
> +
> +   /* Pick any bit from any node - they're all the right size and free. */
> +   node = util_dynarray_top_ptr(vma_list, struct 

[Mesa-dev] [PATCH 0/2] intel: Stall before disable indirect state pointers

2018-05-08 Thread Lionel Landwerlin
Hi all,

Here are a couple of patches stalling the command streamer before
disabling the indirect state pointers.

We started disabling indirect state pointers on CNL and later applied
it back to Gen7+. A recent bug report on Gen9 seems to imply that we
get a page fault right after the instruction disabling the indirect
state pointers.

The theory here is that disabling the pointers might affect the
previous running 3DPRIMITIVE which is still pulling data out of the
pointers (we can see that EUs & samplers still at work while the page
fault happens).

These patches pass the CI but it's not confirmed whether that solves
the issue reported.

Cheers,

Lionel Landwerlin (2):
  i965: require post sync operation prior to ISP disable
  anv: emit stall at pixel scoreboard before ISP disable

 src/intel/vulkan/genX_cmd_buffer.c   | 9 -
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 8 ++--
 2 files changed, 14 insertions(+), 3 deletions(-)

--
2.17.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/2] anv: emit stall at pixel scoreboard before ISP disable

2018-05-08 Thread Lionel Landwerlin
We want to make sure that all indirect state data has been loaded into
the EUs before disabling the pointers.

Signed-off-by: Lionel Landwerlin 
Fixes: 78c125af3904c ("anv/gen10: Ignore push constant packets during context 
restore.")
---
 src/intel/vulkan/genX_cmd_buffer.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 2882cf36506..526e18af108 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1420,14 +1420,21 @@ genX(BeginCommandBuffer)(
  * context restore, so the mentioned hang doesn't happen. However,
  * software must program push constant commands for all stages prior to
  * rendering anything. So we flag them dirty in BeginCommandBuffer.
+ *
+ * Finally, we also make sure to stall at pixel scoreboard to make sure the
+ * constants have been loaded into the EUs prior to disabling the push
+ * constants so that it doesn't hang a previous 3DPRIMITIVE.
  */
 static void
 emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
 {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.IndirectStatePointersDisable = true;
+ pc.StallAtPixelScoreboard = true;
  pc.CommandStreamerStallEnable = true;
}
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.IndirectStatePointersDisable = true;
+   }
 }
 
 VkResult
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 1/2] i965: require post sync operation prior to ISP disable

2018-05-08 Thread Lionel Landwerlin
Invalidating the indirect state pointers might affect a previously
scheduled & still running 3DPRIMITIVE (causing page fault). So stall
on pixel scoreboard before that.

v2: Fix compile issue :(

v3: Stall on pixel scoreboard

Signed-off-by: Lionel Landwerlin 
Fixes: ca19ee33d7d39 ("i965/gen10: Ignore push constant packets during context 
restore.")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106243
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 02278be6d62..1a32e9a8e96 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -349,13 +349,17 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
  * context restore, so the mentioned hang doesn't happen. However,
  * software must program push constant commands for all stages prior to
  * rendering anything, so we flag them as dirty.
+ *
+ * Finally, we also make sure to stall at pixel scoreboard to make sure the
+ * constants have been loaded into the EUs prior to disabling the push
+ * constants so that it doesn't hang a previous 3DPRIMITIVE.
  */
 void
 gen10_emit_isp_disable(struct brw_context *brw)
 {
+   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD);
brw_emit_pipe_control(brw,
- PIPE_CONTROL_ISP_DIS |
- PIPE_CONTROL_CS_STALL,
+ PIPE_CONTROL_ISP_DIS,
  NULL, 0, 0);
 
brw->vs.base.push_constants_dirty = true;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Dylan Baker
Indeed. This is currently not queued for 18.1 as it doesn't apply cleanly. The
18.1-proposed branch at git://people.freedesktop.org/~dbaker/mesa is where it
needs to apply if that's what needs to happen.

Thanks,
Dylan

Quoting Mark Janes (2018-05-08 09:28:24)
> Hi Jan,
> 
> 
> Jan Vesely  writes:
> 
> > CC: 
> > Signed-off-by: Jan Vesely 
> > ---
> >  src/gallium/drivers/r600/evergreen_compute.c | 1 +
> >  1 file changed, 1 insertion(+)
> >
> > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> > b/src/gallium/drivers/r600/evergreen_compute.c
> > index 027930b586..5070243914 100644
> > --- a/src/gallium/drivers/r600/evergreen_compute.c
> > +++ b/src/gallium/drivers/r600/evergreen_compute.c
> > @@ -463,6 +463,7 @@ static void evergreen_delete_compute_state(struct 
> > pipe_context *ctx, void *state
> >  #ifdef HAVE_OPENCL
> >   radeon_shader_binary_clean(&shader->binary);
> >   pipe_resource_reference(&shader->code_bo, NULL);
> 
> The stable branches do not have this ^^^ pipe_resource_reference call,
> so the patch does not apply.  Can you make a proper backport of the fix
> to clarify your intentions for stable?
> 
> > + pipe_resource_reference(&shader->kernel_param, NULL);
> >  #endif
> >   r600_destroy_shader(&shader->bc);
> >   }
> > -- 
> > 2.17.0
> >
> > ___
> > mesa-stable mailing list
> > mesa-sta...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gcc bug / crash in ast_type_qualifier::validate_in_qualifier()?

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 1:23:32 AM PDT Eero Tamminen wrote:
> Hi,
> 
> On 08.05.2018 06:45, Matt Turner wrote:
> > On Mon, May 7, 2018 at 8:02 PM, Brian Paul  wrote:
> >>
> >> I don't know when this started happening (I'll try bisecting tomorrow) but
> >> we're seeing a crash in ast_type_qualifier::validate_in_qualifier() in -O3
> >> builds with gcc 5.4.0 on Ubuntu 16.04.
> >>
> >> Specifically, at ast_type.cpp:654:
> >>
> >> if ((this->flags.i & ~valid_in_mask.flags.i) != 0) {
> >>
> >> It seems to be the ~ operator/function which is implemented with an SSE 
> >> pxor
> >> instruction.
> >>
> >> I found that this patch avoids the issue:
> >>
> >> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> >> index a1ec0d5..2e518ce 100644
> >> --- a/src/compiler/glsl/ast.h
> >> +++ b/src/compiler/glsl/ast.h
> >> @@ -474,7 +474,7 @@ enum {
> >>
> >>   struct ast_type_qualifier {
> >>  DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
> >> -   DECLARE_BITSET_T(bitset_t, 128);
> >> +   DECLARE_BITSET_T(bitset_t, 96);
> >>
> >>  union flags {
> >> struct {
> >>
> >> This probably prevents use of xmm instructions, but I haven't inspected the
> >> code.
> >>
> >> Is anyone else seeing this?
> > 
> > Yes, it's https://bugs.freedesktop.org/show_bug.cgi?id=105497
> > 
> > I was surprised that we decided it's not worth working around.
> 
> By making above part perform worse for everybody using -O3, or by
> disabling vectorization optimization (enabled by -O3) just for
> the buggy GCC version?
> 
> (If that GCC version gets it wrong in this place, it may get it
> wrong also elsewhere, so better turn that particular -O3 optimization
> off completely.)
> 
> Is there an upstream GCC bug report about that, which would tell
> which GCC versions are affected?
> 
> 
>   - Eero

I wouldn't worry about performance here, the AST code is basically
never the hot path (even without shader cache, and now it's glacial).
I was honestly surprised to see it start using xmm intrinsics.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 07/10] i965: Prepare batchbuffer module for softpin support.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> If EXEC_OBJECT_PINNED is set, we don't want to emit any relocations.
> We simply want to add the BO to the validation list, and possibly mark
> it as writeable.  The new brw_use_pinned_bo() interface does just that.
>
> To avoid having to make every caller consider both the relocation and
> softpin cases, we make emit_reloc() call brw_use_pinned_bo() when given
> a softpinned buffer.
>
> We also can't grow buffers that are softpinned - the mechanism places a
> larger BO at the same offset as the original, which requires moving BOs
> around in the VMA.  With softpin, we only allocate enough VMA for the
> original size of the BO.
> ---
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 40 +--
>  src/mesa/drivers/dri/i965/intel_batchbuffer.h |  4 ++
>  2 files changed, 40 insertions(+), 4 deletions(-)
>
> Overallocating is gross, but I have to make incremental progress somehow.
> For batch buffers, the ultimate plan is to switch from growing to batch
> chaining (just create another batch and MI_BATCH_BUFFER_START to GOTO
> the new batch and carry on).  We can do that on Gen8+.  It's easier to
> do that in the softpin world - otherwise, we'd need a third set of
> relocation lists, which gets messier.
>
> For state buffers, the plan is to set Dynamic State Base Address to a
> fixed 4GB region of the VMA, then just use intel_upload.c to make
> however many buffers we need, allocated out of that memzone.  Being able
> to fix BO addresses within 4GB of the base address eliminates the need
> to force all state to be in a single BO, and gives us a lot more
> flexibility with less magic required.
>
> But again...can't convert everything overnight, especially when having
> to care about older hardware and ancient kernels.

The plan of changing overallocation to chaining in a future patch sounds
good to me. A nice enhancement at some point might be a
INTEL_DEBUG=wasted-memory where we print out how much memory we're
wasting due to overallocation, either here or by bucketing.

I don't quite understand Chris's comment so this isn't meant to be an
assertion that I think what he's saying isn't an issue, but fwiw:

Reviewed-by: Scott D Phillips 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Jan Vesely
Hi,

the code_bo line was added in ea1fff4416036066cff51826f95b4703d7211008
Which was also requested for stable [0].
sorry for the confusion. Is there a way to indicate dependencies that I
missed?

regards,
Jan

[0] https://lists.freedesktop.org/archives/mesa-stable/2018-May/008249.html

On Tue, 2018-05-08 at 10:10 -0700, Dylan Baker wrote:
> Indeed. This is currently not queued for 18.1 as it doesn't apply cleanly. The
> 18.1-proposed branch at git://people.freedesktop.org/~dbaker/mesa is where it
> needs to apply if that's what needs to happen.
> 
> Thanks,
> Dylan
> 
> Quoting Mark Janes (2018-05-08 09:28:24)
> > Hi Jan,
> > 
> > 
> > Jan Vesely  writes:
> > 
> > > CC: 
> > > Signed-off-by: Jan Vesely 
> > > ---
> > >  src/gallium/drivers/r600/evergreen_compute.c | 1 +
> > >  1 file changed, 1 insertion(+)
> > > 
> > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> > > b/src/gallium/drivers/r600/evergreen_compute.c
> > > index 027930b586..5070243914 100644
> > > --- a/src/gallium/drivers/r600/evergreen_compute.c
> > > +++ b/src/gallium/drivers/r600/evergreen_compute.c
> > > @@ -463,6 +463,7 @@ static void evergreen_delete_compute_state(struct 
> > > pipe_context *ctx, void *state
> > >  #ifdef HAVE_OPENCL
> > >   radeon_shader_binary_clean(&shader->binary);
> > >   pipe_resource_reference(&shader->code_bo, NULL);
> > 
> > The stable branches do not have this ^^^ pipe_resource_reference call,
> > so the patch does not apply.  Can you make a proper backport of the fix
> > to clarify your intentions for stable?
> > 
> > > + pipe_resource_reference(&shader->kernel_param, NULL);
> > >  #endif
> > >   r600_destroy_shader(&shader->bc);
> > >   }
> > > -- 
> > > 2.17.0
> > > 
> > > ___
> > > mesa-stable mailing list
> > > mesa-sta...@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
> > 
> > ___
> > mesa-stable mailing list
> > mesa-sta...@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-stable


signature.asc
Description: This is a digitally signed message part
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/10] i965: Emit VF cache invalidates for 48-bit addressing bugs with softpin.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> We'd like to start using soft-pin to assign BO addresses up front, and
> never move them again.  Our previous plan for dealing with 48-bit VF
> cache bugs was to relocate vertex buffers to the low 4GB, so we'd never
> have addresses that alias in the low 32 bits.  But that requires moving
> buffers dynamically.
>
> This patch tracks the last seen BO address for each vertex/index buffer,
> and emits a VF cache invalidate if the high bits change.  (Ideally, we
> won't hit this case very often.)  This should work for the soft-pin
> case, but unfortunately won't work in the relocation case, as we don't
> actually know the addresses.  So, we have to use both methods.
> ---
>  src/mesa/drivers/dri/i965/brw_context.h   |  6 ++
>  src/mesa/drivers/dri/i965/genX_state_upload.c | 62 +++
>  2 files changed, 68 insertions(+)
>
> Migration is nice at times, but keeping everything static is also really
> nice for pre-baking states...hard to know exactly what to do here.  It
> would be nice if we could allocate all GL buffer objects that might be
> VBOs out of the same 4GB, but that's...difficult to know, and might be
> too limiting.  *shrug*
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h 
> b/src/mesa/drivers/dri/i965/brw_context.h
> index fb637e22281..7d6aa1a9c51 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -983,6 +983,9 @@ struct brw_context
>  
>/* For the initial pushdown, keep the list of vbo inputs. */
>struct vbo_inputs draw_arrays;
> +
> +  /* High bits of the last seen vertex buffer address (for workarounds). 
> */
> +  uint16_t last_bo_high_bits[33];

Maybe it's time to add a #define for 33 being the maximum number of
vertex buffers.

> } vb;
>  
> struct {
> @@ -1003,6 +1006,9 @@ struct brw_context
> * referencing the same index buffer.
> */
>unsigned int start_vertex_offset;
> +
> +  /* High bits of the last seen index buffer address (for workarounds). 
> */
> +  uint16_t last_bo_high_bits;
> } ib;
>  
> /* Active vertex program:
> diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c 
> b/src/mesa/drivers/dri/i965/genX_state_upload.c
> index b1867c1a1cc..e517b91de93 100644
> --- a/src/mesa/drivers/dri/i965/genX_state_upload.c
> +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c
> @@ -480,6 +480,64 @@ upload_format_size(uint32_t upload_format)
> }
>  }
>  
> +static UNUSED uint16_t
> +pinned_bo_high_bits(struct brw_bo *bo)
> +{
> +   return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
> +}
> +
> +/* The VF cache designers apparently cut corners, and made the cache
> + * only consider the bottom 32 bits of memory addresses.

You mentioned it in the thread here, but I think it's important to
mention in the comment too that the cache is considering the tuple of
the bottom 32 bits and the vertex buffer's index in the state
message. Otherwise it would look like an individual set of vertices
could be self-conflicting between indices.

with those,

Reviewed-by: Scott D Phillips 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] gcc bug / crash in ast_type_qualifier::validate_in_qualifier()?

2018-05-08 Thread Francisco Jerez
Kenneth Graunke  writes:

> On Tuesday, May 8, 2018 1:23:32 AM PDT Eero Tamminen wrote:
>> Hi,
>> 
>> On 08.05.2018 06:45, Matt Turner wrote:
>> > On Mon, May 7, 2018 at 8:02 PM, Brian Paul  wrote:
>> >>
>> >> I don't know when this started happening (I'll try bisecting tomorrow) but
>> >> we're seeing a crash in ast_type_qualifier::validate_in_qualifier() in -O3
>> >> builds with gcc 5.4.0 on Ubuntu 16.04.
>> >>
>> >> Specifically, at ast_type.cpp:654:
>> >>
>> >> if ((this->flags.i & ~valid_in_mask.flags.i) != 0) {
>> >>
>> >> It seems to be the ~ operator/function which is implemented with an SSE 
>> >> pxor
>> >> instruction.
>> >>
>> >> I found that this patch avoids the issue:
>> >>
>> >> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
>> >> index a1ec0d5..2e518ce 100644
>> >> --- a/src/compiler/glsl/ast.h
>> >> +++ b/src/compiler/glsl/ast.h
>> >> @@ -474,7 +474,7 @@ enum {
>> >>
>> >>   struct ast_type_qualifier {
>> >>  DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
>> >> -   DECLARE_BITSET_T(bitset_t, 128);
>> >> +   DECLARE_BITSET_T(bitset_t, 96);
>> >>
>> >>  union flags {
>> >> struct {
>> >>
>> >> This probably prevents use of xmm instructions, but I haven't inspected 
>> >> the
>> >> code.
>> >>
>> >> Is anyone else seeing this?
>> > 
>> > Yes, it's https://bugs.freedesktop.org/show_bug.cgi?id=105497
>> > 
>> > I was surprised that we decided it's not worth working around.
>> 
>> By making above part perform worse for everybody using -O3, or by
>> disabling vectorization optimization (enabled by -O3) just for
>> the buggy GCC version?
>> 
>> (If that GCC version gets it wrong in this place, it may get it
>> wrong also elsewhere, so better turn that particular -O3 optimization
>> off completely.)
>> 
>> Is there an upstream GCC bug report about that, which would tell
>> which GCC versions are affected?
>> 
>> 
>>  - Eero
>
> I wouldn't worry about performance here, the AST code is basically
> never the hot path (even without shader cache, and now it's glacial).
> I was honestly surprised to see it start using xmm intrinsics.
>

I agree that vectorizing this data structure is unlikely to make any
measurable performance difference in practice, but I think Eero still
has a point -- How do we know that this GCC optimization is not
miscompiling code elsewhere, potentially in a less frequently hit
codepath?  I wouldn't take the risk of shipping a binary of Mesa built
with GCC 5.4 and -O3 even with this workaround.  It may make more sense
to drop support for this GCC version (or as Eero suggested to turn the
optimization off).


> --Ken
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] util: Add a virtual memory allocator

2018-05-08 Thread Jordan Justen
On 2018-05-07 17:30:43, Scott D Phillips wrote:
> From: Jason Ekstrand 
> 
> This is a simple linear-walk first-fit allocator roughly based on the
> allocator in the radeon winsys code.  This allocator has two primary
> functional differences:
> 
>  1) It cleanly returns 0 on allocation failure
> 
>  2) It allocates addresses top-down instead of bottom-up.
> 
> The second one is needed for Intel because high addresses (with bit 47
> set) need to be canonicalized in order to work properly.  If we allocate
> bottom-up, then high addresses will be very rare (if they ever happen).
> We'd rather always have high addresses so that the canonicalization code
> gets better testing.
> 
> Reviewed-by: Scott D Phillips 
> Tested-by: Scott D Phillips 
> ---
>  src/util/Makefile.sources |   4 +-
>  src/util/meson.build  |   2 +
>  src/util/vma.c| 231 
> ++
>  src/util/vma.h|  53 +++
>  4 files changed, 289 insertions(+), 1 deletion(-)
>  create mode 100644 src/util/vma.c
>  create mode 100644 src/util/vma.h
> 
> diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
> index 104ecae8ed3..534520ce763 100644
> --- a/src/util/Makefile.sources
> +++ b/src/util/Makefile.sources
> @@ -56,7 +56,9 @@ MESA_UTIL_FILES := \
> u_string.h \
> u_thread.h \
> u_vector.c \
> -   u_vector.h
> +   u_vector.h \
> +   vma.c \
> +   vma.h
>  
>  MESA_UTIL_GENERATED_FILES = \
> format_srgb.c
> diff --git a/src/util/meson.build b/src/util/meson.build
> index eece1cefef6..14660e0fa0c 100644
> --- a/src/util/meson.build
> +++ b/src/util/meson.build
> @@ -81,6 +81,8 @@ files_mesa_util = files(
>'u_thread.h',
>'u_vector.c',
>'u_vector.h',
> +  'vma.c',
> +  'vma.h',
>  )
>  
>  install_data('drirc', install_dir : get_option('sysconfdir'))
> diff --git a/src/util/vma.c b/src/util/vma.c
> new file mode 100644
> index 000..0d4e097e21f
> --- /dev/null
> +++ b/src/util/vma.c
> @@ -0,0 +1,231 @@
> +/*
> + * Copyright © 2018 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 
> DEALINGS
> + * IN THE SOFTWARE.
> + */
> +
> +#include 
> +
> +#include "util/u_math.h"
> +#include "util/vma.h"
> +
> +struct util_vma_hole {
> +   struct list_head link;
> +   uint64_t offset;
> +   uint64_t size;
> +};
> +
> +#define util_vma_foreach_hole(_hole, _heap) \
> +   list_for_each_entry(struct util_vma_hole, _hole, &(_heap)->holes, link)
> +
> +#define util_vma_foreach_hole_safe(_hole, _heap) \
> +   list_for_each_entry_safe(struct util_vma_hole, _hole, &(_heap)->holes, 
> link)
> +
> +void
> +util_vma_heap_init(struct util_vma_heap *heap,
> +   uint64_t start, uint64_t size)
> +{
> +   list_inithead(&heap->holes);
> +   util_vma_heap_free(heap, start, size);
> +}
> +
> +void
> +util_vma_heap_finish(struct util_vma_heap *heap)
> +{
> +   util_vma_foreach_hole_safe(hole, heap)
> +  free(hole);
> +}
> +
> +static void
> +util_vma_heap_validate(struct util_vma_heap *heap)
> +{
> +   uint64_t prev_offset = 0;
> +   util_vma_foreach_hole(hole, heap) {
> +  assert(hole->offset > 0);
> +  assert(hole->size > 0);
> +
> +  if (&hole->link == heap->holes.next) {
> + /* This must be the top-most hole.  Assert that, if it overflows, it
> +  * overflows to 0, i.e. 2^64.
> +  */
> + assert(hole->size + hole->offset == 0 ||
> +hole->size + hole->offset > hole->offset);
> +  } else {
> + /* This is not the top-most hole so it must not overflow and, in
> +  * fact, must be strictly lower than the top-most hole.  If
> +  * hole->size + hole->offset == prev_offset, then we failed to join
> +  * holes during a util_vma_heap_free.
> +  */
> + assert(hole->size + hole->offset > hole->offset &&
> +hole->size +

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Dylan Baker
I have both pulled into the 18.1-proposed tree now. I think we need to have a
wider discussion about better ways to propose patches to stable after the fact
like ea1fff4416036066cff51826f95b4703d7211008. Thanks for helping get this
resolved so quickly.

Dylan

Quoting Jan Vesely (2018-05-08 10:24:53)
> Hi,
> 
> the code_bo line was added in ea1fff4416036066cff51826f95b4703d7211008
> Which was also requested for stable [0].
> sorry for the confusion. Is there a way to indicate dependencies that I
> missed?
> 
> regards,
> Jan
> 
> [0] https://lists.freedesktop.org/archives/mesa-stable/2018-May/008249.html
> 
> On Tue, 2018-05-08 at 10:10 -0700, Dylan Baker wrote:
> > Indeed. This is currently not queued for 18.1 as it doesn't apply cleanly. 
> > The
> > 18.1-proposed branch at git://people.freedesktop.org/~dbaker/mesa is where 
> > it
> > needs to apply if that's what needs to happen.
> > 
> > Thanks,
> > Dylan
> > 
> > Quoting Mark Janes (2018-05-08 09:28:24)
> > > Hi Jan,
> > > 
> > > 
> > > Jan Vesely  writes:
> > > 
> > > > CC: 
> > > > Signed-off-by: Jan Vesely 
> > > > ---
> > > >  src/gallium/drivers/r600/evergreen_compute.c | 1 +
> > > >  1 file changed, 1 insertion(+)
> > > > 
> > > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
> > > > b/src/gallium/drivers/r600/evergreen_compute.c
> > > > index 027930b586..5070243914 100644
> > > > --- a/src/gallium/drivers/r600/evergreen_compute.c
> > > > +++ b/src/gallium/drivers/r600/evergreen_compute.c
> > > > @@ -463,6 +463,7 @@ static void evergreen_delete_compute_state(struct 
> > > > pipe_context *ctx, void *state
> > > >  #ifdef HAVE_OPENCL
> > > >   radeon_shader_binary_clean(&shader->binary);
> > > >   pipe_resource_reference(&shader->code_bo, NULL);
> > > 
> > > The stable branches do not have this ^^^ pipe_resource_reference call,
> > > so the patch does not apply.  Can you make a proper backport of the fix
> > > to clarify your intentions for stable?
> > > 
> > > > + pipe_resource_reference(&shader->kernel_param, NULL);
> > > >  #endif
> > > >   r600_destroy_shader(&shader->bc);
> > > >   }
> > > > -- 
> > > > 2.17.0
> > > > 
> > > > ___
> > > > mesa-stable mailing list
> > > > mesa-sta...@lists.freedesktop.org
> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
> > > 
> > > ___
> > > mesa-stable mailing list
> > > mesa-sta...@lists.freedesktop.org
> > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/10] i965: Softpin all buffers and never use relocations.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> On Thursday, May 3, 2018 11:51:52 PM PDT Chris Wilson wrote:
>> Quoting Kenneth Graunke (2018-05-04 02:12:39)
>> > ---
>> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 2 +-
>> >  1 file changed, 1 insertion(+), 1 deletion(-)
>> > 
>> > This enables it for Broadwell (with a 64-bit kernel) and Skylake+ (with
>> > any kernel).  Unfortunately, it doesn't enable it for Cherryview as that
>> > has a 32-bit GTT.  We could switch that over as well, but we'd have to
>> > have a single memory zone, which is kind of a special case...
>> 
>> I would make it conditional on gem_param(HAS_ALIASING_PPGTT) > 1 (that
>> is on full-ppgtt, where you don't have to care about framebuffers and
>> GTT mmaps conflicting).
>> -Chris
>
> Good idea, thanks!  Fixed for v2.

I think the ALIASING_PPGTT test is actually redundant with the
I915_CONTEXT_PARAM_GTT_SIZE > 4 GiB test that you're already doing. So
patch v1 is

Reviewed-by: Scott D Phillips 

> --Ken
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/10] util: Make vma.c support non-power-of-two alignments.

2018-05-08 Thread Jordan Justen
Could potentially just fold into "util: Add a virtual memory
allocator".

Reviewed-by: Jordan Justen 

On 2018-05-03 18:12:33, Kenneth Graunke wrote:
> I want to use this in a bucketing allocator for i965.
> ---
>  src/util/vma.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
> 
> diff --git a/src/util/vma.c b/src/util/vma.c
> index 3d61f6969ed..d6ee05988ef 100644
> --- a/src/util/vma.c
> +++ b/src/util/vma.c
> @@ -88,7 +88,6 @@ util_vma_heap_alloc(struct util_vma_heap *heap,
> assert(size > 0);
>  
> assert(alignment > 0);
> -   assert(util_is_power_of_two_nonzero(alignment));
>  
> util_vma_heap_validate(heap);
>  
> @@ -107,7 +106,7 @@ util_vma_heap_alloc(struct util_vma_heap *heap,
>/* Align the offset.  We align down and not up because we are 
> allocating
> * from the top of the hole and not the bottom.
> */
> -  offset &= ~(alignment - 1);
> +  offset = (offset / alignment) * alignment;
>  
>if (offset < hole->offset)
>   continue;
> -- 
> 2.17.0
> 
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 10/10] i965: Require softpin support for Cannonlake and later.

2018-05-08 Thread Scott D Phillips
Kenneth Graunke  writes:

> This isn't strictly necessary, but anyone running Cannonlake will
> already have Kernel 4.5 or later, so there's no reason to support
> the relocation model on Gen10+.
>
> This will let us avoid dealing with them for new features.

I think the discussion about aliasing ppgtt won't impact this bit of
code where we've already checked for gtt_size > 4 GiB. Maybe we should
warn about aliasing ppgtt on newer gens? Either way,

Reviewed-by: Scott D Phillips 

> ---
>  src/mesa/drivers/dri/i965/brw_bufmgr.c | 4 
>  1 file changed, 4 insertions(+)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 4fd95e1d78c..9a059f38aaa 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -1738,6 +1738,10 @@ brw_bufmgr_init(struct gen_device_info *devinfo, int 
> fd)
>  4096, _4GB);
>   util_vma_heap_init(&bufmgr->vma_allocator[BRW_MEMZONE_OTHER],
>  1 * _4GB, gtt_size - 1 * _4GB);
> +  } else if (devinfo->gen >= 10) {
> + fprintf(stderr, "i965 requires softpin (Kernel 4.5) on Gen10+.");
> + free(bufmgr);
> + return NULL;
>}
> }
>  
> -- 
> 2.17.0
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/8] util: Add a virtual memory allocator

2018-05-08 Thread Jason Ekstrand
On Tue, May 8, 2018 at 10:58 AM, Jordan Justen 
wrote:

> On 2018-05-07 17:30:43, Scott D Phillips wrote:
> > From: Jason Ekstrand 
> >
> > This is a simple linear-walk first-fit allocator roughly based on the
> > allocator in the radeon winsys code.  This allocator has two primary
> > functional differences:
> >
> >  1) It cleanly returns 0 on allocation failure
> >
> >  2) It allocates addresses top-down instead of bottom-up.
> >
> > The second one is needed for Intel because high addresses (with bit 47
> > set) need to be canonicalized in order to work properly.  If we allocate
> > bottom-up, then high addresses will be very rare (if they ever happen).
> > We'd rather always have high addresses so that the canonicalization code
> > gets better testing.
> >
> > Reviewed-by: Scott D Phillips 
> > Tested-by: Scott D Phillips 
> > ---
> >  src/util/Makefile.sources |   4 +-
> >  src/util/meson.build  |   2 +
> >  src/util/vma.c| 231 ++
> 
> >  src/util/vma.h|  53 +++
> >  4 files changed, 289 insertions(+), 1 deletion(-)
> >  create mode 100644 src/util/vma.c
> >  create mode 100644 src/util/vma.h
> >
> > diff --git a/src/util/Makefile.sources b/src/util/Makefile.sources
> > index 104ecae8ed3..534520ce763 100644
> > --- a/src/util/Makefile.sources
> > +++ b/src/util/Makefile.sources
> > @@ -56,7 +56,9 @@ MESA_UTIL_FILES := \
> > u_string.h \
> > u_thread.h \
> > u_vector.c \
> > -   u_vector.h
> > +   u_vector.h \
> > +   vma.c \
> > +   vma.h
> >
> >  MESA_UTIL_GENERATED_FILES = \
> > format_srgb.c
> > diff --git a/src/util/meson.build b/src/util/meson.build
> > index eece1cefef6..14660e0fa0c 100644
> > --- a/src/util/meson.build
> > +++ b/src/util/meson.build
> > @@ -81,6 +81,8 @@ files_mesa_util = files(
> >'u_thread.h',
> >'u_vector.c',
> >'u_vector.h',
> > +  'vma.c',
> > +  'vma.h',
> >  )
> >
> >  install_data('drirc', install_dir : get_option('sysconfdir'))
> > diff --git a/src/util/vma.c b/src/util/vma.c
> > new file mode 100644
> > index 000..0d4e097e21f
> > --- /dev/null
> > +++ b/src/util/vma.c
> > @@ -0,0 +1,231 @@
> > +/*
> > + * Copyright © 2018 Intel Corporation
> > + *
> > + * Permission is hereby granted, free of charge, to any person
> obtaining a
> > + * copy of this software and associated documentation files (the
> "Software"),
> > + * to deal in the Software without restriction, including without
> limitation
> > + * the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> > + * and/or sell copies of the Software, and to permit persons to whom the
> > + * Software is furnished to do so, subject to the following conditions:
> > + *
> > + * The above copyright notice and this permission notice (including the
> next
> > + * paragraph) shall be included in all copies or substantial portions
> of the
> > + * Software.
> > + *
> > + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> > + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> > + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> > + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> > + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
> ARISING
> > + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> > + * IN THE SOFTWARE.
> > + */
> > +
> > +#include 
> > +
> > +#include "util/u_math.h"
> > +#include "util/vma.h"
> > +
> > +struct util_vma_hole {
> > +   struct list_head link;
> > +   uint64_t offset;
> > +   uint64_t size;
> > +};
> > +
> > +#define util_vma_foreach_hole(_hole, _heap) \
> > +   list_for_each_entry(struct util_vma_hole, _hole, &(_heap)->holes,
> link)
> > +
> > +#define util_vma_foreach_hole_safe(_hole, _heap) \
> > +   list_for_each_entry_safe(struct util_vma_hole, _hole,
> &(_heap)->holes, link)
> > +
> > +void
> > +util_vma_heap_init(struct util_vma_heap *heap,
> > +   uint64_t start, uint64_t size)
> > +{
> > +   list_inithead(&heap->holes);
> > +   util_vma_heap_free(heap, start, size);
> > +}
> > +
> > +void
> > +util_vma_heap_finish(struct util_vma_heap *heap)
> > +{
> > +   util_vma_foreach_hole_safe(hole, heap)
> > +  free(hole);
> > +}
> > +
> > +static void
> > +util_vma_heap_validate(struct util_vma_heap *heap)
> > +{
> > +   uint64_t prev_offset = 0;
> > +   util_vma_foreach_hole(hole, heap) {
> > +  assert(hole->offset > 0);
> > +  assert(hole->size > 0);
> > +
> > +  if (&hole->link == heap->holes.next) {
> > + /* This must be the top-most hole.  Assert that, if it
> overflows, it
> > +  * overflows to 0, i.e. 2^64.
> > +  */
> > + assert(hole->size + hole->offset == 0 ||
> > +hole->size + hole->offset > hole->offset);
> > +  } else {
> > + /* This is not the top-most hole so it mu

Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Mark Janes
Dylan Baker  writes:

> I have both pulled into the 18.1-proposed tree now. I think we need to have a
> wider discussion about better ways to propose patches to stable after the fact
> like ea1fff4416036066cff51826f95b4703d7211008. Thanks for helping get this
> resolved so quickly.

We have to expect that stable annotations will be missed for some
patches.  No one is perfect.  The patch author could help a lot by
making sure the commit gets into the correct stable branches, because
they are the ones with the most awareness of the situation.

> Dylan
>
> Quoting Jan Vesely (2018-05-08 10:24:53)
>> Hi,
>> 
>> the code_bo line was added in ea1fff4416036066cff51826f95b4703d7211008
>> Which was also requested for stable [0].
>> sorry for the confusion. Is there a way to indicate dependencies that I
>> missed?
>> 
>> regards,
>> Jan
>> 
>> [0] https://lists.freedesktop.org/archives/mesa-stable/2018-May/008249.html
>> 
>> On Tue, 2018-05-08 at 10:10 -0700, Dylan Baker wrote:
>> > Indeed. This is currently not queued for 18.1 as it doesn't apply cleanly. 
>> > The
>> > 18.1-proposed branch at git://people.freedesktop.org/~dbaker/mesa is where 
>> > it
>> > needs to apply if that's what needs to happen.
>> > 
>> > Thanks,
>> > Dylan
>> > 
>> > Quoting Mark Janes (2018-05-08 09:28:24)
>> > > Hi Jan,
>> > > 
>> > > 
>> > > Jan Vesely  writes:
>> > > 
>> > > > CC: 
>> > > > Signed-off-by: Jan Vesely 
>> > > > ---
>> > > >  src/gallium/drivers/r600/evergreen_compute.c | 1 +
>> > > >  1 file changed, 1 insertion(+)
>> > > > 
>> > > > diff --git a/src/gallium/drivers/r600/evergreen_compute.c 
>> > > > b/src/gallium/drivers/r600/evergreen_compute.c
>> > > > index 027930b586..5070243914 100644
>> > > > --- a/src/gallium/drivers/r600/evergreen_compute.c
>> > > > +++ b/src/gallium/drivers/r600/evergreen_compute.c
>> > > > @@ -463,6 +463,7 @@ static void evergreen_delete_compute_state(struct 
>> > > > pipe_context *ctx, void *state
>> > > >  #ifdef HAVE_OPENCL
>> > > >   radeon_shader_binary_clean(&shader->binary);
>> > > >   pipe_resource_reference(&shader->code_bo, NULL);
>> > > 
>> > > The stable branches do not have this ^^^ pipe_resource_reference call,
>> > > so the patch does not apply.  Can you make a proper backport of the fix
>> > > to clarify your intentions for stable?
>> > > 
>> > > > + pipe_resource_reference(&shader->kernel_param, NULL);
>> > > >  #endif
>> > > >   r600_destroy_shader(&shader->bc);
>> > > >   }
>> > > > -- 
>> > > > 2.17.0
>> > > > 
>> > > > ___
>> > > > mesa-stable mailing list
>> > > > mesa-sta...@lists.freedesktop.org
>> > > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
>> > > 
>> > > ___
>> > > mesa-stable mailing list
>> > > mesa-sta...@lists.freedesktop.org
>> > > https://lists.freedesktop.org/mailman/listinfo/mesa-stable
> ___
> mesa-stable mailing list
> mesa-sta...@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-stable
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Dylan Baker
Quoting Mark Janes (2018-05-08 11:10:19)
> Dylan Baker  writes:
> 
> > I have both pulled into the 18.1-proposed tree now. I think we need to have 
> > a
> > wider discussion about better ways to propose patches to stable after the 
> > fact
> > like ea1fff4416036066cff51826f95b4703d7211008. Thanks for helping get this
> > resolved so quickly.
> 
> We have to expect that stable annotations will be missed for some
> patches.  No one is perfect.  The patch author could help a lot by
> making sure the commit gets into the correct stable branches, because
> they are the ones with the most awareness of the situation.
> 

I agree 100%. 

I'm just trying to think of a more robust solution than manually mailing
mesa-stable. The signal to noise ratio of that list is pretty bad. I'm
wondering if after the fdo migration to gitlab it would be better to use pull
requests for these kinds of nominations.

Dylan


signature.asc
Description: signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 06/10] i965: Add virtual memory allocator infrastructure to brw_bufmgr.

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 10:02:22 AM PDT Scott D Phillips wrote:
> Kenneth Graunke  writes:
[snip]
> > +   /* If this node is now completely full, remove it from the free list. */
> > +   if (node->bitmap == 0ull) {
> > +  (void) util_dynarray_pop(vma_list, struct vma_bucket_node);
> > +   }
> > +
> > +   return node->start_address + bit * bucket->size;
> 
> This looks like a use-after-free. It's not really unsafe because
> dynarray is just reducing .size, so it could only go bad if somebody
> else sneaks in there and _trims or _grows. So it's safe, but it hits the
> danger pattern matcher in my head.

Whoops.  Indeed, that should be safe, but it's also stupid.  Fixed
(by making a uint64_t addr = ... before popping and then using that).

> Along similar lines, what is the mechanism that prevents multiple
> threads from entering into brw_bo_alloc with the same bufmgr
> simultaneously? The util/vma functions explode under simultaneous entry,
> so I had to guard them with a mutex in anv. You don't have something
> like that so I'm assuming something like the base mesa state tracking
> code is keeping that from happening for you.

The bufmgr->lock mutex is held on all paths calling vma_alloc or
vma_free...except for brw_bufmgr_destroy, but there's no concurrency
going on at final tear down.

[snip]
> > +   /* Canonicalize the address.
> > +*
> > +* The Broadwell PRM Vol. 2a, MI_LOAD_REGISTER_MEM::MemoryAddress says:
> > +*
> > +*"This field specifies the address of the memory location where the
> > +* register value specified in the DWord above will read from. The
> > +* address specifies the DWord location of the data. Range =
> > +* GraphicsVirtualAddress[63:2] for a DWord register GraphicsAddress
> > +* [63:48] are ignored by the HW and assumed to be in correct
> > +* canonical form [63:48] == [47]."
> > +*/
> > +   const int shift = 63 - 47;
> > +   addr = (((int64_t) addr) << shift) >> shift;
> 
> I updated this in anv to be:
> 
> (int64_t)(addr << shift) >> shift;
> 
> If addr happened to have any of the top 16 bits set then you will get
> Undefined Behavior(tm) in C. Of course, that won't happen right here
> because __vma_alloc always returns non-canonical addresses, but better
> to have the fixed copy of the function sitting around.
> 
> With that update, this is
> 
> Reviewed-by: Scott D Phillips 

Good call.  If you move your copy into a common header, I'll just use
that.  For now, I've updated it to match your code.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 08/16] ac/surface/gfx6: don't overallocate mipmapped HTILE

2018-05-08 Thread Marek Olšák
On Tue, May 8, 2018 at 3:06 AM, Nicolai Hähnle  wrote:

> On 02.05.2018 06:00, Marek Olšák wrote:
>
>> From: Marek Olšák 
>>
>> ---
>>   src/amd/common/ac_surface.c | 9 +++--
>>   1 file changed, 7 insertions(+), 2 deletions(-)
>>
>> diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
>> index b2af1f70b69..341a7854fe5 100644
>> --- a/src/amd/common/ac_surface.c
>> +++ b/src/amd/common/ac_surface.c
>> @@ -841,22 +841,27 @@ static int gfx6_compute_surface(ADDR_HANDLE
>> addrlib,
>>  *
>>  * "dcc_alignment * 4" was determined by trial and error.
>>  */
>> surf->dcc_size = align64(surf->surf_size >> 8,
>>  surf->dcc_alignment * 4);
>> }
>> /* Make sure HTILE covers the whole miptree, because the shader
>> reads
>>  * TC-compatible HTILE even for levels where it's disabled by DB.
>>  */
>> -   if (surf->htile_size && config->info.levels > 1)
>> -   surf->htile_size *= 2;
>> +   if (surf->htile_size && config->info.levels > 1 &&
>> +   surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) {
>> +   surf->htile_size =
>> +   surf->surf_size * 4 / (8 * 8 * surf->bpe *
>> +  MAX2(1,
>> config->info.samples));
>>
>
> Can you explain this formula? In particular where the 4 comes from?
>

Sure.

Definitions (for the whole miptree):
1) surf_size = bpe * samples * num_pixels
2) htile_size = num_pixels * 4 / (8 * 8), i.e. 4 bytes of HTILE per 8x8 pixel tile

Therefore:
num_pixels = surf_size / (bpe * samples), so:
htile_size = (surf_size / (bpe * samples)) * 4 / (8 * 8)

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH] radeon/vce: add firmware support for ver 53 and up

2018-05-08 Thread boyuan.zhang
From: Boyuan Zhang 

All vce firmwares with major version greater than or equal to 53 are supported

Signed-off-by: Boyuan Zhang 
---
 src/gallium/drivers/radeon/radeon_vce.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/gallium/drivers/radeon/radeon_vce.c 
b/src/gallium/drivers/radeon/radeon_vce.c
index 427bf01e..17a1125 100644
--- a/src/gallium/drivers/radeon/radeon_vce.c
+++ b/src/gallium/drivers/radeon/radeon_vce.c
@@ -506,7 +506,7 @@ struct pipe_video_codec *si_vce_create_encoder(struct 
pipe_context *context,
break;
 
default:
-   if ((sscreen->info.vce_fw_version & (0xff << 24)) == FW_53) {
+   if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53) {
si_vce_52_init(enc);
si_get_pic_param = si_vce_52_get_param;
} else
@@ -542,7 +542,7 @@ bool si_vce_is_fw_version_supported(struct si_screen 
*sscreen)
case FW_52_8_3:
return true;
default:
-   if ((sscreen->info.vce_fw_version & (0xff << 24)) == FW_53)
+   if ((sscreen->info.vce_fw_version & (0xff << 24)) >= FW_53)
return true;
else
return false;
-- 
2.7.4

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH 2/2] eg/compute: Drop reference to kernel_param bo in destructor

2018-05-08 Thread Marek Olšák
On Tue, May 8, 2018 at 2:20 PM, Dylan Baker  wrote:

> Quoting Mark Janes (2018-05-08 11:10:19)
> > Dylan Baker  writes:
> >
> > > I have both pulled into the 18.1-proposed tree now. I think we need to
> have a
> > > wider discussion about better ways to propose patches to stable after
> the fact
> > > like ea1fff4416036066cff51826f95b4703d7211008. Thanks for helping get
> this
> > > resolved so quickly.
> >
> > We have to expect that stable annotations will be missed for some
> > patches.  No one is perfect.  The patch author could help a lot by
> > making sure the commit gets into the correct stable branches, because
> > they are the ones with the most awareness of the situation.
> >
>
> I agree 100%.
>
> I'm just trying to think of a more robust solution than manually mailing
> mesa-stable. The signal to noise ratio of that list is pretty bad. I'm
> wondering if after the fdo migration to gitlab it would be better to use
> pull
> requests for these kinds of nominations.
>

A pull request is something I'm probably going to use for missed patches.
AMD has its own stable branches mirroring Mesa stable branches for our
official driver releases, and we have some fixes there that were not
applied to upstream stable branches for various reasons. So we can easily
see the diff between upstream and internal.

Marek
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [Mesa-stable] [PATCH] glsl: change ast_type_qualifier bitset size to work around GCC 5.4 bug

2018-05-08 Thread Ian Romanick
On 05/08/2018 07:43 AM, Brian Paul wrote:
> Change the size of the bitset from 128 bits to 96.  This works around an
> apparent GCC 5.4 bug in which bad SSE code is generated, leading to a
> crash in ast_type_qualifier::validate_in_qualifier() (ast_type.cpp:654).

Is there a difference in the code quality on newer versions of GCC?
This work-around seems fine, but I wonder if it should be restricted to
the affected GCC versions.

> This can be repro'd with the Piglit test tests/spec/glsl-1.50/execution/
> varying-struct-basic-gs-fs.shader_test
> 
> Bugzilla:https://bugs.freedesktop.org/show_bug.cgi?id=105497
> Cc: mesa-sta...@lists.freedesktop.org
> ---
>  src/compiler/glsl/ast.h | 8 +++-
>  1 file changed, 7 insertions(+), 1 deletion(-)
> 
> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
> index a1ec0d5..9b88ff5 100644
> --- a/src/compiler/glsl/ast.h
> +++ b/src/compiler/glsl/ast.h
> @@ -474,7 +474,13 @@ enum {
>  
>  struct ast_type_qualifier {
> DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
> -   DECLARE_BITSET_T(bitset_t, 128);
> +   /* Note: this bitset needs to have at least as many bits as the 'q'
> +* struct has flags, below.  Previously, the size was 128 instead of 96.
> +* But an apparent bug in GCC 5.4.0 causes bad SSE code generation
> +* elsewhere, leading to a crash.  96 bits works around the issue.
> +* See https://bugs.freedesktop.org/show_bug.cgi?id=105497
> +*/
> +   DECLARE_BITSET_T(bitset_t, 96);
>  
> union flags {
>struct {
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/10] i965: Introduce a "memory zone" concept on BO allocation.

2018-05-08 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2018-05-03 18:12:35, Kenneth Graunke wrote:
> We're planning to start managing the PPGTT in userspace in the near
> future, rather than relying on the kernel to assign addresses.  While
> most buffers can go anywhere, some need to be restricted to within 4GB
> of a base address.
> 
> This commit adds a "memory zone" parameter to the BO allocation
> functions, which lets the caller specify which base address the BO will
> be associated with, or BRW_MEMZONE_OTHER for the full 48-bit VMA.
> ---
>  src/mesa/drivers/dri/i965/brw_blorp.c |  3 +-
>  src/mesa/drivers/dri/i965/brw_bufmgr.c| 20 ++
>  src/mesa/drivers/dri/i965/brw_bufmgr.h| 40 ++-
>  src/mesa/drivers/dri/i965/brw_context.h   |  1 +
>  .../drivers/dri/i965/brw_performance_query.c  |  5 ++-
>  src/mesa/drivers/dri/i965/brw_pipe_control.c  |  3 +-
>  src/mesa/drivers/dri/i965/brw_program.c   |  8 ++--
>  src/mesa/drivers/dri/i965/brw_program_cache.c |  6 ++-
>  src/mesa/drivers/dri/i965/brw_queryobj.c  |  8 ++--
>  src/mesa/drivers/dri/i965/gen6_queryobj.c |  3 +-
>  src/mesa/drivers/dri/i965/gen6_sol.c  |  6 ++-
>  src/mesa/drivers/dri/i965/intel_batchbuffer.c | 15 ---
>  .../drivers/dri/i965/intel_buffer_objects.c   |  8 ++--
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c |  7 +++-
>  src/mesa/drivers/dri/i965/intel_screen.c  |  8 ++--
>  src/mesa/drivers/dri/i965/intel_upload.c  |  3 +-
>  16 files changed, 107 insertions(+), 37 deletions(-)
> 
> For what it's worth, I have a prototype that has separate memzones
> for Instruction Base Address, Surface State Base Address, Dynamic State
> Base Address, and "other" for the rest of the VMA.  It's worked out very
> nicely, so I feel pretty confident that this is a good approach.
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_blorp.c 
> b/src/mesa/drivers/dri/i965/brw_blorp.c
> index ba14136edc6..44394be0781 100644
> --- a/src/mesa/drivers/dri/i965/brw_blorp.c
> +++ b/src/mesa/drivers/dri/i965/brw_blorp.c
> @@ -817,7 +817,8 @@ blorp_get_client_bo(struct brw_context *brw,
> * data which we need to copy into a BO.
> */
>struct brw_bo *bo =
> - brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size);
> + brw_bo_alloc(brw->bufmgr, "tmp_tex_subimage_src", size,
> +  BRW_MEMZONE_OTHER);
>if (bo == NULL) {
>   perf_debug("intel_texsubimage: temp bo creation failed: size = 
> %u\n",
>  size);
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.c 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> index 66f30a1637f..66828f319be 100644
> --- a/src/mesa/drivers/dri/i965/brw_bufmgr.c
> +++ b/src/mesa/drivers/dri/i965/brw_bufmgr.c
> @@ -268,6 +268,7 @@ static struct brw_bo *
>  bo_alloc_internal(struct brw_bufmgr *bufmgr,
>const char *name,
>uint64_t size,
> +  enum brw_memory_zone memzone,
>unsigned flags,
>uint32_t tiling_mode,
>uint32_t stride)
> @@ -426,23 +427,27 @@ err:
>  
>  struct brw_bo *
>  brw_bo_alloc(struct brw_bufmgr *bufmgr,
> - const char *name, uint64_t size)
> + const char *name, uint64_t size,
> + enum brw_memory_zone memzone)
>  {
> -   return bo_alloc_internal(bufmgr, name, size, 0, I915_TILING_NONE, 0);
> +   return bo_alloc_internal(bufmgr, name, size, memzone,
> +0, I915_TILING_NONE, 0);
>  }
>  
>  struct brw_bo *
>  brw_bo_alloc_tiled(struct brw_bufmgr *bufmgr, const char *name,
> -   uint64_t size, uint32_t tiling_mode, uint32_t pitch,
> +   uint64_t size, enum brw_memory_zone memzone,
> +   uint32_t tiling_mode, uint32_t pitch,
> unsigned flags)
>  {
> -   return bo_alloc_internal(bufmgr, name, size, flags, tiling_mode, pitch);
> +   return bo_alloc_internal(bufmgr, name, size, memzone,
> +flags, tiling_mode, pitch);
>  }
>  
>  struct brw_bo *
>  brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const char *name,
> -  int x, int y, int cpp, uint32_t tiling,
> -  uint32_t *pitch, unsigned flags)
> +  int x, int y, int cpp, enum brw_memory_zone memzone,
> +  uint32_t tiling, uint32_t *pitch, unsigned flags)
>  {
> uint64_t size;
> uint32_t stride;
> @@ -477,7 +482,8 @@ brw_bo_alloc_tiled_2d(struct brw_bufmgr *bufmgr, const 
> char *name,
> if (tiling == I915_TILING_NONE)
>stride = 0;
>  
> -   return bo_alloc_internal(bufmgr, name, size, flags, tiling, stride);
> +   return bo_alloc_internal(bufmgr, name, size, flags,
> +memzone, tiling, stride);
>  }
>  
>  /**
> diff --git a/src/mesa/drivers/dri/i965/brw_bufmgr.h 
> b/src/mesa/drivers/dri/i965/brw_bufmgr.h
> index 68f5e0c2c85..9ad129744b2 

Re: [Mesa-dev] [Mesa-stable] [PATCH] glsl: change ast_type_qualifier bitset size to work around GCC 5.4 bug

2018-05-08 Thread Ian Romanick
On 05/08/2018 12:13 PM, Ian Romanick wrote:
> On 05/08/2018 07:43 AM, Brian Paul wrote:
>> Change the size of the bitset from 128 bits to 96.  This works around an
>> apparent GCC 5.4 bug in which bad SSE code is generated, leading to a
>> crash in ast_type_qualifier::validate_in_qualifier() (ast_type.cpp:654).
> 
> Is there a difference in the code quality on newer versions of GCC?
> This work-around seems fine, but I wonder if it should be restricted to
> the affected GCC versions.

Now that I've read the rest of the thread,

Reviewed-by: Ian Romanick 

>> This can be repro'd with the Piglit test tests/spec/glsl-1.50/execution/
>> varying-struct-basic-gs-fs.shader_test
>>
>> Bugzilla:https://bugs.freedesktop.org/show_bug.cgi?id=105497
>> Cc: mesa-sta...@lists.freedesktop.org
>> ---
>>  src/compiler/glsl/ast.h | 8 +++-
>>  1 file changed, 7 insertions(+), 1 deletion(-)
>>
>> diff --git a/src/compiler/glsl/ast.h b/src/compiler/glsl/ast.h
>> index a1ec0d5..9b88ff5 100644
>> --- a/src/compiler/glsl/ast.h
>> +++ b/src/compiler/glsl/ast.h
>> @@ -474,7 +474,13 @@ enum {
>>  
>>  struct ast_type_qualifier {
>> DECLARE_RALLOC_CXX_OPERATORS(ast_type_qualifier);
>> -   DECLARE_BITSET_T(bitset_t, 128);
>> +   /* Note: this bitset needs to have at least as many bits as the 'q'
>> +* struct has flags, below.  Previously, the size was 128 instead of 96.
>> +* But an apparent bug in GCC 5.4.0 causes bad SSE code generation
>> +* elsewhere, leading to a crash.  96 bits works around the issue.
>> +* See https://bugs.freedesktop.org/show_bug.cgi?id=105497
>> +*/
>> +   DECLARE_BITSET_T(bitset_t, 96);
>>  
>> union flags {
>>struct {
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/5] st/mesa: add support for ARB_sample_locations

2018-05-08 Thread Ian Romanick
On 05/04/2018 05:09 AM, Rhys Perry wrote:
> Signed-off-by: Rhys Perry 
> ---
>  src/mesa/state_tracker/st_atom_framebuffer.c | 64 
> 
>  src/mesa/state_tracker/st_cb_msaa.c  | 22 ++
>  src/mesa/state_tracker/st_extensions.c   |  1 +
>  3 files changed, 87 insertions(+)
> 
> diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c 
> b/src/mesa/state_tracker/st_atom_framebuffer.c
> index 3ef3ff34a9..bb5f02125f 100644
> --- a/src/mesa/state_tracker/st_atom_framebuffer.c
> +++ b/src/mesa/state_tracker/st_atom_framebuffer.c
> @@ -102,6 +102,68 @@ framebuffer_quantize_num_samples(struct st_context *st, 
> unsigned num_samples)
> return quantized_samples;
>  }
>  
> +/**
> + * Update the pipe_context's sample location state
> + */
> +static void
> +update_sample_locations(struct st_context *st,
> +const struct pipe_framebuffer_state *fb_state)
> +{
> +   struct pipe_sample_locations_state locations;
> +   struct gl_framebuffer *fb = st->ctx->DrawBuffer;
> +
> +   if (!st->ctx->Extensions.ARB_sample_locations)
> +  return;
> +
> +   locations.enabled = fb->ProgrammableSampleLocations;
> +   if (locations.enabled) {
> +  unsigned grid_width, grid_height;
> +  int samples = _mesa_geometric_samples(fb);
> +  int pixel, sample_index;
> +  bool sample_location_pixel_grid = fb->SampleLocationPixelGrid;
> +
> +  st->pipe->get_sample_pixel_grid(st->pipe, samples, &grid_width, 
> &grid_height);
> +
> +  /**

Since Brian picked some nits...  Don't use /** inside functions.  It's a
marker for Doxygen that has no meaning here.  Put /* and the first words
of the comment on the same line.

> +   * when a dimension is greater than MAX_SAMPLE_LOCATION_GRID_SIZE,
> +   * st->ctx->Driver.GetSamplePixelGrid() returns 1 for both dimensions.
> +   */
> +  if (grid_width>MAX_SAMPLE_LOCATION_GRID_SIZE ||
> +  grid_height>MAX_SAMPLE_LOCATION_GRID_SIZE)
> + sample_location_pixel_grid = false;
> +
> +  for (pixel = 0; pixel < grid_width * grid_height; pixel++) {
> + for (sample_index = 0; sample_index < samples; sample_index++) {
> +int table_index = sample_index;
> +float x = 0.5f, y = 0.5f;
> +uint8_t loc;
> +if (sample_location_pixel_grid)
> +   table_index = pixel * samples + sample_index;
> +if (fb->SampleLocationTable) {
> +   x = fb->SampleLocationTable[table_index*2];
> +   y = fb->SampleLocationTable[table_index*2+1];
> +}
> +if (st->state.fb_orientation == Y_0_BOTTOM)
> +   y = 1.0 - y;
> +
> +loc = roundf(CLAMP(x*16.0f, 0.0f, 15.0f));
> +loc |= (int)roundf(CLAMP(y*16.0f, 0.0f, 15.0f)) << 4;
> +locations.locations[pixel*samples+sample_index] = loc;
> + }
> +  }
> +
> +  util_sample_locations_flip_y(st->pipe, &locations, fb_state);
> +   } else {
> +  /**
> +   * util_sample_locations_flip_y() initializes unused data to 0x88, so
> +   * this memset is not useful when locations.enabled is true.
> +   */
> +  memset(locations.locations, 0x88, sizeof(locations.locations));
> +   }
> +
> +   cso_set_sample_locations(st->cso_context, &locations);
> +}
> +
>  /**
>   * Update framebuffer state (color, depth, stencil, etc. buffers)
>   */
> @@ -209,4 +271,6 @@ st_update_framebuffer_state( struct st_context *st )
> st->state.fb_num_samples = util_framebuffer_get_num_samples(&framebuffer);
> st->state.fb_num_layers = util_framebuffer_get_num_layers(&framebuffer);
> st->state.fb_num_cb = framebuffer.nr_cbufs;
> +
> +   update_sample_locations(st, &framebuffer);
>  }
> diff --git a/src/mesa/state_tracker/st_cb_msaa.c 
> b/src/mesa/state_tracker/st_cb_msaa.c
> index 7f1b4fde91..092e74d28e 100644
> --- a/src/mesa/state_tracker/st_cb_msaa.c
> +++ b/src/mesa/state_tracker/st_cb_msaa.c
> @@ -56,8 +56,30 @@ st_GetSamplePosition(struct gl_context *ctx,
>  }
>  
>  
> +static void
> +st_GetProgrammableSampleCaps(struct gl_context *ctx, struct gl_framebuffer 
> *fb,
> + GLuint *outBits, GLuint *outWidth, GLuint 
> *outHeight)
> +{
> +   struct st_context *st = st_context(ctx);
> +
> +   st_validate_state(st, ST_PIPELINE_UPDATE_FRAMEBUFFER);
> +
> +   if (st->pipe->get_sample_pixel_grid)
> +  st->pipe->get_sample_pixel_grid(st->pipe, _mesa_geometric_samples(fb),
> +  outWidth, outHeight);
> +   *outBits = 4;
> +
> +   /* We could handle this better in some circumstances,
> +* but it's not really an issue */
> +   if (*outWidth>MAX_SAMPLE_LOCATION_GRID_SIZE || 
> *outHeight>MAX_SAMPLE_LOCATION_GRID_SIZE) {
> +  *outWidth = 1;
> +  *outHeight = 1;
> +   }
> +}
> +
>  void
>  st_init_msaa_functions(struct dd_function_table *functions)
>  {
> functions->GetSamplePosition = st_GetSamplePosition;
> +   functions->Ge

[Mesa-dev] [PATCH] glx/dri: Take an extra reference on our own GLX drawables

2018-05-08 Thread Adam Jackson
dri*_bind_context, when switching current drawables, will drop the
reference on the old one; since that refcount has probably now gone to
zero that means we lose all the state we applied to that drawable
before, like when swaps are expected to complete.

Dropping this reference might make some sense for drawables that aren't
_ours_, since we don't get events for destroyed resources and need to
rely on the server throwing errors when we name a no-longer-valid
drawable. But if the resource is one that this client created, we can be
reasonably sure that it will be explicitly destroyed by the same client
- and if not, the client is likely to exit anyway, so the memory leak
doesn't matter.

So, bump the refcnt if the XID of the drawable indicates that it's one
of ours. This is, admittedly, a hack. The proper solution would involve
rather more surgery to the MakeCurrent path than I can type quickly, let
alone test promptly against a wide enough range of servers and DRIs to
have any confidence in. I'll work on the real solution, but in the
meantime this is effectively not a memory leak for any real scenario,
and fixes a real bug.

Signed-off-by: Adam Jackson 
Cc: Michel Dänzer 
Cc: Mike Lothian 
Cc: Mario Kleiner 
Cc: Tobias Klausmann 
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106351
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106372
---
 src/glx/dri_common.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c
index ab5d6c5bc0..d42ca71124 100644
--- a/src/glx/dri_common.c
+++ b/src/glx/dri_common.c
@@ -411,7 +411,8 @@ driInferDrawableConfig(struct glx_screen *psc, GLXDrawable 
draw)
 _X_HIDDEN __GLXDRIdrawable *
 driFetchDrawable(struct glx_context *gc, GLXDrawable glxDrawable)
 {
-   struct glx_display *const priv = __glXInitialize(gc->psc->dpy);
+   Display *dpy = gc->psc->dpy;
+   struct glx_display *const priv = __glXInitialize(dpy);
__GLXDRIdrawable *pdraw;
struct glx_screen *psc;
struct glx_config *config = gc->config;
@@ -449,6 +450,8 @@ driFetchDrawable(struct glx_context *gc, GLXDrawable 
glxDrawable)
   return NULL;
}
pdraw->refcount = 1;
+   if ((glxDrawable & dpy->resource_mask) == dpy->resource_base)
+  pdraw->refcount ++;
 
return pdraw;
 }
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 6/8] nir: Add lowering for nir_op_bitfield_reverse.

2018-05-08 Thread Eric Anholt
This is basically the same as the GLSL lowering path.
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_lower_alu.c | 47 +++-
 2 files changed, 48 insertions(+), 1 deletion(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 5586a38c83a3..5b29645a6c48 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1887,6 +1887,8 @@ typedef struct nir_shader_compiler_options {
bool lower_bitfield_insert;
/** Lowers bitfield_insert to bfm, compares, and shifts. */
bool lower_bitfield_insert_to_shifts;
+   /** Lowers bitfield_reverse to shifts. */
+   bool lower_bitfield_reverse;
/** Lowers bfm to shifts and subtracts. */
bool lower_bfm;
/** Lowers ifind_msb to compare and ufind_msb */
diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c
index 28ecaf6badce..ff977f016961 100644
--- a/src/compiler/nir/nir_lower_alu.c
+++ b/src/compiler/nir/nir_lower_alu.c
@@ -50,6 +50,50 @@ lower_alu_instr(nir_alu_instr *instr, nir_builder *b)
b->exact = instr->exact;
 
switch (instr->op) {
+   case nir_op_bitfield_reverse:
+  if (b->shader->options->lower_bitfield_reverse) {
+ /* For more details, see:
+  *
+  * http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
+  */
+ nir_ssa_def *c1 = nir_imm_int(b, 1);
+ nir_ssa_def *c2 = nir_imm_int(b, 2);
+ nir_ssa_def *c4 = nir_imm_int(b, 4);
+ nir_ssa_def *c8 = nir_imm_int(b, 8);
+ nir_ssa_def *c16 = nir_imm_int(b, 16);
+ nir_ssa_def *c33333333 = nir_imm_int(b, 0x33333333);
+ nir_ssa_def *c55555555 = nir_imm_int(b, 0x55555555);
+ nir_ssa_def *c0f0f0f0f = nir_imm_int(b, 0x0f0f0f0f);
+ nir_ssa_def *c00ff00ff = nir_imm_int(b, 0x00ff00ff);
+
+ lowered = nir_ssa_for_alu_src(b, instr, 0);
+
+ /* Swap odd and even bits. */
+ lowered = nir_ior(b,
+   nir_iand(b, nir_ushr(b, lowered, c1), c55555555),
+   nir_ishl(b, nir_iand(b, lowered, c55555555), c1));
+
+ /* Swap consecutive pairs. */
+ lowered = nir_ior(b,
+   nir_iand(b, nir_ushr(b, lowered, c2), c33333333),
+   nir_ishl(b, nir_iand(b, lowered, c33333333), c2));
+
+ /* Swap nibbles. */
+ lowered = nir_ior(b,
+   nir_iand(b, nir_ushr(b, lowered, c4), c0f0f0f0f),
+   nir_ishl(b, nir_iand(b, lowered, c0f0f0f0f), c4));
+
+ /* Swap bytes. */
+ lowered = nir_ior(b,
+   nir_iand(b, nir_ushr(b, lowered, c8), c00ff00ff),
+   nir_ishl(b, nir_iand(b, lowered, c00ff00ff), c8));
+
+ lowered = nir_ior(b,
+   nir_ushr(b, lowered, c16),
+   nir_ishl(b, lowered, c16));
+  }
+  break;
+
case nir_op_imul_high:
case nir_op_umul_high:
   if (b->shader->options->lower_mul_high) {
@@ -136,7 +180,8 @@ nir_lower_alu(nir_shader *shader)
 {
bool progress = false;
 
-   if (!shader->options->lower_mul_high)
+   if (!shader->options->lower_bitfield_reverse &&
+   !shader->options->lower_mul_high)
   return false;
 
nir_foreach_function(function, shader) {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 7/8] nir: Add lowering for nir_op_bit_count.

2018-05-08 Thread Eric Anholt
This is basically the same as the GLSL lowering path.
---
 src/compiler/nir/nir.h   |  2 ++
 src/compiler/nir/nir_lower_alu.c | 36 
 2 files changed, 38 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 5b29645a6c48..e424a01c8225 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1889,6 +1889,8 @@ typedef struct nir_shader_compiler_options {
bool lower_bitfield_insert_to_shifts;
/** Lowers bitfield_reverse to shifts. */
bool lower_bitfield_reverse;
+   /** Lowers bit_count to shifts. */
+   bool lower_bit_count;
/** Lowers bfm to shifts and subtracts. */
bool lower_bfm;
/** Lowers ifind_msb to compare and ufind_msb */
diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c
index ff977f016961..4b145db7c8c6 100644
--- a/src/compiler/nir/nir_lower_alu.c
+++ b/src/compiler/nir/nir_lower_alu.c
@@ -94,6 +94,42 @@ lower_alu_instr(nir_alu_instr *instr, nir_builder *b)
   }
   break;
 
+   case nir_op_bit_count:
+  if (b->shader->options->lower_bit_count) {
+ /* For more details, see:
+  *
+  * 
http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
+  */
+ nir_ssa_def *c1 = nir_imm_int(b, 1);
+ nir_ssa_def *c2 = nir_imm_int(b, 2);
+ nir_ssa_def *c4 = nir_imm_int(b, 4);
+ nir_ssa_def *c24 = nir_imm_int(b, 24);
+ nir_ssa_def *c33333333 = nir_imm_int(b, 0x33333333);
+ nir_ssa_def *c55555555 = nir_imm_int(b, 0x55555555);
+ nir_ssa_def *c0f0f0f0f = nir_imm_int(b, 0x0f0f0f0f);
+ nir_ssa_def *c01010101 = nir_imm_int(b, 0x01010101);
+
+ lowered = nir_ssa_for_alu_src(b, instr, 0);
+
+ lowered = nir_isub(b, lowered,
+nir_iand(b, nir_ushr(b, lowered, c1), c55555555));
+
+ lowered = nir_iadd(b,
+nir_iand(b, lowered, c33333333),
+nir_iand(b, nir_ushr(b, lowered, c2), c33333333));
+
+ lowered = nir_ushr(b,
+nir_imul(b,
+ nir_iand(b,
+  nir_iadd(b,
+   lowered,
+   nir_ushr(b, lowered, 
c4)),
+  c0f0f0f0f),
+ c01010101),
+c24);
+  }
+  break;
+
case nir_op_imul_high:
case nir_op_umul_high:
   if (b->shader->options->lower_mul_high) {
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 2/8] nir: Add lowering from ibitfield_extract/ubitfield_extract to shifts.

2018-05-08 Thread Eric Anholt
V3D doesn't have opcodes for ibfe/ubfe, so we need to lower similarly to
glsl/lower_instructions.cpp.
---
 src/compiler/nir/nir.h|  3 +++
 src/compiler/nir/nir_opt_algebraic.py | 16 
 2 files changed, 19 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ed95dbf955d8..ee1d59ffe7cd 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1879,7 +1879,10 @@ typedef struct nir_shader_compiler_options {
bool lower_fsqrt;
bool lower_fmod32;
bool lower_fmod64;
+   /** Lowers ibitfield_extract/ubitfield_extract to ibfe/ubfe. */
bool lower_bitfield_extract;
+   /** Lowers ibitfield_extract/ubitfield_extract to bfm, compares, shifts. */
+   bool lower_bitfield_extract_to_shifts;
/** Lowers bitfield_insert to bfi/bfm */
bool lower_bitfield_insert;
/** Lowers bitfield_insert to bfm, compares, and shifts. */
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 2824dcebb81b..cb0ea5549169 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -516,6 +516,22 @@ optimizations = [
   ('ubfe', 'value', 'offset', 'bits')),
 'options->lower_bitfield_extract'),
 
+   (('ibitfield_extract', 'value', 'offset', 'bits'),
+('bcsel', ('ieq', 0, 'bits'),
+ 0,
+ ('ishr',
+   ('ishl', 'value', ('isub', ('isub', 32, 'bits'), 'offset')),
+   ('isub', 32, 'bits'))),
+'options->lower_bitfield_extract_to_shifts'),
+
+   (('ubitfield_extract', 'value', 'offset', 'bits'),
+('iand',
+ ('ushr', 'value', 'offset'),
+ ('bcsel', ('ieq', 'bits', 32),
+  0xffffffff,
+  ('bfm', 'bits', 0))),
+'options->lower_bitfield_extract_to_shifts'),
+
(('extract_i8', a, 'b@32'),
 ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
 'options->lower_extract_byte'),
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 5/8] nir: Add an ALU lowering pass for mul_high.

2018-05-08 Thread Eric Anholt
This is based on the glsl/lower_instructions.cpp implementation, but
should be much more readable.
---
 src/compiler/Makefile.sources |   1 +
 src/compiler/nir/meson.build  |   1 +
 src/compiler/nir/nir.h|   3 +
 src/compiler/nir/nir_lower_alu.c  | 165 ++
 src/mesa/state_tracker/st_glsl_to_nir.cpp |   1 +
 5 files changed, 171 insertions(+)
 create mode 100644 src/compiler/nir/nir_lower_alu.c

diff --git a/src/compiler/Makefile.sources b/src/compiler/Makefile.sources
index 80a61e507b27..248c71df6fab 100644
--- a/src/compiler/Makefile.sources
+++ b/src/compiler/Makefile.sources
@@ -213,6 +213,7 @@ NIR_FILES = \
nir/nir_loop_analyze.c \
nir/nir_loop_analyze.h \
nir/nir_lower_alpha_test.c \
+   nir/nir_lower_alu.c \
nir/nir_lower_alu_to_scalar.c \
nir/nir_lower_atomics_to_ssbo.c \
nir/nir_lower_bitmap.c \
diff --git a/src/compiler/nir/meson.build b/src/compiler/nir/meson.build
index f5a4affc8935..e0154d7960b6 100644
--- a/src/compiler/nir/meson.build
+++ b/src/compiler/nir/meson.build
@@ -103,6 +103,7 @@ files_libnir = files(
   'nir_liveness.c',
   'nir_loop_analyze.c',
   'nir_loop_analyze.h',
+  'nir_lower_alu.c',
   'nir_lower_alu_to_scalar.c',
   'nir_lower_alpha_test.c',
   'nir_lower_atomics_to_ssbo.c',
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 53ac1598dfc9..5586a38c83a3 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1895,6 +1895,8 @@ typedef struct nir_shader_compiler_options {
bool lower_find_lsb;
bool lower_uadd_carry;
bool lower_usub_borrow;
+   /** Lowers imul_high/umul_high to 16-bit multiplies and carry operations. */
+   bool lower_mul_high;
/** lowers fneg and ineg to fsub and isub. */
bool lower_negate;
/** lowers fsub and isub to fadd+fneg and iadd+ineg. */
@@ -2601,6 +2603,7 @@ bool nir_move_vec_src_uses_to_dest(nir_shader *shader);
 bool nir_lower_vec_to_movs(nir_shader *shader);
 void nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
   bool alpha_to_one);
+bool nir_lower_alu(nir_shader *shader);
 bool nir_lower_alu_to_scalar(nir_shader *shader);
 bool nir_lower_load_const_to_scalar(nir_shader *shader);
 bool nir_lower_read_invocation_to_scalar(nir_shader *shader);
diff --git a/src/compiler/nir/nir_lower_alu.c b/src/compiler/nir/nir_lower_alu.c
new file mode 100644
index 000000000000..28ecaf6badce
--- /dev/null
+++ b/src/compiler/nir/nir_lower_alu.c
@@ -0,0 +1,165 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2018 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nir.h"
+#include "nir_builder.h"
+
+/** nir_lower_alu.c
+ *
+ * NIR's home for miscellaneous ALU operation lowering implementations.
+ *
+ * Most NIR ALU lowering occurs in nir_opt_algebraic.py, since it's generally
+ * easy to write them there.  However, if terms appear multiple times in the
+ * lowered code, it can get very verbose and cause a lot of work for CSE, so
+ * it may end up being easier to write out in C code.
+ *
+ * The shader must be in SSA for this pass.
+ */
+
+#define LOWER_MUL_HIGH (1 << 0)
+
+static bool
+lower_alu_instr(nir_alu_instr *instr, nir_builder *b)
+{
+   nir_ssa_def *lowered = NULL;
+
+   assert(instr->dest.dest.is_ssa);
+
+   b->cursor = nir_before_instr(&instr->instr);
+   b->exact = instr->exact;
+
+   switch (instr->op) {
+   case nir_op_imul_high:
+   case nir_op_umul_high:
+  if (b->shader->options->lower_mul_high) {
+ nir_ssa_def *c1 = nir_imm_int(b, 1);
+ nir_ssa_def *c16 = nir_imm_int(b, 16);
+
+ nir_ssa_def *src0 = nir_ssa_for_alu_src(b, instr, 0);
+ nir_ssa_def *src1 = nir_ssa_for_alu_src(b, instr, 1);
+ nir_ssa_def *different_signs = NULL;
+ if (instr->op == nir_op_imul_high) {
+nir_ssa_def *c0

[Mesa-dev] [PATCH 1/8] nir: Add lowering for bitfieldInsert without using bfi.

2018-05-08 Thread Eric Anholt
If you don't have HW to do bfi, then lowering bitfieldInsert to bfi makes
things harder than keeping the "bits" argument around.

This still uses bfm, but I've added the obvious lowering of bfm if you
need it.
---
 src/compiler/nir/nir.h|  5 +
 src/compiler/nir/nir_opt_algebraic.py | 14 ++
 2 files changed, 19 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index a379928cdcd9..ed95dbf955d8 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1880,7 +1880,12 @@ typedef struct nir_shader_compiler_options {
bool lower_fmod32;
bool lower_fmod64;
bool lower_bitfield_extract;
+   /** Lowers bitfield_insert to bfi/bfm */
bool lower_bitfield_insert;
+   /** Lowers bitfield_insert to bfm, compares, and shifts. */
+   bool lower_bitfield_insert_to_shifts;
+   /** Lowers bfm to shifts and subtracts. */
+   bool lower_bfm;
bool lower_uadd_carry;
bool lower_usub_borrow;
/** lowers fneg and ineg to fsub and isub. */
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 96232f0e549c..2824dcebb81b 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -492,6 +492,20 @@ optimizations = [
   ('bfi', ('bfm', 'bits', 'offset'), 'insert', 'base')),
 'options->lower_bitfield_insert'),
 
+   # Alternative lowering that doesn't rely on bfi.
+   (('bitfield_insert', 'base', 'insert', 'offset', 'bits'),
+('bcsel', ('ilt', 31, 'bits'),
+ 'insert',
+ ('ior',
+  ('iand', 'base', ('inot', ('bfm', 'bits', 'offset'))),
+  ('iand', ('ishl', 'insert', 'offset'), ('bfm', 'bits', 'offset')))),
+'options->lower_bitfield_insert_to_shifts'),
+
+   # bfm lowering -- note that the NIR opcode is undefined if either arg is 32.
+   (('bfm', 'bits', 'offset'),
+('ishl', ('isub', ('ishl', 1, 'bits'), 1), 'offset'),
+'options->lower_bfm'),
+
(('ibitfield_extract', 'value', 'offset', 'bits'),
 ('bcsel', ('ilt', 31, 'bits'), 'value',
   ('ibfe', 'value', 'offset', 'bits')),
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 3/8] nir: Add lowering for ifind_msb to ufind_msb.

2018-05-08 Thread Eric Anholt
ufind_msb is easily expressed in terms of clz, and we can reduce ifind_msb
to that.
---
 src/compiler/nir/nir.h| 2 ++
 src/compiler/nir/nir_opt_algebraic.py | 4 
 2 files changed, 6 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ee1d59ffe7cd..ee45b0709636 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1889,6 +1889,8 @@ typedef struct nir_shader_compiler_options {
bool lower_bitfield_insert_to_shifts;
/** Lowers bfm to shifts and subtracts. */
bool lower_bfm;
+   /** Lowers ifind_msb to compare and ufind_msb */
+   bool lower_ifind_msb;
bool lower_uadd_carry;
bool lower_usub_borrow;
/** lowers fneg and ineg to fsub and isub. */
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index cb0ea5549169..616f734ac9b2 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -532,6 +532,10 @@ optimizations = [
   ('bfm', 'bits', 0))),
 'options->lower_bitfield_extract_to_shifts'),
 
+   (('ifind_msb', 'value'),
+('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
+'options->lower_ifind_msb'),
+
(('extract_i8', a, 'b@32'),
 ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
 'options->lower_extract_byte'),
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 8/8] v3d: Enable the new NIR bitfield operation lowering paths.

2018-05-08 Thread Eric Anholt
These together get the GLSL 3.00 unpack functions and MESA_shader_integer
operations working.
---
 src/broadcom/compiler/nir_to_vir.c | 21 +++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/src/broadcom/compiler/nir_to_vir.c 
b/src/broadcom/compiler/nir_to_vir.c
index ec8f22321f3b..0f7e47689dc3 100644
--- a/src/broadcom/compiler/nir_to_vir.c
+++ b/src/broadcom/compiler/nir_to_vir.c
@@ -755,6 +755,10 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 result = vir_NOT(c, src[0]);
 break;
 
+case nir_op_ufind_msb:
+result = vir_SUB(c, vir_uniform_ui(c, 31), vir_CLZ(c, src[0]));
+break;
+
 case nir_op_imul:
 result = vir_UMUL(c, src[0], src[1]);
 break;
@@ -853,6 +857,13 @@ ntq_emit_alu(struct v3d_compile *c, nir_alu_instr *instr)
 result = vir_FDY(c, src[0]);
 break;
 
+case nir_op_uadd_carry:
+vir_PF(c, vir_ADD(c, src[0], src[1]), V3D_QPU_PF_PUSHC);
+result = vir_MOV(c, vir_SEL(c, V3D_QPU_COND_IFA,
+vir_uniform_ui(c, ~0),
+vir_uniform_ui(c, 0)));
+break;
+
 default:
 fprintf(stderr, "unknown NIR ALU inst: ");
 nir_print_instr(&instr->instr, stderr);
@@ -1894,8 +1905,11 @@ const nir_shader_compiler_options v3d_nir_options = {
 .lower_all_io_to_temps = true,
 .lower_extract_byte = true,
 .lower_extract_word = true,
-.lower_bitfield_insert = true,
-.lower_bitfield_extract = true,
+.lower_bfm = true,
+.lower_bitfield_insert_to_shifts = true,
+.lower_bitfield_extract_to_shifts = true,
+.lower_bitfield_reverse = true,
+.lower_bit_count = true,
 .lower_pack_unorm_2x16 = true,
 .lower_pack_snorm_2x16 = true,
 .lower_pack_unorm_4x8 = true,
@@ -1903,12 +1917,15 @@ const nir_shader_compiler_options v3d_nir_options = {
 .lower_unpack_unorm_4x8 = true,
 .lower_unpack_snorm_4x8 = true,
 .lower_fdiv = true,
+.lower_find_lsb = true,
 .lower_ffma = true,
 .lower_flrp32 = true,
 .lower_fpow = true,
 .lower_fsat = true,
 .lower_fsqrt = true,
+.lower_ifind_msb = true,
 .lower_ldexp = true,
+.lower_mul_high = true,
 .native_integers = true,
 };
 
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH 4/8] nir: Add lowering for find_lsb.

2018-05-08 Thread Eric Anholt
There is a fairly simple relation to turn this into ufind_msb.
---
 src/compiler/nir/nir.h| 2 ++
 src/compiler/nir/nir_opt_algebraic.py | 4 
 2 files changed, 6 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ee45b0709636..53ac1598dfc9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1891,6 +1891,8 @@ typedef struct nir_shader_compiler_options {
bool lower_bfm;
/** Lowers ifind_msb to compare and ufind_msb */
bool lower_ifind_msb;
+   /** Lowers find_lsb to ufind_msb and logic ops */
+   bool lower_find_lsb;
bool lower_uadd_carry;
bool lower_usub_borrow;
/** lowers fneg and ineg to fsub and isub. */
diff --git a/src/compiler/nir/nir_opt_algebraic.py 
b/src/compiler/nir/nir_opt_algebraic.py
index 616f734ac9b2..46de62eea0b5 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -536,6 +536,10 @@ optimizations = [
 ('ufind_msb', ('bcsel', ('ilt', 'value', 0), ('inot', 'value'), 'value')),
 'options->lower_ifind_msb'),
 
+   (('find_lsb', 'value'),
+('ufind_msb', ('iand', 'value', ('ineg', 'value'))),
+'options->lower_find_lsb'),
+
(('extract_i8', a, 'b@32'),
 ('ishr', ('ishl', a, ('imul', ('isub', 3, b), 8)), 24),
 'options->lower_extract_byte'),
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 09/10] i965: Softpin all buffers and never use relocations.

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 11:03:38 AM PDT Scott D Phillips wrote:
> Kenneth Graunke  writes:
> 
> > On Thursday, May 3, 2018 11:51:52 PM PDT Chris Wilson wrote:
> >> Quoting Kenneth Graunke (2018-05-04 02:12:39)
> >> > ---
> >> >  src/mesa/drivers/dri/i965/brw_bufmgr.c | 2 +-
> >> >  1 file changed, 1 insertion(+), 1 deletion(-)
> >> > 
> >> > This enables it for Broadwell (with a 64-bit kernel) and Skylake+ (with
> >> > any kernel).  Unfortunately, it doesn't enable it for Cherryview as that
> >> > has a 32-bit GTT.  We could switch that over as well, but we'd have to
> >> > have a single memory zone, which is kind of a special case...
> >> 
> >> I would make it conditional on gem_param(HAS_ALIASING_PPGTT) > 1 (that
> >> is on full-ppgtt, where you don't have to care about framebuffers and
> >> GTT mmaps conflicting).
> >> -Chris
> >
> > Good idea, thanks!  Fixed for v2.
> 
> I think the ALIASING_PPGTT test is actually redundant with the
> I915_CONTEXT_PARAM_GTT_SIZE > 4 GiB test that you're already doing. So
> patch v1 is
> 
> Reviewed-by: Scott D Phillips 

I think that's probably true in practice, but checking for full PPGTT
makes sense, as that's what enables us to pre-assign addresses up front.

I may want to support softpin on Cherryview, where GTT_SIZE == 4GB.
But, I'll have to change the memory zone plan.  So since the current
memory zone plan requires > 4GB, we should check that too.

--Ken


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 3/8] anv: move canonical_address calculation into a separate function

2018-05-08 Thread Jordan Justen
On 2018-05-08 01:54:13, Chris Wilson wrote:
> Quoting Scott D Phillips (2018-05-08 01:30:45)
> > A later patch will make use of this in other places. Also, remove
> > dependency on undefined behavior of left-shifting a signed value.
> 
> Can it find a home in src/intel/common/gen_gtt.h  (or gen_vma.h,
> gen_address.h)?
> -Chris

That sounds good. If you also rename it to gen_canonical_address it
will also help make the function name not so generic. (canonical
sounds generic, but it is actually addressing some specific gen
hardware trivia.)

With that:

Reviewed-by: Jordan Justen 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 1/2] i965: require post sync operation prior to ISP disable

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 10:09:29 AM PDT Lionel Landwerlin wrote:
> Invalidating the indirect state pointers might affect a previously
> scheduled & still running 3DPRIMITIVE (causing page fault). So stall
> on pixel scoreboard before that.
> 
> v2: Fix compile issue :(
> 
> v3: Stall on pixel scoreboard
> 
> Signed-off-by: Lionel Landwerlin 
> Fixes: ca19ee33d7d39 ("i965/gen10: Ignore push constant packets during 
> context restore.")
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106243
> ---
>  src/mesa/drivers/dri/i965/brw_pipe_control.c | 8 ++--
>  1 file changed, 6 insertions(+), 2 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
> b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> index 02278be6d62..1a32e9a8e96 100644
> --- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
> +++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
> @@ -349,13 +349,17 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
>   * context restore, so the mentioned hang doesn't happen. However,
>   * software must program push constant commands for all stages prior to
>   * rendering anything, so we flag them as dirty.
> + *
> + * Finally, we also make sure to stall at pixel scoreboard to make sure the
> + * constants have been loaded into the EUs prior to disable the push 
> constants
> + * so that it doesn't hang a previous 3DPRIMITIVE.
>   */
>  void
>  gen10_emit_isp_disable(struct brw_context *brw)
>  {
> +   brw_emit_end_of_pipe_sync(brw, PIPE_CONTROL_STALL_AT_SCOREBOARD);
> brw_emit_pipe_control(brw,
> - PIPE_CONTROL_ISP_DIS |
> - PIPE_CONTROL_CS_STALL,
> + PIPE_CONTROL_ISP_DIS,
>   NULL, 0, 0);
>  
> brw->vs.base.push_constants_dirty = true;
> 

Reviewed-by: Kenneth Graunke 


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 2/2] anv: emit stall at pixel scoreboard before ISP disable

2018-05-08 Thread Kenneth Graunke
On Tuesday, May 8, 2018 10:09:30 AM PDT Lionel Landwerlin wrote:
> We want to make sure that all indirect state data has been loaded into
> the EUs before disabling the pointers.
> 
> Signed-off-by: Lionel Landwerlin 
> Fixes: 78c125af3904c ("anv/gen10: Ignore push constant packets during context 
> restore.")
> ---
>  src/intel/vulkan/genX_cmd_buffer.c | 9 -
>  1 file changed, 8 insertions(+), 1 deletion(-)
> 
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
> b/src/intel/vulkan/genX_cmd_buffer.c
> index 2882cf36506..526e18af108 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -1420,14 +1420,21 @@ genX(BeginCommandBuffer)(
>   * context restore, so the mentioned hang doesn't happen. However,
>   * software must program push constant commands for all stages prior to
>   * rendering anything. So we flag them dirty in BeginCommandBuffer.
> + *
> + * Finally, we also make sure to stall at pixel scoreboard to make sure the
> + * constants have been loaded into the EUs prior to disable the push 
> constants
> + * so that it doesn't hang a previous 3DPRIMITIVE.
>   */
>  static void
>  emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
>  {
> anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> - pc.IndirectStatePointersDisable = true;
> + pc.StallAtPixelScoreboard = true;
>   pc.CommandStreamerStallEnable = true;
> }
> +   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
> + pc.IndirectStatePointersDisable = true;
> +   }
>  }
>  
>  VkResult
> 

Compared to GL, this is missing the immediate write.  Not sure if that
matters or not.  On Haswell, it's also missing a register write.


signature.asc
Description: This is a digitally signed message part.
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 4/8] anv: Add vma_heap allocators in anv_device

2018-05-08 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2018-05-07 17:30:46, Scott D Phillips wrote:
> These will be used to assign virtual addresses to soft pinned
> buffers in a later patch.
> 
> Two allocators are added for separate 'low' and 'high' virtual
> memory areas. Another alternative would have been to add a
> double-sided allocator, which wasn't done here just because it
> didn't appear to give any code complexity advantages.
> 
> v2: - rename has_exec_softpin to use_softpin (Jason)
> - Only remove bottom one page and top 4 GiB from virt (Jason)
> - refer to comment in anv_allocator about state address + size
>   overflowing 48 bits (Jason)
> - Mention hi/lo allocators vs double-sided allocator in
>   commit message (Chris)
> - assign state pool memory ranges statically (Jason)
> ---
>  src/intel/vulkan/anv_device.c  | 77 
> ++
>  src/intel/vulkan/anv_private.h | 60 
>  2 files changed, 137 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 374fc16c4c9..9e21818ead1 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device 
> *device,
>anv_gem_supports_syncobj_wait(fd);
> device->has_context_priority = anv_gem_has_context_priority(fd);
>  
> +   device->use_softpin = anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_SOFTPIN)
> +  && device->supports_48bit_addresses;
> +
> bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
>  
> /* Starting with Gen10, the timestamp frequency of the command streamer 
> may
> @@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice(
>goto fail_fd;
> }
>  
> +   if (physical_device->use_softpin) {
> +  if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
> + result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
> + goto fail_fd;
> +  }
> +
> +  /* keep the page with address zero out of the allocator */
> +  util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS, 
> LOW_HEAP_SIZE);
> +  device->vma_lo_available =
> + physical_device->memory.heaps[physical_device->memory.heap_count - 
> 1].size;
> +
> +  /* Leave the last 4GiB out of the high vma range, so that no state base
> +   * address + size can overflow 48 bits. For more information see the
> +   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
> +   */
> +  util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
> + HIGH_HEAP_SIZE);
> +  device->vma_hi_available = physical_device->memory.heap_count == 1 ? 0 
> :
> + physical_device->memory.heaps[0].size;
> +   }
> +
> /* As per spec, the driver implementation may deny requests to acquire
>  * a priority above the default priority (MEDIUM) if the caller does not
>  * have sufficient privileges. In this scenario VK_ERROR_NOT_PERMITTED_EXT
> @@ -1887,6 +1911,59 @@ VkResult anv_DeviceWaitIdle(
> return anv_device_submit_simple_batch(device, &batch);
>  }
>  
> +bool
> +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return true;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +
> +   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
> +   device->vma_hi_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size, 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_hi_available -= bo->size;
> +  }
> +   }
> +
> +   if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size, 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_lo_available -= bo->size;
> +  }
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   return bo->offset != 0;
> +}
> +
> +void
> +anv_vma_free(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   if (bo->offset >= 1ull << 32) {
> +  util_vma_heap_free(&device->vma_hi, bo->offset, bo->size);
> +  device->vma_hi_available += bo->size;
> +   } else {
> +  util_vma_heap_free(&device->vma_lo, bo->offset, bo->size);
> +  device->vma_lo_available += bo->size;
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +}
> +
>  VkResult
>  anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
>  {
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 761601d1e37..8807fe7e5fb 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -49,6 +49,7 @@
>  #include "util

Re: [Mesa-dev] [PATCH v2 2/8] util: Add a randomized test for the virtual memory allocator

2018-05-08 Thread Jason Ekstrand
I've got a bunch of comments below.  However, I think this test is
sufficient to demonstrate that the allocator works for the use-cases in
this series so it doesn't need to block the rest of the patches.  Overall,
it looks really good.  Thanks!

On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> The test pseudo-randomly makes allocations and deallocations with
> the virtual memory allocator and checks that the results are
> consistent. Specifically, we test that:
>
>  * no result from the allocator overlaps an already allocated range
>  * allocated memory fulfills the stated alignment requirement
>  * a failed result from the allocator could not have been fulfilled
>  * memory freed to the allocator can later be allocated again
>
> v2: - fix if() in test() to actually run fill()
> ---
>  configure.ac   |   1 +
>  src/util/Makefile.am   |   3 +-
>  src/util/meson.build   |   1 +
>  src/util/tests/vma/Makefile.am |  37 +
>  src/util/tests/vma/meson.build |  29 
>  src/util/tests/vma/vma_random_test.cpp | 239
> +
>  6 files changed, 309 insertions(+), 1 deletion(-)
>  create mode 100644 src/util/tests/vma/Makefile.am
>  create mode 100644 src/util/tests/vma/meson.build
>  create mode 100644 src/util/tests/vma/vma_random_test.cpp
>
> diff --git a/configure.ac b/configure.ac
> index c0fbfe94135..8dee15cc305 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -3111,6 +3111,7 @@ AC_CONFIG_FILES([Makefile
>   src/util/Makefile
>   src/util/tests/hash_table/Makefile
>   src/util/tests/string_buffer/Makefile
> + src/util/tests/vma/Makefile
>   src/util/xmlpool/Makefile
>   src/vulkan/Makefile])
>
> diff --git a/src/util/Makefile.am b/src/util/Makefile.am
> index 07bf052175b..b51dccdadfd 100644
> --- a/src/util/Makefile.am
> +++ b/src/util/Makefile.am
> @@ -22,7 +22,8 @@
>  SUBDIRS = . \
> xmlpool \
> tests/hash_table \
> -   tests/string_buffer
> +   tests/string_buffer \
> +   tests/vma
>
>  include Makefile.sources
>
> diff --git a/src/util/meson.build b/src/util/meson.build
> index 14660e0fa0c..c777984e28d 100644
> --- a/src/util/meson.build
> +++ b/src/util/meson.build
> @@ -159,4 +159,5 @@ if with_tests
>
>subdir('tests/hash_table')
>subdir('tests/string_buffer')
> +  subdir('tests/vma')
>  endif
> diff --git a/src/util/tests/vma/Makefile.am b/src/util/tests/vma/Makefile.
> am
> new file mode 100644
> index 000..1c4dd302bfa
> --- /dev/null
> +++ b/src/util/tests/vma/Makefile.am
> @@ -0,0 +1,37 @@
> +# Copyright © 2018 Intel Corporation
> +#
> +#  Permission is hereby granted, free of charge, to any person obtaining a
> +#  copy of this software and associated documentation files (the
> "Software"),
> +#  to deal in the Software without restriction, including without
> limitation
> +#  the rights to use, copy, modify, merge, publish, distribute,
> sublicense,
> +#  and/or sell copies of the Software, and to permit persons to whom the
> +#  Software is furnished to do so, subject to the following conditions:
> +#
> +#  The above copyright notice and this permission notice (including the
> next
> +#  paragraph) shall be included in all copies or substantial portions of
> the
> +#  Software.
> +#
> +#  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
> EXPRESS OR
> +#  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
> MERCHANTABILITY,
> +#  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT
> SHALL
> +#  THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
> OTHER
> +#  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> +#  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
> DEALINGS
> +#  IN THE SOFTWARE.
> +
> +AM_CPPFLAGS = \
> +   -I$(top_srcdir)/include \
> +   -I$(top_srcdir)/src/util \
> +   $(DEFINES)
> +
> +TESTS = vma_random_test
> +
> +check_PROGRAMS = $(TESTS)
> +
> +vma_random_test_SOURCES = \
> +   vma_random_test.cpp
> +
> +vma_random_test_LDADD = \
> +   $(top_builddir)/src/util/libmesautil.la
> +
> +EXTRA_DIST = meson.build
> diff --git a/src/util/tests/vma/meson.build b/src/util/tests/vma/meson.
> build
> new file mode 100644
> index 000..53562db312b
> --- /dev/null
> +++ b/src/util/tests/vma/meson.build
> @@ -0,0 +1,29 @@
> +# Copyright © 2018 Intel Corporation
> +
> +# Permission is hereby granted, free of charge, to any person obtaining a
> copy
> +# of this software and associated documentation files (the "Software"),
> to deal
> +# in the Software without restriction, including without limitation the
> rights
> +# to use, copy, modify, merge, publish, distribute, sublicense, and/or
> sell
> +# copies of the Software, and to permit persons to whom the Software is
> +# furnished to do so, subject to the following 

Re: [Mesa-dev] [PATCH 04/17] i965/miptree: Initialize the indirect clear color to zero

2018-05-08 Thread Nanley Chery
On Tue, May 08, 2018 at 08:31:39AM +0300, Pohjolainen, Topi wrote:
> On Mon, May 07, 2018 at 10:11:39AM -0700, Nanley Chery wrote:
> > On Mon, May 07, 2018 at 11:30:15AM +0300, Pohjolainen, Topi wrote:
> > > On Thu, May 03, 2018 at 12:03:51PM -0700, Nanley Chery wrote:
> > > > The indirect clear color isn't correctly tracked in
> > > > intel_miptree::fast_clear_color. The initial value of ::fast_clear_color
> > > > is zero, while that of the indirect clear color is undefined or
> > > > non-zero.
> > > > 
> > > > Topi Pohjolainen discovered this issue with MCS buffers. This issue is
> > > > apparent when fast-clearing an MCS buffer for the first time with
> > > > glClearColor = {0.0,}. Although the indirect clear color is non-zero,
> > > > the initial aux state of the MCS is CLEAR and the tracked clear color is
> > > > zero, so we avoid updating the indirect clear color with {0.0,}.
> > > > 
> > > > Make the indirect clear color match the initial value of
> > > > ::fast_clear_color.
> > > > 
> > > > ---
> > > > 
> > > > Hey Topi,
> > > > 
> > > > Just FYI, this patch should fix the MCS bug you reported earlier.
> > > > 
> > > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 33 
> > > > ++-
> > > >  1 file changed, 22 insertions(+), 11 deletions(-)
> > > > 
> > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > index 5d3ee569bd8..e70c9ff1ef4 100644
> > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > @@ -978,11 +978,11 @@ create_ccs_buf_for_image(struct brw_context *brw,
> > > >  * system with CCS, we don't have the extra space at the end of the 
> > > > aux
> > > >  * buffer. So create a new bo here that will store that clear color.
> > > >  */
> > > > -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > > > -   if (devinfo->gen >= 10) {
> > > > +   if (brw->isl_dev.ss.clear_color_state_size > 0) {
> > > >mt->aux_buf->clear_color_bo =
> > > > - brw_bo_alloc(brw->bufmgr, "clear_color_bo",
> > > > -  brw->isl_dev.ss.clear_color_state_size);
> > > > + brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
> > > > +brw->isl_dev.ss.clear_color_state_size,
> > > > +I915_TILING_NONE, 0, BO_ALLOC_ZEROED);
> > > >if (!mt->aux_buf->clear_color_bo) {
> > > >   free(mt->aux_buf);
> > > >   mt->aux_buf = NULL;
> > > > @@ -1673,9 +1673,9 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > >  
> > > > buf->size = aux_surf->size;
> > > >  
> > > > -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > > > -   if (devinfo->gen >= 10) {
> > > > -  /* On CNL, instead of setting the clear color in the 
> > > > SURFACE_STATE, we
> > > > +   const bool has_indirect_clear = 
> > > > brw->isl_dev.ss.clear_color_state_size > 0;
> > > > +   if (has_indirect_clear) {
> > > > +  /* On CNL+, instead of setting the clear color in the 
> > > > SURFACE_STATE, we
> > > > * will set a pointer to a dword somewhere that contains the 
> > > > color. So,
> > > > * allocate the space for the clear color value here on the aux 
> > > > buffer.
> > > > */
> > > > @@ -1698,7 +1698,8 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > > }
> > > >  
> > > > /* Initialize the bo to the desired value */
> > > > -   if (wants_memset) {
> > > > +   const bool needs_memset = wants_memset || has_indirect_clear;
> > > > +   if (needs_memset) {
> > > >assert(!(alloc_flags & BO_ALLOC_BUSY));
> > > >  
> > > >void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
> > > > @@ -1706,11 +1707,21 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > >   intel_miptree_aux_buffer_free(buf);
> > > >   return NULL;
> > > >}
> > > > -  memset(map, memset_value, mt->aux_buf->size);
> > > > +
> > > > +  /* Memset the aux_surf portion of the BO. */
> > > > +  if (wants_memset)
> > > > + memset(map, memset_value, aux_surf->size);
> > > > +
> > > > +  /* Zero the indirect clear color to match ::fast_clear_color. */
> > > > +  if (has_indirect_clear) {
> > > > + memset((char *)map + buf->clear_color_offset, 0,
> > > > +brw->isl_dev.ss.clear_color_state_size);
> > > > +  }
> > > > +
> > > >brw_bo_unmap(buf->bo);
> > > > }
> > > >  
> > > > -   if (devinfo->gen >= 10) {
> > > > +   if (has_indirect_clear) {
> > > >buf->clear_color_bo = buf->bo;
> > > >brw_bo_reference(buf->clear_color_bo);
> > > > }
> > > > @@ -1869,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
> > > >isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
> > > > assert(ok);
> > > >  
> > > > -   const uint32_t alloc_flags = BO_ALLOC_BUSY;
> > > > +   c

[Mesa-dev] [PATCH 1/1] winsys/radeon: Destroy fd_tab hash table when last winsys is removed.

2018-05-08 Thread Jan Vesely
Fixes memory leak on module unload.
CC: 
Signed-off-by: Jan Vesely 
---
Not the prettiest way to do this, but it works and imo shouldn't need
anything more fancy.

 src/gallium/winsys/radeon/drm/radeon_drm_winsys.c | 15 ++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c 
b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
index 3ee243adbc..f4555a1dc8 100644
--- a/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
+++ b/src/gallium/winsys/radeon/drm/radeon_drm_winsys.c
@@ -707,10 +707,17 @@ static int compare_fd(void *key1, void *key2)
 
 DEBUG_GET_ONCE_BOOL_OPTION(thread, "RADEON_THREAD", true)
 
+static enum pipe_error inc(void *k, void *v, void *d)
+{
+   (*(size_t *)d) += 1;
+   return PIPE_OK;
+}
+
 static bool radeon_winsys_unref(struct radeon_winsys *ws)
 {
 struct radeon_drm_winsys *rws = (struct radeon_drm_winsys*)ws;
 bool destroy;
+size_t count = 0;
 
 /* When the reference counter drops to zero, remove the fd from the table.
  * This must happen while the mutex is locked, so that
@@ -719,8 +726,14 @@ static bool radeon_winsys_unref(struct radeon_winsys *ws)
 mtx_lock(&fd_tab_mutex);
 
 destroy = pipe_reference(&rws->reference, NULL);
-if (destroy && fd_tab)
+if (destroy && fd_tab) {
 util_hash_table_remove(fd_tab, intptr_to_pointer(rws->fd));
+util_hash_table_foreach(fd_tab, inc, &count);
+if (count == 0) {
+   util_hash_table_destroy(fd_tab);
+   fd_tab = NULL;
+}
+}
 
 mtx_unlock(&fd_tab_mutex);
 return destroy;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 05/17] i965/miptree: Drop the name param from alloc_aux_buffer

2018-05-08 Thread Nanley Chery
On Thu, May 03, 2018 at 12:03:52PM -0700, Nanley Chery wrote:
> A name of "aux-miptree" should be sufficient.

I should mention that I went over this with some teammates on #intel-3d and
received no objections.

Another comment below.

> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 9 -
>  1 file changed, 4 insertions(+), 5 deletions(-)
> 
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index e70c9ff1ef4..566ead0d5c8 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1660,7 +1660,6 @@ intel_miptree_copy_teximage(struct brw_context *brw,
>  
>  static struct intel_miptree_aux_buffer *
>  intel_alloc_aux_buffer(struct brw_context *brw,
> -   const char *name,
> const struct isl_surf *aux_surf,
> uint32_t alloc_flags,
> bool wants_memset,
> @@ -1690,7 +1689,7 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>  * Therefore one can pass the ISL dimensions in terms of bytes instead of
>  * trying to recalculate based on different format block sizes.
>  */
> -   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, buf->size,
> +   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", buf->size,
>  I915_TILING_Y, buf->pitch, alloc_flags);

I had a rebase conflict on this hunk today:

<<< HEAD
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, size,
I915_TILING_Y, aux_surf->row_pitch,
alloc_flags);
===
   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", buf->size,
I915_TILING_Y, buf->pitch, alloc_flags);
>>> i965/miptree: Drop the name param from alloc_aux_buffer

This is the new diff (or resolution):

-   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, name, size,
+   buf->bo = brw_bo_alloc_tiled(brw->bufmgr, "aux-miptree", size,


-Nanley

> if (!buf->bo) {
>free(buf);
> @@ -1769,7 +1768,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
>  *
>  * Note: the clear value for MCS buffers is all 1's, so we memset to 0xff.
>  */
> -   mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree", &temp_mcs_surf,
> +   mt->aux_buf = intel_alloc_aux_buffer(brw, &temp_mcs_surf,
>  alloc_flags, true, 0xFF, mt);
> if (!mt->aux_buf) {
>free(aux_state);
> @@ -1814,7 +1813,7 @@ intel_miptree_alloc_ccs(struct brw_context *brw,
>  * For CCS_D, do the same thing. On gen9+, this avoids having any 
> undefined
>  * bits in the aux buffer.
>  */
> -   mt->aux_buf = intel_alloc_aux_buffer(brw, "ccs-miptree", &temp_ccs_surf,
> +   mt->aux_buf = intel_alloc_aux_buffer(brw, &temp_ccs_surf,
>  BO_ALLOC_ZEROED, false, 0, mt);
> if (!mt->aux_buf) {
>free(aux_state);
> @@ -1881,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
> assert(ok);
>  
> const uint32_t alloc_flags = 0;
> -   mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree", &temp_hiz_surf,
> +   mt->aux_buf = intel_alloc_aux_buffer(brw, &temp_hiz_surf,
>  alloc_flags, false, 0, mt);
>  
> if (!mt->aux_buf) {
> -- 
> 2.16.2
> 
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 03/17] i965/miptree: Move init_mcs into alloc_aux_buffer

2018-05-08 Thread Nanley Chery
On Tue, May 08, 2018 at 08:22:39AM +0300, Pohjolainen, Topi wrote:
> On Mon, May 07, 2018 at 11:35:39AM -0700, Nanley Chery wrote:
> > On Mon, May 07, 2018 at 10:10:16AM -0700, Nanley Chery wrote:
> > > On Mon, May 07, 2018 at 11:51:50AM +0300, Pohjolainen, Topi wrote:
> > > > On Fri, May 04, 2018 at 11:04:40AM -0700, Nanley Chery wrote:
> > > > > On Fri, May 04, 2018 at 10:00:32AM -0700, Nanley Chery wrote:
> > > > > > On Fri, May 04, 2018 at 09:42:34AM -0700, Nanley Chery wrote:
> > > > > > > On Thu, May 03, 2018 at 12:03:50PM -0700, Nanley Chery wrote:
> > > > > > > > Add infrastructure for initializing the clear color BO.
> > > > > > > > ---
> > > > > > > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 68 
> > > > > > > > ---
> > > > > > > >  1 file changed, 31 insertions(+), 37 deletions(-)
> > > > > > > > 
> > > > > > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > > > > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > > > > > index 182a896e23a..5d3ee569bd8 100644
> > > > > > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > > > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > > > > > @@ -1658,41 +1658,13 @@ intel_miptree_copy_teximage(struct 
> > > > > > > > brw_context *brw,
> > > > > > > > intel_obj->needs_validate = true;
> > > > > > > >  }
> > > > > > > >  
> > > > > > > > -static bool
> > > > > > > > -intel_miptree_init_mcs(struct brw_context *brw,
> > > > > > > > -   struct intel_mipmap_tree *mt,
> > > > > > > > -   int init_value)
> > > > > > > > -{
> > > > > > > > -   assert(mt->aux_buf != NULL);
> > > > > > > > -
> > > > > > > > -   /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
> > > > > > > > -*
> > > > > > > > -* When MCS buffer is enabled and bound to MSRT, it is 
> > > > > > > > required that it
> > > > > > > > -* is cleared prior to any rendering.
> > > > > > > > -*
> > > > > > > > -* Since we don't use the MCS buffer for any purpose other 
> > > > > > > > than rendering,
> > > > > > > > -* it makes sense to just clear it immediately upon 
> > > > > > > > allocation.
> > > > > > > > -*
> > > > > > > > -* Note: the clear value for MCS buffers is all 1's, so we 
> > > > > > > > memset to 0xff.
> > > > > > > > -*/
> > > > > > > > -   void *map = brw_bo_map(brw, mt->aux_buf->bo, MAP_WRITE | 
> > > > > > > > MAP_RAW);
> > > > > > > > -   if (unlikely(map == NULL)) {
> > > > > > > > -  fprintf(stderr, "Failed to map mcs buffer into GTT\n");
> > > > > > > > -  intel_miptree_aux_buffer_free(mt->aux_buf);
> > > > > > > > -  mt->aux_buf = NULL;
> > > > > > > > -  return false;
> > > > > > > > -   }
> > > > > > > > -   void *data = map;
> > > > > > > > -   memset(data, init_value, mt->aux_buf->size);
> > > > > > > > -   brw_bo_unmap(mt->aux_buf->bo);
> > > > > > > > -   return true;
> > > > > > > > -}
> > > > > > > > -
> > > > > > > >  static struct intel_miptree_aux_buffer *
> > > > > > > >  intel_alloc_aux_buffer(struct brw_context *brw,
> > > > > > > > const char *name,
> > > > > > > > const struct isl_surf *aux_surf,
> > > > > > > > uint32_t alloc_flags,
> > > > > > > > +   bool wants_memset,
> > > > > > > > +   uint8_t memset_value,
> > > > > > > > struct intel_mipmap_tree *mt)
> > > > > > > >  {
> > > > > > > > struct intel_miptree_aux_buffer *buf = calloc(sizeof(*buf), 
> > > > > > > > 1);
> > > > > > > > @@ -1725,6 +1697,19 @@ intel_alloc_aux_buffer(struct 
> > > > > > > > brw_context *brw,
> > > > > > > >return NULL;
> > > > > > > > }
> > > > > > > >  
> > > > > > > > +   /* Initialize the bo to the desired value */
> > > > > > > > +   if (wants_memset) {
> > > > > > > > +  assert(!(alloc_flags & BO_ALLOC_BUSY));
> > > > > > > > +
> > > > > > > > +  void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | 
> > > > > > > > MAP_RAW);
> > > > > > > > +  if (map == NULL) {
> > > > > > > > + intel_miptree_aux_buffer_free(buf);
> > > > > > > > + return NULL;
> > > > > > > > +  }
> > > > > > > > +  memset(map, memset_value, mt->aux_buf->size);
> > > > > > > 
> > > > > > > Found a bug here. The last argument should be buf->size because
> > > > > > > mt->aux_buf hasn't been assigned yet. Will fix locally.
> > > > > > > 
> > > > > > > -Nanley
> > > > > > > 
> > > > > > 
> > > > > > False alarm. Sorry for the noise.
> > > > 
> > > > You are passing "aux_surf" as one of the arguments. Could you just use
> > > > "aux_surf->size"? That gives the value for "buf->size" before 
> > > > "buf->size"
> > > > gets augmented by the indirect clear color.
> > > > 
> > > 
> > > Sure. The only extra change I'll have to make is to rephrase this commit
> > > message to explain that this patch adds a memset capability to
> > > intel_alloc_aux_buffer

Re: [Mesa-dev] [PATCH 06/17] i965/miptree: Drop the alloc_flags param from alloc_aux_buffer

2018-05-08 Thread Nanley Chery
On Tue, May 08, 2018 at 08:43:20AM +0300, Pohjolainen, Topi wrote:
> On Mon, May 07, 2018 at 11:04:20AM -0700, Nanley Chery wrote:
> > On Mon, May 07, 2018 at 03:06:29PM +0300, Pohjolainen, Topi wrote:
> > > On Thu, May 03, 2018 at 12:03:53PM -0700, Nanley Chery wrote:
> > > > We have enough information to determine the optimal flags internally.
> > > > ---
> > > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29 
> > > > +--
> > > >  1 file changed, 14 insertions(+), 15 deletions(-)
> > > > 
> > > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > > > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > index 566ead0d5c8..e065c2f62e0 100644
> > > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > > @@ -1661,7 +1661,6 @@ intel_miptree_copy_teximage(struct brw_context 
> > > > *brw,
> > > >  static struct intel_miptree_aux_buffer *
> > > >  intel_alloc_aux_buffer(struct brw_context *brw,
> > > > const struct isl_surf *aux_surf,
> > > > -   uint32_t alloc_flags,
> > > > bool wants_memset,
> > > > uint8_t memset_value,
> > > > struct intel_mipmap_tree *mt)
> > > > @@ -1685,6 +1684,17 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > > buf->pitch = aux_surf->row_pitch;
> > > > buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
> > > >  
> > > > +   /* If the buffer needs to be initialised (requiring the buffer to be
> > > > +* immediately mapped to cpu space for writing), do not use the gpu 
> > > > access
> > > > +* flag which can cause an unnecessary delay if the backing pages 
> > > > happened
> > > > +* to be just used by the GPU.
> > > > +*/
> > > > +   const bool alloc_zeroed = wants_memset && memset_value == 0;
> > > > +   const bool needs_memset =
> > > > +  !alloc_zeroed && (wants_memset || has_indirect_clear);
> > > > +   const uint32_t alloc_flags =
> > > > +  alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 : 
> > > > BO_ALLOC_BUSY);
> > > > +
> > > 
> > > What you have is correct but double ternaries always make my head spin. 
> > > How
> > > would you feel:
> > > 
> > >   uint32_t alloc_flags = 0;
> > >   if (alloc_zeroed)
> > >  alloc_flags = BO_ALLOC_ZEROED;
> > >   else if (!wants_memset && !has_indirect_clear)
> > >  alloc_flags = BO_ALLOC_BUSY;
> > > 
> > 
> > I was hoping this nested ternary would survive, but I don't mind
> > replacing it. I'd prefer to be more explicit about the case in which we
> > want to assign alloc_flags to 0 with something like:
> > 
> >uint32_t alloc_flags;
> >if (alloc_zeroed) {
> >   alloc_flags = BO_ALLOC_ZEROED;
> >} else if (needs_memset) {
> >   alloc_flags = 0;
> >} else {
> >   alloc_flags = BO_ALLOC_BUSY;
> >}
> > 
> > OR:
> > 
> >uint32_t alloc_flags;
> >if (needs_memset) {
> >   alloc_flags = (memset_value == 0) ? BO_ALLOC_ZEROED : 0;
> >} else {
> >   alloc_flags = BO_ALLOC_BUSY;
> >}
> 
> What you had originally starts to look the cleanest, and we have similar code
> elsewhere.
> 
> Reviewed-by: Topi Pohjolainen 
> 

Great!

> > 
> > Thoughts?
> > 
> > I just noticed that the variable naming could use some work. Maybe:
> > 
> > * wants_memset -> wants_aux_surf_memset
> > * memset_value -> aux_surf_memset_value
> > * needs_memset -> aux_bo_needs_memset
> > 
> > Would you like me to do something like this in a follow-on patch?
> 
> Up to you really, the scope of the function is pretty limited and therefore it
> isn't hard to remember the semantics of the variables.
> 

Okay. I'll hold off for now.

-Nanley

> > 
> > -Nanley
> > 
> > > > /* ISL has stricter set of alignment rules then the drm allocator.
> > > >  * Therefore one can pass the ISL dimensions in terms of bytes 
> > > > instead of
> > > >  * trying to recalculate based on different format block sizes.
> > > > @@ -1697,7 +1707,6 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > > }
> > > >  
> > > > /* Initialize the bo to the desired value */
> > > > -   const bool needs_memset = wants_memset || has_indirect_clear;
> > > > if (needs_memset) {
> > > >assert(!(alloc_flags & BO_ALLOC_BUSY));
> > > >  
> > > > @@ -1752,12 +1761,6 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> > > >isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf, &temp_mcs_surf);
> > > > assert(ok);
> > > >  
> > > > -   /* Buffer needs to be initialised requiring the buffer to be 
> > > > immediately
> > > > -* mapped to cpu space for writing. Therefore do not use the gpu 
> > > > access
> > > > -* flag which can cause an unnecessary delay if the backing pages 
> > > > happened
> > > > -* to be just used by the GPU.
> > > > -*/
> > > > -   const uint32_t alloc_flags = 0;
> > > > /* From the Ivy Bridg

Re: [Mesa-dev] [PATCH v2 4/8] anv: Add vma_heap allocators in anv_device

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> These will be used to assign virtual addresses to soft pinned
> buffers in a later patch.
>
> Two allocators are added for separate 'low' and 'high' virtual
> memory areas. Another alternative would have been to add a
> double-sided allocator, which wasn't done here just because it
> didn't appear to give any code complexity advantages.
>
> v2: - rename has_exec_softpin to use_softpin (Jason)
> - Only remove bottom one page and top 4 GiB from virt (Jason)
> - refer to comment in anv_allocator about state address + size
>   overflowing 48 bits (Jason)
> - Mention hi/lo allocators vs double-sided allocator in
>   commit message (Chris)
> - assign state pool memory ranges statically (Jason)
> ---
>  src/intel/vulkan/anv_device.c  | 77 ++
> 
>  src/intel/vulkan/anv_private.h | 60 
>  2 files changed, 137 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 374fc16c4c9..9e21818ead1 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -374,6 +374,9 @@ anv_physical_device_init(struct anv_physical_device
> *device,
>anv_gem_supports_syncobj_wait(fd);
> device->has_context_priority = anv_gem_has_context_priority(fd);
>
> +   device->use_softpin = anv_gem_get_param(fd,
> I915_PARAM_HAS_EXEC_SOFTPIN)
> +  && device->supports_48bit_addresses;
> +
> bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
>
> /* Starting with Gen10, the timestamp frequency of the command
> streamer may
> @@ -1527,6 +1530,27 @@ VkResult anv_CreateDevice(
>goto fail_fd;
> }
>
> +   if (physical_device->use_softpin) {
> +  if (pthread_mutex_init(&device->vma_mutex, NULL) != 0) {
> + result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
> + goto fail_fd;
> +  }
> +
> +  /* keep the page with address zero out of the allocator */
> +  util_vma_heap_init(&device->vma_lo, LOW_HEAP_MIN_ADDRESS,
> LOW_HEAP_SIZE);
> +  device->vma_lo_available =
> + physical_device->memory.heaps[physical_device->memory.heap_count
> - 1].size;
> +
> +  /* Leave the last 4GiB out of the high vma range, so that no state
> base
> +   * address + size can overflow 48 bits. For more information see the
> +   * comment about Wa32bitGeneralStateOffset in anv_allocator.c
> +   */
> +  util_vma_heap_init(&device->vma_hi, HIGH_HEAP_MIN_ADDRESS,
> + HIGH_HEAP_SIZE);
>

As Jordan pointed out, this should probably depend on the aperture size in
case that ever changes to be something other than 48 bits.  That can be
handled as part of future platform enabling though.


> +  device->vma_hi_available = physical_device->memory.heap_count == 1
> ? 0 :
> + physical_device->memory.heaps[0].size;
> +   }
> +
> /* As per spec, the driver implementation may deny requests to acquire
>  * a priority above the default priority (MEDIUM) if the caller does
> not
>  * have sufficient privileges. In this scenario
> VK_ERROR_NOT_PERMITTED_EXT
> @@ -1887,6 +1911,59 @@ VkResult anv_DeviceWaitIdle(
> return anv_device_submit_simple_batch(device, &batch);
>  }
>
> +bool
> +anv_vma_alloc(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return true;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +
> +   if (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS &&
> +   device->vma_hi_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(&device->vma_hi, bo->size,
> 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_hi_available -= bo->size;
> +  }
> +   }
> +
> +   if (bo->offset == 0 && device->vma_lo_available >= bo->size) {
> +  uint64_t addr = util_vma_heap_alloc(&device->vma_lo, bo->size,
> 4096);
> +  if (addr) {
> + bo->offset = canonical_address(addr);
> + device->vma_lo_available -= bo->size;
> +  }
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   return bo->offset != 0;
> +}
> +
> +void
> +anv_vma_free(struct anv_device *device, struct anv_bo *bo)
> +{
> +   if (!(bo->flags & EXEC_OBJECT_PINNED))
> +  return;
> +
> +   pthread_mutex_lock(&device->vma_mutex);
> +
> +   if (bo->offset >= 1ull << 32) {
> +  util_vma_heap_free(&device->vma_hi, bo->offset, bo->size);
> +  device->vma_hi_available += bo->size;
> +   } else {
> +  util_vma_heap_free(&device->vma_lo, bo->offset, bo->size);
> +  device->vma_lo_available += bo->size;
> +   }
> +
> +   pthread_mutex_unlock(&device->vma_mutex);
> +
> +   bo->offset = 0;
> +}
> +
>  VkResult
>  anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t
> size)
>  {
> diff --git a/src/intel/vulkan/anv_private.h b/sr

Re: [Mesa-dev] [PATCH v2 5/8] anv: soft pin state pools

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> The state_pools reserve virtual address space of the full
> BLOCK_POOL_MEMFD_SIZE, but maintain the current behavior of
> growing from the middle.
>
> v2: - rename block_pool::offset to block_pool::start_address (Jason)
> - assign state pool start_address statically (Jason)
> ---
>  src/intel/vulkan/anv_allocator.c   |  9 +
>  src/intel/vulkan/anv_device.c  | 21
> ++---
>  src/intel/vulkan/anv_private.h |  8 
>  src/intel/vulkan/tests/block_pool_no_free.c|  2 +-
>  src/intel/vulkan/tests/state_pool.c|  2 +-
>  src/intel/vulkan/tests/state_pool_free_list_only.c |  2 +-
>  src/intel/vulkan/tests/state_pool_no_free.c|  2 +-
>  7 files changed, 35 insertions(+), 11 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_
> allocator.c
> index 642e1618c10..7d368f09c9e 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -243,6 +243,7 @@ anv_block_pool_expand_range(struct anv_block_pool
> *pool,
>  VkResult
>  anv_block_pool_init(struct anv_block_pool *pool,
>  struct anv_device *device,
> +uint64_t start_address,
>  uint32_t initial_size,
>  uint64_t bo_flags)
>  {
> @@ -250,6 +251,8 @@ anv_block_pool_init(struct anv_block_pool *pool,
>
> pool->device = device;
> pool->bo_flags = bo_flags;
> +   pool->start_address = canonical_address(start_address);
> +
> anv_bo_init(&pool->bo, 0, 0);
>
> pool->fd = memfd_create("block pool", MFD_CLOEXEC);
> @@ -402,6 +405,10 @@ anv_block_pool_expand_range(struct anv_block_pool
> *pool,
>  * hard work for us.
>  */
> anv_bo_init(&pool->bo, gem_handle, size);
> +   if (pool->bo_flags & EXEC_OBJECT_PINNED) {
> +  pool->bo.offset = pool->start_address + BLOCK_POOL_MEMFD_CENTER -
> + center_bo_offset;
> +   }
> pool->bo.flags = pool->bo_flags;
> pool->bo.map = map;
>
> @@ -610,10 +617,12 @@ anv_block_pool_alloc_back(struct anv_block_pool
> *pool,
>  VkResult
>  anv_state_pool_init(struct anv_state_pool *pool,
>  struct anv_device *device,
> +uint64_t start_address,
>  uint32_t block_size,
>  uint64_t bo_flags)
>  {
> VkResult result = anv_block_pool_init(&pool->block_pool, device,
> + start_address,
>   block_size * 16,
>   bo_flags);
> if (result != VK_SUCCESS)
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 9e21818ead1..0eafdf79696 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1615,21 +1615,28 @@ VkResult anv_CreateDevice(
> if (result != VK_SUCCESS)
>goto fail_batch_bo_pool;
>
> -   /* For the state pools we explicitly disable 48bit. */
> -   bo_flags = (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
> -  (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE :
> 0);
> +   if (physical_device->use_softpin)
> +  bo_flags |= EXEC_OBJECT_PINNED;
> +   else
> +  bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>
> -   result = anv_state_pool_init(&device->dynamic_state_pool, device,
> 16384,
> -bo_flags);
> +   result = anv_state_pool_init(&device->dynamic_state_pool, device,
> +DYNAMIC_STATE_POOL_MIN_ADDRESS,
> +16384,
> +bo_flags & ~EXEC_OBJECT_SUPPORTS_48B_
> ADDRESS);
>

Is this really needed now that we have a fixed address?  If not, we should
drop it.

With that,

Reviewed-by: Jason Ekstrand 


> if (result != VK_SUCCESS)
>goto fail_bo_cache;
>
> -   result = anv_state_pool_init(&device->instruction_state_pool, device,
> 16384,
> +   result = anv_state_pool_init(&device->instruction_state_pool, device,
> +INSTRUCTION_STATE_POOL_MIN_ADDRESS,
> +16384,
>  bo_flags);
> if (result != VK_SUCCESS)
>goto fail_dynamic_state_pool;
>
> -   result = anv_state_pool_init(&device->surface_state_pool, device,
> 4096,
> +   result = anv_state_pool_init(&device->surface_state_pool, device,
> +SURFACE_STATE_POOL_MIN_ADDRESS,
> +4096,
>  bo_flags);
> if (result != VK_SUCCESS)
>goto fail_instruction_state_pool;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 8807fe7e5fb..e1c05c33a08 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -631,6 +631,12 @@ struct anv_bloc

Re: [Mesa-dev] [PATCH v2 6/8] anv: use a separate pool for binding tables when soft pinning

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> Soft pinning lets us satisfy the binding table address
> requirements without using both sides of a growing state_pool.
>
> If you do use both sides of a state pool, then you need to read
> the state pool's center_bo_offset (with the device mutex held) to
> know the final offset of relocations that target the state pool
> bo.
>
> By having a separate pool for binding tables that only grows in
> the forward direction, the center_bo_offset is always 0 and
> relocations don't need an update pass to adjust relocations with
> the mutex held.
>
> v2: - don't introduce a separate state flag for separate binding tables
> (Jason)
> - replace bo and map accessors with a single binding_table_pool
> accessor (Jason)
> ---
>  src/intel/vulkan/anv_batch_chain.c | 25 +++--
>  src/intel/vulkan/anv_device.c  | 14 +-
>  src/intel/vulkan/anv_private.h | 24 
>  3 files changed, 52 insertions(+), 11 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> index 09514c7b84a..53b24551088 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -452,7 +452,7 @@ anv_cmd_buffer_surface_base_address(struct
> anv_cmd_buffer *cmd_buffer)
>  {
> struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_
> block_states);
> return (struct anv_address) {
> -  .bo = &cmd_buffer->device->surface_state_pool.block_pool.bo,
> +  .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
>.offset = bt_block->offset,
> };
>  }
> @@ -619,7 +619,8 @@ struct anv_state
>  anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
> uint32_t entries, uint32_t
> *state_offset)
>  {
> -   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_
> state_pool;
> +   struct anv_device *device = cmd_buffer->device;
> +   struct anv_state_pool *state_pool = &device->surface_state_pool;
> struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_
> block_states);
> struct anv_state state;
>
> @@ -629,12 +630,18 @@ anv_cmd_buffer_alloc_binding_table(struct
> anv_cmd_buffer *cmd_buffer,
>return (struct anv_state) { 0 };
>
> state.offset = cmd_buffer->bt_next;
> -   state.map = state_pool->block_pool.map + bt_block->offset +
> state.offset;
> +   state.map = anv_binding_table_pool(device)->block_pool.map +
> +  bt_block->offset + state.offset;
>
> cmd_buffer->bt_next += state.alloc_size;
>
> -   assert(bt_block->offset < 0);
> -   *state_offset = -bt_block->offset;
> +   if (device->instance->physicalDevice.use_softpin) {
>

Should we assert bt_block->offset >= 0 here?


> +  *state_offset = device->surface_state_pool.block_pool.start_address
> -
> + device->binding_table_pool.block_pool.start_address -
> bt_block->offset;
>

We could use the #defines here.  Doesn't really matter though.

Reviewed-by: Jason Ekstrand 


> +   } else {
> +  assert(bt_block->offset < 0);
> +  *state_offset = -bt_block->offset;
> +   }
>
> return state;
>  }
> @@ -658,15 +665,13 @@ anv_cmd_buffer_alloc_dynamic_state(struct
> anv_cmd_buffer *cmd_buffer,
>  VkResult
>  anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
>  {
> -   struct anv_state_pool *state_pool = &cmd_buffer->device->surface_
> state_pool;
> -
> struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_
> block_states);
> if (bt_block == NULL) {
>anv_batch_set_error(&cmd_buffer->batch,
> VK_ERROR_OUT_OF_HOST_MEMORY);
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>
> -   *bt_block = anv_state_pool_alloc_back(state_pool);
> +   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
> cmd_buffer->bt_next = 0;
>
> return VK_SUCCESS;
> @@ -740,7 +745,7 @@ anv_cmd_buffer_fini_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
>  {
> struct anv_state *bt_block;
> u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
> -  anv_state_pool_free(&cmd_buffer->device->surface_state_pool,
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> u_vector_finish(&cmd_buffer->bt_block_states);
>
> anv_reloc_list_finish(&cmd_buffer->surface_relocs,
> &cmd_buffer->pool->alloc);
> @@ -772,7 +777,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
>
> while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
>struct anv_state *bt_block = u_vector_remove(&cmd_buffer->
> bt_block_states);
> -  anv_state_pool_free(&cmd_buffer->device->surface_state_pool,
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> }
> assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
> cmd_buffer->bt_next = 0;
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> ind

Re: [Mesa-dev] [PATCH v2 7/8] anv: elide relocations to pinned target bos

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> References to pinned bos won't need relocated, so just write the
> final value of the reference into the bo. Add a `set` to the
> relocation lists for tracking dependencies that were previously
> tracked by relocations.
>
> v2: - visit bos from the dependency set in a deterministic order (Jason)
> ---
>  src/intel/vulkan/anv_batch_chain.c | 52 ++
> 
>  src/intel/vulkan/anv_private.h |  3 +++
>  2 files changed, 55 insertions(+)
>
> diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> index 53b24551088..eaee9afbd29 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -75,11 +75,24 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>
> +   list->deps = _mesa_set_create(NULL, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> +
> +   if (!list->deps) {
> +  vk_free(alloc, list->relocs);
> +  vk_free(alloc, list->reloc_bos);
> +  return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> +   }
> +
> if (other_list) {
>memcpy(list->relocs, other_list->relocs,
>   list->array_length * sizeof(*list->relocs));
>memcpy(list->reloc_bos, other_list->reloc_bos,
>   list->array_length * sizeof(*list->reloc_bos));
> +  struct set_entry *entry;
> +  set_foreach(other_list->deps, entry) {
> + _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +  }
> }
>
> return VK_SUCCESS;
> @@ -98,6 +111,7 @@ anv_reloc_list_finish(struct anv_reloc_list *list,
>  {
> vk_free(alloc, list->relocs);
> vk_free(alloc, list->reloc_bos);
> +   _mesa_set_destroy(list->deps, NULL);
>  }
>
>  static VkResult
> @@ -148,6 +162,11 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> struct drm_i915_gem_relocation_entry *entry;
> int index;
>
> +   if (target_bo->flags & EXEC_OBJECT_PINNED) {
> +  _mesa_set_add(list->deps, target_bo);
> +  return VK_SUCCESS;
> +   }
> +
> VkResult result = anv_reloc_list_grow(list, alloc, 1);
> if (result != VK_SUCCESS)
>return result;
> @@ -185,6 +204,12 @@ anv_reloc_list_append(struct anv_reloc_list *list,
>list->relocs[i + list->num_relocs].offset += offset;
>
> list->num_relocs += other->num_relocs;
> +
> +   struct set_entry *entry;
> +   set_foreach(other->deps, entry) {
> +  _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +   }
> +
> return VK_SUCCESS;
>  }
>
> @@ -338,6 +363,7 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct
> anv_batch *batch,
> batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
> batch->relocs = &bbo->relocs;
> bbo->relocs.num_relocs = 0;
> +   _mesa_set_clear(bbo->relocs.deps, NULL);
>  }
>
>  static void
> @@ -783,6 +809,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct
> anv_cmd_buffer *cmd_buffer)
> cmd_buffer->bt_next = 0;
>
> cmd_buffer->surface_relocs.num_relocs = 0;
> +   _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
> cmd_buffer->last_ss_pool_center = 0;
>
> /* Reset the list of seen buffers */
> @@ -985,6 +1012,14 @@ anv_execbuf_finish(struct anv_execbuf *exec,
> vk_free(alloc, exec->syncobjs);
>  }
>
> +static int
> +_compar_bo_handles(const void *_bo1, const void *_bo2)
>

As much as I like compar BO handles, I think I'd rather we compare them in
this case. :-)


> +{
> +   const struct anv_bo **bo1 = _bo1, **bo2 = _bo2;
> +
> +   return (*bo1)->gem_handle - (*bo2)->gem_handle;
> +}
> +
>  static VkResult
>  anv_execbuf_add_bo(struct anv_execbuf *exec,
> struct anv_bo *bo,
> @@ -1068,6 +1103,23 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
>   if (result != VK_SUCCESS)
>  return result;
>}
> +
> +  uint32_t entries = relocs->deps->entries;
>

Make this const please.  It makes me nervous when non-const things are used
for stack array sizes.  Also, I hope this doesn't blow up the stack too
big.  It's probably ok though.


> +  struct anv_bo *bos[entries], **bo = bos;
> +  struct set_entry *entry;
> +  set_foreach(relocs->deps, entry) {
> + *bo++ = entry->key;
> +  }
> +
> +  qsort(bos, entries, sizeof(struct anv_bo*), _compar_bo_handles);
> +
> +  for (bo = bos; bo < bos + entries; bo++) {
> + VkResult result = anv_execbuf_add_bo(exec, *bo, NULL,
> +  extra_flags, alloc);
> +
> + if (result != VK_SUCCESS)
> +return result;
> +  }
> }
>
> return VK_SUCCESS;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_
> private.h
> index 36e4589abc6..6d9c7d4dfb3 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -46,7 +46,9 @@
>  #include "blorp/blorp.h"
>  #include "compiler/brw

Re: [Mesa-dev] [PATCH v2 5/8] anv: soft pin state pools

2018-05-08 Thread Jordan Justen
On 2018-05-07 17:30:47, Scott D Phillips wrote:
> The state_pools reserve virtual address space of the full
> BLOCK_POOL_MEMFD_SIZE, but maintain the current behavior of
> growing from the middle.
> 
> v2: - rename block_pool::offset to block_pool::start_address (Jason)
> - assign state pool start_address statically (Jason)
> ---
>  src/intel/vulkan/anv_allocator.c   |  9 +
>  src/intel/vulkan/anv_device.c  | 21 ++---
>  src/intel/vulkan/anv_private.h |  8 
>  src/intel/vulkan/tests/block_pool_no_free.c|  2 +-
>  src/intel/vulkan/tests/state_pool.c|  2 +-
>  src/intel/vulkan/tests/state_pool_free_list_only.c |  2 +-
>  src/intel/vulkan/tests/state_pool_no_free.c|  2 +-
>  7 files changed, 35 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_allocator.c 
> b/src/intel/vulkan/anv_allocator.c
> index 642e1618c10..7d368f09c9e 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -243,6 +243,7 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
>  VkResult
>  anv_block_pool_init(struct anv_block_pool *pool,
>  struct anv_device *device,
> +uint64_t start_address,
>  uint32_t initial_size,
>  uint64_t bo_flags)
>  {
> @@ -250,6 +251,8 @@ anv_block_pool_init(struct anv_block_pool *pool,
>  
> pool->device = device;
> pool->bo_flags = bo_flags;
> +   pool->start_address = canonical_address(start_address);
> +
> anv_bo_init(&pool->bo, 0, 0);
>  
> pool->fd = memfd_create("block pool", MFD_CLOEXEC);
> @@ -402,6 +405,10 @@ anv_block_pool_expand_range(struct anv_block_pool *pool,
>  * hard work for us.
>  */
> anv_bo_init(&pool->bo, gem_handle, size);
> +   if (pool->bo_flags & EXEC_OBJECT_PINNED) {
> +  pool->bo.offset = pool->start_address + BLOCK_POOL_MEMFD_CENTER -
> + center_bo_offset;
> +   }
> pool->bo.flags = pool->bo_flags;
> pool->bo.map = map;
>  
> @@ -610,10 +617,12 @@ anv_block_pool_alloc_back(struct anv_block_pool *pool,
>  VkResult
>  anv_state_pool_init(struct anv_state_pool *pool,
>  struct anv_device *device,
> +uint64_t start_address,
>  uint32_t block_size,
>  uint64_t bo_flags)
>  {
> VkResult result = anv_block_pool_init(&pool->block_pool, device,
> + start_address,
>   block_size * 16,
>   bo_flags);
> if (result != VK_SUCCESS)
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 9e21818ead1..0eafdf79696 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1615,21 +1615,28 @@ VkResult anv_CreateDevice(
> if (result != VK_SUCCESS)
>goto fail_batch_bo_pool;
>  
> -   /* For the state pools we explicitly disable 48bit. */
> -   bo_flags = (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
> -  (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0);
> +   if (physical_device->use_softpin)
> +  bo_flags |= EXEC_OBJECT_PINNED;
> +   else
> +  bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>  
> -   result = anv_state_pool_init(&device->dynamic_state_pool, device, 16384,
> -bo_flags);
> +   result = anv_state_pool_init(&device->dynamic_state_pool, device,
> +DYNAMIC_STATE_POOL_MIN_ADDRESS,

I guess you could add these defines in this patch vs the previous, but
it doesn't seem like too big of a deal.

Reviewed-by: Jordan Justen 

> +16384,
> +bo_flags & 
> ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS);
> if (result != VK_SUCCESS)
>goto fail_bo_cache;
>  
> -   result = anv_state_pool_init(&device->instruction_state_pool, device, 
> 16384,
> +   result = anv_state_pool_init(&device->instruction_state_pool, device,
> +INSTRUCTION_STATE_POOL_MIN_ADDRESS,
> +16384,
>  bo_flags);
> if (result != VK_SUCCESS)
>goto fail_dynamic_state_pool;
>  
> -   result = anv_state_pool_init(&device->surface_state_pool, device, 4096,
> +   result = anv_state_pool_init(&device->surface_state_pool, device,
> +SURFACE_STATE_POOL_MIN_ADDRESS,
> +4096,
>  bo_flags);
> if (result != VK_SUCCESS)
>goto fail_instruction_state_pool;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 8807fe7e5fb..e1c05c33a08 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -631,6 +631,12 @@ struct anv_blo

Re: [Mesa-dev] [PATCH 13/17] i965: Update the indirect buffer in set_clear_color

2018-05-08 Thread Nanley Chery
On Tue, May 08, 2018 at 09:24:19AM +0300, Pohjolainen, Topi wrote:
> On Thu, May 03, 2018 at 12:04:00PM -0700, Nanley Chery wrote:
> > Although BLORP currently does the update when performing a fast clear,
> > it's simpler to do it ourselves. Remove the dependency on BLORP.
> 
> Should we note in the commit message that until patch 17 this now gets done
> twice in a row in those cases where the actual fast clear op is submitted? But
> that it shouldn't matter much in practise as the subsequent flushes shouldn't
> do anything because there isn't any work submitted between.
> 

That was also my thinking about the flushes.

> Perhaps also a note that this allows later patch (number 15 in this series) to
> start skipping the actual fast clear op and just update the clear color.
> 

How about this:

   For depth buffers, we avoid fast-clearing if the aux_state is already
   CLEAR. We do the same for color buffers only if the clear color
   doesn't change. We require that the clear colors match because, in
   that case, we don't update the indirect clear color outside of BLORP.

   Update the indirect clear color for color buffers as well. We'll
   enable the same depth buffer optimization for color buffers in a
   later patch.

   Note that we're now actually updating the indirect clear color twice
   in the case where we use BLORP to perform the fast-clear. This is
   only temporary. In later patches, we'll prevent BLORP from performing
   the update.

-Nanley

> > ---
> >  src/mesa/drivers/dri/i965/brw_clear.c | 37 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 13 ++
> >  2 files changed, 13 insertions(+), 37 deletions(-)
> > 
> > diff --git a/src/mesa/drivers/dri/i965/brw_clear.c 
> > b/src/mesa/drivers/dri/i965/brw_clear.c
> > index ba79447fc87..a65839a0a05 100644
> > --- a/src/mesa/drivers/dri/i965/brw_clear.c
> > +++ b/src/mesa/drivers/dri/i965/brw_clear.c
> > @@ -108,7 +108,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
> > struct intel_mipmap_tree *mt = depth_irb->mt;
> > struct gl_renderbuffer_attachment *depth_att = 
> > &fb->Attachment[BUFFER_DEPTH];
> > const struct gen_device_info *devinfo = &brw->screen->devinfo;
> > -   bool same_clear_value = true;
> >  
> > if (devinfo->gen < 6)
> >return false;
> > @@ -215,42 +214,6 @@ brw_fast_clear_depth(struct gl_context *ctx)
> >  
> >const union isl_color_value clear_color = { .f32 = {clear_value, } };
> >intel_miptree_set_clear_color(brw, mt, clear_color);
> > -  same_clear_value = false;
> > -   }
> > -
> > -   bool need_clear = false;
> > -   for (unsigned a = 0; a < num_layers; a++) {
> > -  enum isl_aux_state aux_state =
> > - intel_miptree_get_aux_state(mt, depth_irb->mt_level,
> > - depth_irb->mt_layer + a);
> > -
> > -  if (aux_state != ISL_AUX_STATE_CLEAR) {
> > - need_clear = true;
> > - break;
> > -  }
> > -   }
> > -
> > -   if (!need_clear) {
> > -  if (devinfo->gen >= 10 && !same_clear_value) {
> > - /* Before gen10, it was enough to just update the clear value in 
> > the
> > -  * miptree. But on gen10+, we let blorp update the clear value 
> > state
> > -  * buffer when doing a fast clear. Since we are skipping the fast
> > -  * clear here, we need to update the clear color ourselves.
> > -  */
> > - uint32_t clear_offset = mt->aux_buf->clear_color_offset;
> > - union isl_color_value clear_color = { .f32 = { clear_value, } };
> > -
> > - /* We can't update the clear color while the hardware is still 
> > using
> > -  * the previous one for a resolve or sampling from it. So make 
> > sure
> > -  * that there's no pending commands at this point.
> > -  */
> > - brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
> > - for (int i = 0; i < 4; i++) {
> > -brw_store_data_imm32(brw, mt->aux_buf->clear_color_bo,
> > - clear_offset + i * 4, clear_color.u32[i]);
> > - }
> > - brw_emit_pipe_control_flush(brw, 
> > PIPE_CONTROL_STATE_CACHE_INVALIDATE);
> > -  }
> > }
> >  
> > for (unsigned a = 0; a < num_layers; a++) {
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c 
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index 07ce2ac2adf..bd4ddbc2f58 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -3739,6 +3739,19 @@ intel_miptree_set_clear_color(struct brw_context 
> > *brw,
> >  {
> > if (memcmp(&mt->fast_clear_color, &clear_color, sizeof(clear_color)) != 
> > 0) {
> >mt->fast_clear_color = clear_color;
> > +  if (mt->aux_buf->clear_color_bo) {
> > + /* We can't update the clear color while the hardware is still 
> > using
> > +  * the previous one for a resolve or sampli

Re: [Mesa-dev] [PATCH v2 6/8] anv: use a separate pool for binding tables when soft pinning

2018-05-08 Thread Jordan Justen
Reviewed-by: Jordan Justen 

On 2018-05-07 17:30:48, Scott D Phillips wrote:
> Soft pinning lets us satisfy the binding table address
> requirements without using both sides of a growing state_pool.
> 
> If you do use both sides of a state pool, then you need to read
> the state pool's center_bo_offset (with the device mutex held) to
> know the final offset of relocations that target the state pool
> bo.
> 
> By having a separate pool for binding tables that only grows in
> the forward direction, the center_bo_offset is always 0 and
> relocations don't need an update pass to adjust relocations with
> the mutex held.
> 
> v2: - don't introduce a separate state flag for separate binding tables 
> (Jason)
> - replace bo and map accessors with a single binding_table_pool accessor 
> (Jason)
> ---
>  src/intel/vulkan/anv_batch_chain.c | 25 +++--
>  src/intel/vulkan/anv_device.c  | 14 +-
>  src/intel/vulkan/anv_private.h | 24 
>  3 files changed, 52 insertions(+), 11 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_batch_chain.c 
> b/src/intel/vulkan/anv_batch_chain.c
> index 09514c7b84a..53b24551088 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -452,7 +452,7 @@ anv_cmd_buffer_surface_base_address(struct anv_cmd_buffer 
> *cmd_buffer)
>  {
> struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
> return (struct anv_address) {
> -  .bo = &cmd_buffer->device->surface_state_pool.block_pool.bo,
> +  .bo = &anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
>.offset = bt_block->offset,
> };
>  }
> @@ -619,7 +619,8 @@ struct anv_state
>  anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
> uint32_t entries, uint32_t *state_offset)
>  {
> -   struct anv_state_pool *state_pool = 
> &cmd_buffer->device->surface_state_pool;
> +   struct anv_device *device = cmd_buffer->device;
> +   struct anv_state_pool *state_pool = &device->surface_state_pool;
> struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
> struct anv_state state;
>  
> @@ -629,12 +630,18 @@ anv_cmd_buffer_alloc_binding_table(struct 
> anv_cmd_buffer *cmd_buffer,
>return (struct anv_state) { 0 };
>  
> state.offset = cmd_buffer->bt_next;
> -   state.map = state_pool->block_pool.map + bt_block->offset + state.offset;
> +   state.map = anv_binding_table_pool(device)->block_pool.map +
> +  bt_block->offset + state.offset;
>  
> cmd_buffer->bt_next += state.alloc_size;
>  
> -   assert(bt_block->offset < 0);
> -   *state_offset = -bt_block->offset;
> +   if (device->instance->physicalDevice.use_softpin) {
> +  *state_offset = device->surface_state_pool.block_pool.start_address -
> + device->binding_table_pool.block_pool.start_address - 
> bt_block->offset;
> +   } else {
> +  assert(bt_block->offset < 0);
> +  *state_offset = -bt_block->offset;
> +   }
>  
> return state;
>  }
> @@ -658,15 +665,13 @@ anv_cmd_buffer_alloc_dynamic_state(struct 
> anv_cmd_buffer *cmd_buffer,
>  VkResult
>  anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
>  {
> -   struct anv_state_pool *state_pool = 
> &cmd_buffer->device->surface_state_pool;
> -
> struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
> if (bt_block == NULL) {
>anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>  
> -   *bt_block = anv_state_pool_alloc_back(state_pool);
> +   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
> cmd_buffer->bt_next = 0;
>  
> return VK_SUCCESS;
> @@ -740,7 +745,7 @@ anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer 
> *cmd_buffer)
>  {
> struct anv_state *bt_block;
> u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
> -  anv_state_pool_free(&cmd_buffer->device->surface_state_pool, 
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> u_vector_finish(&cmd_buffer->bt_block_states);
>  
> anv_reloc_list_finish(&cmd_buffer->surface_relocs, 
> &cmd_buffer->pool->alloc);
> @@ -772,7 +777,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer 
> *cmd_buffer)
>  
> while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
>struct anv_state *bt_block = 
> u_vector_remove(&cmd_buffer->bt_block_states);
> -  anv_state_pool_free(&cmd_buffer->device->surface_state_pool, 
> *bt_block);
> +  anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
> }
> assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
> cmd_buffer->bt_next = 0;
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index 0eafdf79696..afb5b2a4f5d 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
>

Re: [Mesa-dev] [PATCH 8/8] anv: soft pin the remaining bos

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips  wrote:

> ---
>  src/intel/vulkan/anv_allocator.c   | 16 +++-
>  src/intel/vulkan/anv_batch_chain.c | 27 +--
>  src/intel/vulkan/anv_device.c  | 32 
>  src/intel/vulkan/anv_private.h | 16 
>  src/intel/vulkan/anv_queue.c   |  2 +-
>  src/intel/vulkan/genX_blorp_exec.c |  6 ++
>  src/intel/vulkan/genX_cmd_buffer.c | 26 +-
>  src/intel/vulkan/genX_query.c  |  6 ++
>  8 files changed, 102 insertions(+), 29 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_
> allocator.c
> index 7d368f09c9e..a1a306817e0 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -985,6 +985,7 @@ anv_bo_pool_finish(struct anv_bo_pool *pool)
>   struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
>
>   anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
> + anv_vma_free(pool->device, &link_copy.bo);
>   anv_gem_close(pool->device, link_copy.bo.gem_handle);
>   link = link_copy.next;
>}
> @@ -1024,11 +1025,15 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct
> anv_bo *bo, uint32_t size)
>
> new_bo.flags = pool->bo_flags;
>
> +   if (!anv_vma_alloc(pool->device, &new_bo))
> +  return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
> +
> assert(new_bo.size == pow2_size);
>
> new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0,
> pow2_size, 0);
> if (new_bo.map == MAP_FAILED) {
>anv_gem_close(pool->device, new_bo.gem_handle);
> +  anv_vma_free(pool->device, &new_bo);
>return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
> }
>
> @@ -1072,8 +1077,10 @@ anv_scratch_pool_finish(struct anv_device *device,
> struct anv_scratch_pool *pool
> for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
>for (unsigned i = 0; i < 16; i++) {
>   struct anv_scratch_bo *bo = &pool->bos[i][s];
> - if (bo->exists > 0)
> + if (bo->exists > 0) {
> +anv_vma_free(device, &bo->bo);
>  anv_gem_close(device, bo->bo.gem_handle);
> + }
>}
> }
>  }
> @@ -1171,6 +1178,11 @@ anv_scratch_pool_alloc(struct anv_device *device,
> struct anv_scratch_pool *pool,
> if (device->instance->physicalDevice.has_exec_async)
>bo->bo.flags |= EXEC_OBJECT_ASYNC;
>
> +   if (device->instance->physicalDevice.use_softpin)
> +  bo->bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   anv_vma_alloc(device, &bo->bo);
> +
> /* Set the exists last because it may be read by other threads */
> __sync_synchronize();
> bo->exists = true;
> @@ -1390,6 +1402,8 @@ anv_bo_cache_release(struct anv_device *device,
> if (bo->bo.map)
>anv_gem_munmap(bo->bo.map, bo->bo.size);
>
> +   anv_vma_free(device, bo);
> +
> anv_gem_close(device, bo->bo.gem_handle);
>
> /* Don't unlock until we've actually closed the BO.  The whole point of
> diff --git a/src/intel/vulkan/anv_batch_chain.c
> b/src/intel/vulkan/anv_batch_chain.c
> index eaee9afbd29..9b0cc984599 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -430,6 +430,7 @@ anv_batch_bo_list_clone(const struct list_head *list,
>  struct list_head *new_list)
>  {
> VkResult result = VK_SUCCESS;
> +   struct anv_device *device = cmd_buffer->device;
>
> list_inithead(new_list);
>
> @@ -448,8 +449,14 @@ anv_batch_bo_list_clone(const struct list_head *list,
>* as it will always be the last relocation in the list.
>*/
>   uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
> - assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> - prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> + if (last_idx == -1) {
> +write_reloc(device, prev_bbo->bo.map + prev_bbo->length -
> +(device->info.gen >= 8 ? 8 : 4),
> new_bbo->bo.offset,
> +false);
> + } else {
> +assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> +prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> + }
>

I'm not convinced this is correct.  It heavily depends on two things:

 1) We will either have a reloc to the next batch_bo or we will have no
relocs at all
 2) That we can find the address in the MI_BATCH_BUFFER_START by a fixed
calculation based on hardware generation

This code is fragile enough as is without adding yet more subtle
assumptions.

Maybe the thing to do here is to stash off some bit of information for
reliably finding the MI_BATCH_BUFFER_START such as a pointer to the command
or to its jump address.

As for the "last_idx == -1" check itself, maybe that should just be a
"use_softpin" check.  Or, for that matter, you could just leave the
"chain_addr" field mentioned above NULL if softpin 

Re: [Mesa-dev] [PATCH 01/17] i965/miptree: Fix handling of uninitialized MCS buffers

2018-05-08 Thread Jason Ekstrand
On Thu, May 3, 2018 at 12:03 PM, Nanley Chery  wrote:

> Before this patch, if we failed to initialize an MCS buffer, we'd
> end up in a state in which the miptree thinks it has an MCS buffer,
> but doesn't. We also leaked the clear_color_bo if it existed.
>
> With this patch, we now free the miptree aux buffer resources and let
> intel_miptree_alloc_mcs() know that the MCS buffer no longer exists.
>
> Cc: 
> ---
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 +++---
>  1 file changed, 7 insertions(+), 7 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index b9a564552df..377efae32c9 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -1658,7 +1658,7 @@ intel_miptree_copy_teximage(struct brw_context *brw,
> intel_obj->needs_validate = true;
>  }
>
> -static void
> +static bool
>  intel_miptree_init_mcs(struct brw_context *brw,
> struct intel_mipmap_tree *mt,
> int init_value)
> @@ -1678,13 +1678,14 @@ intel_miptree_init_mcs(struct brw_context *brw,
> void *map = brw_bo_map(brw, mt->aux_buf->bo, MAP_WRITE | MAP_RAW);
> if (unlikely(map == NULL)) {
>fprintf(stderr, "Failed to map mcs buffer into GTT\n");
> -  brw_bo_unreference(mt->aux_buf->bo);
> -  free(mt->aux_buf);
> -  return;
> +  intel_miptree_aux_buffer_free(mt->aux_buf);
> +  mt->aux_buf = NULL;
> +  return false;
> }
> void *data = map;
> memset(data, init_value, mt->aux_buf->size);
> brw_bo_unmap(mt->aux_buf->bo);
> +   return true;
>  }
>
>  static struct intel_miptree_aux_buffer *
> @@ -1764,15 +1765,14 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> const uint32_t alloc_flags = 0;
> mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
>  &temp_mcs_surf, alloc_flags, mt);
> -   if (!mt->aux_buf) {
> +   if (!mt->aux_buf ||
> +   !intel_miptree_init_mcs(brw, mt, 0xFF)) {
>

You're leaking mt->aux_buf here.


>free(aux_state);
>return false;
> }
>
> mt->aux_state = aux_state;
>
> -   intel_miptree_init_mcs(brw, mt, 0xFF);
> -
> return true;
>  }
>
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH 04/17] i965/miptree: Initialize the indirect clear color to zero

2018-05-08 Thread Jason Ekstrand
On Thu, May 3, 2018 at 12:03 PM, Nanley Chery  wrote:

> The indirect clear color isn't correctly tracked in
> intel_miptree::fast_clear_color. The initial value of ::fast_clear_color
> is zero, while that of the indirect clear color is undefined or
> non-zero.
>
> Topi Pohjolainen discovered this issue with MCS buffers. This issue is
> apparent when fast-clearing an MCS buffer for the first time with
> glClearColor = {0.0,}. Although the indirect clear color is non-zero,
> the initial aux state of the MCS is CLEAR and the tracked clear color is
> zero, so we avoid updating the indirect clear color with {0.0,}.
>
> Make the indirect clear color match the initial value of
> ::fast_clear_color.
>
> ---
>
> Hey Topi,
>
> Just FYI, this patch should fix the MCS bug you reported earlier.
>
>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 33
> ++-
>  1 file changed, 22 insertions(+), 11 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> index 5d3ee569bd8..e70c9ff1ef4 100644
> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> @@ -978,11 +978,11 @@ create_ccs_buf_for_image(struct brw_context *brw,
>  * system with CCS, we don't have the extra space at the end of the aux
>  * buffer. So create a new bo here that will store that clear color.
>  */
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -   if (devinfo->gen >= 10) {
> +   if (brw->isl_dev.ss.clear_color_state_size > 0) {
>mt->aux_buf->clear_color_bo =
> - brw_bo_alloc(brw->bufmgr, "clear_color_bo",
> -  brw->isl_dev.ss.clear_color_state_size);
> + brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
> +brw->isl_dev.ss.clear_color_state_size,
> +I915_TILING_NONE, 0, BO_ALLOC_ZEROED);
>if (!mt->aux_buf->clear_color_bo) {
>   free(mt->aux_buf);
>   mt->aux_buf = NULL;
> @@ -1673,9 +1673,9 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>
> buf->size = aux_surf->size;
>
> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
> -   if (devinfo->gen >= 10) {
> -  /* On CNL, instead of setting the clear color in the SURFACE_STATE,
> we
> +   const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size
> > 0;
> +   if (has_indirect_clear) {
> +  /* On CNL+, instead of setting the clear color in the
> SURFACE_STATE, we
> * will set a pointer to a dword somewhere that contains the color.
> So,
> * allocate the space for the clear color value here on the aux
> buffer.
> */
> @@ -1698,7 +1698,8 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> }
>
> /* Initialize the bo to the desired value */
> -   if (wants_memset) {
> +   const bool needs_memset = wants_memset || has_indirect_clear;
> +   if (needs_memset) {
>

I don't think the temporary bool is doing you any good.  Just doing

if (wants_memset || has_indirect_clear) {
   /* map */
   if (wants_memset) ...
   if (has_indirect_clear) ...
}

is simpler.  This needs_memset thing makes it look like we're going "Ha!
You have an indirect clear so you're getting a memset even though you
didn't ask for one!"


>assert(!(alloc_flags & BO_ALLOC_BUSY));
>
>void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
> @@ -1706,11 +1707,21 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>   intel_miptree_aux_buffer_free(buf);
>   return NULL;
>}
> -  memset(map, memset_value, mt->aux_buf->size);
> +
> +  /* Memset the aux_surf portion of the BO. */
> +  if (wants_memset)
> + memset(map, memset_value, aux_surf->size);
> +
> +  /* Zero the indirect clear color to match ::fast_clear_color. */
> +  if (has_indirect_clear) {
> + memset((char *)map + buf->clear_color_offset, 0,
> +brw->isl_dev.ss.clear_color_state_size);
> +  }
> +
>brw_bo_unmap(buf->bo);
> }
>
> -   if (devinfo->gen >= 10) {
> +   if (has_indirect_clear) {
>buf->clear_color_bo = buf->bo;
>brw_bo_reference(buf->clear_color_bo);
> }
> @@ -1869,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
> assert(ok);
>
> -   const uint32_t alloc_flags = BO_ALLOC_BUSY;
> +   const uint32_t alloc_flags = 0;
> mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
> &temp_hiz_surf,
>  alloc_flags, false, 0, mt);
>
> --
> 2.16.2
>
> ___
> mesa-dev mailing list
> mesa-dev@lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/

Re: [Mesa-dev] [PATCH 06/17] i965/miptree: Drop the alloc_flags param from alloc_aux_buffer

2018-05-08 Thread Jason Ekstrand
On Mon, May 7, 2018 at 11:04 AM, Nanley Chery  wrote:

> On Mon, May 07, 2018 at 03:06:29PM +0300, Pohjolainen, Topi wrote:
> > On Thu, May 03, 2018 at 12:03:53PM -0700, Nanley Chery wrote:
> > > We have enough information to determine the optimal flags internally.
> > > ---
> > >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 29
> +--
> > >  1 file changed, 14 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > index 566ead0d5c8..e065c2f62e0 100644
> > > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > > @@ -1661,7 +1661,6 @@ intel_miptree_copy_teximage(struct brw_context
> *brw,
> > >  static struct intel_miptree_aux_buffer *
> > >  intel_alloc_aux_buffer(struct brw_context *brw,
> > > const struct isl_surf *aux_surf,
> > > -   uint32_t alloc_flags,
> > > bool wants_memset,
> > > uint8_t memset_value,
> > > struct intel_mipmap_tree *mt)
> > > @@ -1685,6 +1684,17 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > buf->pitch = aux_surf->row_pitch;
> > > buf->qpitch = isl_surf_get_array_pitch_sa_rows(aux_surf);
> > >
> > > +   /* If the buffer needs to be initialised (requiring the buffer to
> be
> > > +* immediately mapped to cpu space for writing), do not use the
> gpu access
> > > +* flag which can cause an unnecessary delay if the backing pages
> happened
> > > +* to be just used by the GPU.
> > > +*/
> > > +   const bool alloc_zeroed = wants_memset && memset_value == 0;
> > > +   const bool needs_memset =
> > > +  !alloc_zeroed && (wants_memset || has_indirect_clear);
> > > +   const uint32_t alloc_flags =
> > > +  alloc_zeroed ? BO_ALLOC_ZEROED : (needs_memset ? 0 :
> BO_ALLOC_BUSY);
> > > +
> >
> > What you have is correct but double ternaries always make my head spin.
> How
> > would you feel:
> >
> >   uint32_t alloc_flags = 0;
> >   if (alloc_zeroed)
> >  alloc_flags = BO_ALLOC_ZEROED;
> >   else if (!wants_memset && !has_indirect_clear)
> >  alloc_flags = BO_ALLOC_BUSY;
> >
>
> I was hoping this nested ternary would survive, but I don't mind
> replacing it. I'd prefer to be more explicit about the case in which we
> want to assign alloc_flags to 0 with something like:
>
>uint32_t alloc_flags;
>if (alloc_zeroed) {
>   alloc_flags = BO_ALLOC_ZEROED;
>} else if (needs_memset) {
>   alloc_flags = 0;
>} else {
>   alloc_flags = BO_ALLOC_BUSY;
>}
>
> OR:
>
>uint32_t alloc_flags;
>if (needs_memset) {
>   alloc_flags = (memset_value == 0) ? BO_ALLOC_ZEROED : 0;
>} else {
>   alloc_flags = BO_ALLOC_BUSY;
>}
>
> Thoughts?
>

How about something like this:

bool memset_surface = wants_memset;
bool memset_clear_value = has_indirect_clear;
alloc_flags = 0;
if (wants_memset && memset_value == 0) {
   /* The allocator can do the memset to 0 for us */
   alloc_flags |= BO_ALLOC_ZEROED;
   wants_memset = false;
   memset_clear_value = false;
}

if (!memset_surface && !memset_clear_value)
   alloc_flags |= BO_ALLOC_BUSY;

I'm not sure that I'm helping either. Yeah, let's just go with what you
have here.


> I just noticed that the variable naming could use some work. Maybe:
>
> * wants_memset -> wants_aux_surf_memset
> * memset_value -> aux_surf_memset_value
> * needs_memset -> aux_bo_needs_memset
>
> Would you like me to do something like this in a follow-on patch?
>
> -Nanley
>
> > > /* ISL has stricter set of alignment rules then the drm allocator.
> > >  * Therefore one can pass the ISL dimensions in terms of bytes
> instead of
> > >  * trying to recalculate based on different format block sizes.
> > > @@ -1697,7 +1707,6 @@ intel_alloc_aux_buffer(struct brw_context *brw,
> > > }
> > >
> > > /* Initialize the bo to the desired value */
> > > -   const bool needs_memset = wants_memset || has_indirect_clear;
> > > if (needs_memset) {
> > >assert(!(alloc_flags & BO_ALLOC_BUSY));
> > >
> > > @@ -1752,12 +1761,6 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> > >isl_surf_get_mcs_surf(&brw->isl_dev, &mt->surf,
> &temp_mcs_surf);
> > > assert(ok);
> > >
> > > -   /* Buffer needs to be initialised requiring the buffer to be
> immediately
> > > -* mapped to cpu space for writing. Therefore do not use the gpu
> access
> > > -* flag which can cause an unnecessary delay if the backing pages
> happened
> > > -* to be just used by the GPU.
> > > -*/
> > > -   const uint32_t alloc_flags = 0;
> > > /* From the Ivy Bridge PRM, Vol 2 Part 1 p326:
> > >  *
> > >  * When MCS buffer is enabled and bound to MSRT, it is
> required that it
> > > @@ -1768,8 +1771,7 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> > >   

Re: [Mesa-dev] [PATCH 04/17] i965/miptree: Initialize the indirect clear color to zero

2018-05-08 Thread Jason Ekstrand
On Tue, May 8, 2018 at 3:41 PM, Jason Ekstrand  wrote:

> On Thu, May 3, 2018 at 12:03 PM, Nanley Chery 
> wrote:
>
>> The indirect clear color isn't correctly tracked in
>> intel_miptree::fast_clear_color. The initial value of ::fast_clear_color
>> is zero, while that of the indirect clear color is undefined or
>> non-zero.
>>
>> Topi Pohjolainen discovered this issue with MCS buffers. This issue is
>> apparent when fast-clearing an MCS buffer for the first time with
>> glClearColor = {0.0,}. Although the indirect clear color is non-zero,
>> the initial aux state of the MCS is CLEAR and the tracked clear color is
>> zero, so we avoid updating the indirect clear color with {0.0,}.
>>
>> Make the indirect clear color match the initial value of
>> ::fast_clear_color.
>>
>> ---
>>
>> Hey Topi,
>>
>> Just FYI, this patch should fix the MCS bug you reported earlier.
>>
>>  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 33
>> ++-
>>  1 file changed, 22 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> index 5d3ee569bd8..e70c9ff1ef4 100644
>> --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
>> @@ -978,11 +978,11 @@ create_ccs_buf_for_image(struct brw_context *brw,
>>  * system with CCS, we don't have the extra space at the end of the
>> aux
>>  * buffer. So create a new bo here that will store that clear color.
>>  */
>> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> -   if (devinfo->gen >= 10) {
>> +   if (brw->isl_dev.ss.clear_color_state_size > 0) {
>>mt->aux_buf->clear_color_bo =
>> - brw_bo_alloc(brw->bufmgr, "clear_color_bo",
>> -  brw->isl_dev.ss.clear_color_state_size);
>> + brw_bo_alloc_tiled(brw->bufmgr, "clear_color_bo",
>> +brw->isl_dev.ss.clear_color_state_size,
>> +I915_TILING_NONE, 0, BO_ALLOC_ZEROED);
>>if (!mt->aux_buf->clear_color_bo) {
>>   free(mt->aux_buf);
>>   mt->aux_buf = NULL;
>> @@ -1673,9 +1673,9 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>>
>> buf->size = aux_surf->size;
>>
>> -   const struct gen_device_info *devinfo = &brw->screen->devinfo;
>> -   if (devinfo->gen >= 10) {
>> -  /* On CNL, instead of setting the clear color in the
>> SURFACE_STATE, we
>> +   const bool has_indirect_clear = brw->isl_dev.ss.clear_color_state_size
>> > 0;
>> +   if (has_indirect_clear) {
>> +  /* On CNL+, instead of setting the clear color in the
>> SURFACE_STATE, we
>> * will set a pointer to a dword somewhere that contains the
>> color. So,
>> * allocate the space for the clear color value here on the aux
>> buffer.
>> */
>> @@ -1698,7 +1698,8 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>> }
>>
>> /* Initialize the bo to the desired value */
>> -   if (wants_memset) {
>> +   const bool needs_memset = wants_memset || has_indirect_clear;
>> +   if (needs_memset) {
>>
>
> I don't think the temporary bool is doing you any good.  Just doing
>
> if (wants_memset || has_indirect_clear) {
>/* map */
>if (wants_memset) ...
>if (has_indirect_clear) ...
> }
>
> is simpler.  This needs_memset thing makes it look like we're going "Ha!
> You have an indirect clear so you're getting a memset even though you
> didn't ask for one!"
>

Never mind me.  The next patch makes this make sense.

--Jason


>assert(!(alloc_flags & BO_ALLOC_BUSY));
>>
>>void *map = brw_bo_map(brw, buf->bo, MAP_WRITE | MAP_RAW);
>> @@ -1706,11 +1707,21 @@ intel_alloc_aux_buffer(struct brw_context *brw,
>>   intel_miptree_aux_buffer_free(buf);
>>   return NULL;
>>}
>> -  memset(map, memset_value, mt->aux_buf->size);
>> +
>> +  /* Memset the aux_surf portion of the BO. */
>> +  if (wants_memset)
>> + memset(map, memset_value, aux_surf->size);
>> +
>> +  /* Zero the indirect clear color to match ::fast_clear_color. */
>> +  if (has_indirect_clear) {
>> + memset((char *)map + buf->clear_color_offset, 0,
>> +brw->isl_dev.ss.clear_color_state_size);
>> +  }
>> +
>>brw_bo_unmap(buf->bo);
>> }
>>
>> -   if (devinfo->gen >= 10) {
>> +   if (has_indirect_clear) {
>>buf->clear_color_bo = buf->bo;
>>brw_bo_reference(buf->clear_color_bo);
>> }
>> @@ -1869,7 +1880,7 @@ intel_miptree_alloc_hiz(struct brw_context *brw,
>>isl_surf_get_hiz_surf(&brw->isl_dev, &mt->surf, &temp_hiz_surf);
>> assert(ok);
>>
>> -   const uint32_t alloc_flags = BO_ALLOC_BUSY;
>> +   const uint32_t alloc_flags = 0;
>> mt->aux_buf = intel_alloc_aux_buffer(brw, "hiz-miptree",
>> &temp_hiz_surf,
>>  alloc_flags, false, 0, mt);
>>
>> --
>> 2.16.2
>>
>> ___
>> 

Re: [Mesa-dev] [PATCH 01/17] i965/miptree: Fix handling of uninitialized MCS buffers

2018-05-08 Thread Nanley Chery
On Tue, May 08, 2018 at 03:33:22PM -0700, Jason Ekstrand wrote:
> On Thu, May 3, 2018 at 12:03 PM, Nanley Chery  wrote:
> 
> > Before this patch, if we failed to initialize an MCS buffer, we'd
> > end up in a state in which the miptree thinks it has an MCS buffer,
> > but doesn't. We also leaked the clear_color_bo if it existed.
> >
> > With this patch, we now free the miptree aux buffer resources and let
> > intel_miptree_alloc_mcs() know that the MCS buffer no longer exists.
> >
> > Cc: 
> > ---
> >  src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 14 +++---
> >  1 file changed, 7 insertions(+), 7 deletions(-)
> >
> > diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > index b9a564552df..377efae32c9 100644
> > --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c
> > @@ -1658,7 +1658,7 @@ intel_miptree_copy_teximage(struct brw_context *brw,
> > intel_obj->needs_validate = true;
> >  }
> >
> > -static void
> > +static bool
> >  intel_miptree_init_mcs(struct brw_context *brw,
> > struct intel_mipmap_tree *mt,
> > int init_value)
> > @@ -1678,13 +1678,14 @@ intel_miptree_init_mcs(struct brw_context *brw,
> > void *map = brw_bo_map(brw, mt->aux_buf->bo, MAP_WRITE | MAP_RAW);
> > if (unlikely(map == NULL)) {
> >fprintf(stderr, "Failed to map mcs buffer into GTT\n");
> > -  brw_bo_unreference(mt->aux_buf->bo);
> > -  free(mt->aux_buf);
> > -  return;
> > +  intel_miptree_aux_buffer_free(mt->aux_buf);
> > +  mt->aux_buf = NULL;
> > +  return false;
> > }
> > void *data = map;
> > memset(data, init_value, mt->aux_buf->size);
> > brw_bo_unmap(mt->aux_buf->bo);
> > +   return true;
> >  }
> >
> >  static struct intel_miptree_aux_buffer *
> > @@ -1764,15 +1765,14 @@ intel_miptree_alloc_mcs(struct brw_context *brw,
> > const uint32_t alloc_flags = 0;
> > mt->aux_buf = intel_alloc_aux_buffer(brw, "mcs-miptree",
> >  &temp_mcs_surf, alloc_flags, mt);
> > -   if (!mt->aux_buf) {
> > +   if (!mt->aux_buf ||
> > +   !intel_miptree_init_mcs(brw, mt, 0xFF)) {
> >
> 
> You're leaking mt->aux_buf here.
> 
> 

How?

-Nanley

> >free(aux_state);
> >return false;
> > }
> >
> > mt->aux_state = aux_state;
> >
> > -   intel_miptree_init_mcs(brw, mt, 0xFF);
> > -
> > return true;
> >  }
> >
> > --
> > 2.16.2
> >
> > ___
> > mesa-dev mailing list
> > mesa-dev@lists.freedesktop.org
> > https://lists.freedesktop.org/mailman/listinfo/mesa-dev
> >
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] [PATCH v2 7/8] anv: elide relocations to pinned target bos

2018-05-08 Thread Jordan Justen
For the commit message, I have a suggestion:

---

anv: for pinned BOs, skip relocations, but track bo usage

References to pinned BOs won't need to be relocated, so just write the
final value of the reference into the bo.

Add a `set` to the relocation lists for tracking dependencies that
were previously tracked by relocations. When a batch is executed, we
add the referenced pinned BOs to the exec list.

---

Reviewed-by: Jordan Justen 

On 2018-05-07 17:30:49, Scott D Phillips wrote:
> References to pinned bos won't need to be relocated, so just write the
> final value of the reference into the bo. Add a `set` to the
> relocation lists for tracking dependencies that were previously
> tracked by relocations.
> 
> v2: - visit bos from the dependency set in a deterministic order (Jason)
> ---
>  src/intel/vulkan/anv_batch_chain.c | 52 
> ++
>  src/intel/vulkan/anv_private.h |  3 +++
>  2 files changed, 55 insertions(+)
> 
> diff --git a/src/intel/vulkan/anv_batch_chain.c 
> b/src/intel/vulkan/anv_batch_chain.c
> index 53b24551088..eaee9afbd29 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -75,11 +75,24 @@ anv_reloc_list_init_clone(struct anv_reloc_list *list,
>return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> }
>  
> +   list->deps = _mesa_set_create(NULL, _mesa_hash_pointer,
> + _mesa_key_pointer_equal);
> +
> +   if (!list->deps) {
> +  vk_free(alloc, list->relocs);
> +  vk_free(alloc, list->reloc_bos);
> +  return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
> +   }
> +
> if (other_list) {
>memcpy(list->relocs, other_list->relocs,
>   list->array_length * sizeof(*list->relocs));
>memcpy(list->reloc_bos, other_list->reloc_bos,
>   list->array_length * sizeof(*list->reloc_bos));
> +  struct set_entry *entry;
> +  set_foreach(other_list->deps, entry) {
> + _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +  }
> }
>  
> return VK_SUCCESS;
> @@ -98,6 +111,7 @@ anv_reloc_list_finish(struct anv_reloc_list *list,
>  {
> vk_free(alloc, list->relocs);
> vk_free(alloc, list->reloc_bos);
> +   _mesa_set_destroy(list->deps, NULL);
>  }
>  
>  static VkResult
> @@ -148,6 +162,11 @@ anv_reloc_list_add(struct anv_reloc_list *list,
> struct drm_i915_gem_relocation_entry *entry;
> int index;
>  
> +   if (target_bo->flags & EXEC_OBJECT_PINNED) {
> +  _mesa_set_add(list->deps, target_bo);
> +  return VK_SUCCESS;
> +   }
> +
> VkResult result = anv_reloc_list_grow(list, alloc, 1);
> if (result != VK_SUCCESS)
>return result;
> @@ -185,6 +204,12 @@ anv_reloc_list_append(struct anv_reloc_list *list,
>list->relocs[i + list->num_relocs].offset += offset;
>  
> list->num_relocs += other->num_relocs;
> +
> +   struct set_entry *entry;
> +   set_foreach(other->deps, entry) {
> +  _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
> +   }
> +
> return VK_SUCCESS;
>  }
>  
> @@ -338,6 +363,7 @@ anv_batch_bo_start(struct anv_batch_bo *bbo, struct 
> anv_batch *batch,
> batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
> batch->relocs = &bbo->relocs;
> bbo->relocs.num_relocs = 0;
> +   _mesa_set_clear(bbo->relocs.deps, NULL);
>  }
>  
>  static void
> @@ -783,6 +809,7 @@ anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer 
> *cmd_buffer)
> cmd_buffer->bt_next = 0;
>  
> cmd_buffer->surface_relocs.num_relocs = 0;
> +   _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
> cmd_buffer->last_ss_pool_center = 0;
>  
> /* Reset the list of seen buffers */
> @@ -985,6 +1012,14 @@ anv_execbuf_finish(struct anv_execbuf *exec,
> vk_free(alloc, exec->syncobjs);
>  }
>  
> +static int
> +_compar_bo_handles(const void *_bo1, const void *_bo2)
> +{
> +   const struct anv_bo **bo1 = _bo1, **bo2 = _bo2;
> +
> +   return (*bo1)->gem_handle - (*bo2)->gem_handle;
> +}
> +
>  static VkResult
>  anv_execbuf_add_bo(struct anv_execbuf *exec,
> struct anv_bo *bo,
> @@ -1068,6 +1103,23 @@ anv_execbuf_add_bo(struct anv_execbuf *exec,
>   if (result != VK_SUCCESS)
>  return result;
>}
> +
> +  uint32_t entries = relocs->deps->entries;
> +  struct anv_bo *bos[entries], **bo = bos;
> +  struct set_entry *entry;
> +  set_foreach(relocs->deps, entry) {
> + *bo++ = entry->key;
> +  }
> +
> +  qsort(bos, entries, sizeof(struct anv_bo*), _compar_bo_handles);
> +
> +  for (bo = bos; bo < bos + entries; bo++) {
> + VkResult result = anv_execbuf_add_bo(exec, *bo, NULL,
> +  extra_flags, alloc);
> +
> + if (result != VK_SUCCESS)
> +return result;
> +  }
> }
>  
> return VK_SUCCESS;
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 36e45

[Mesa-dev] [PATCH v2 0/2] intel: Stall before disable indirect state pointers

2018-05-08 Thread Lionel Landwerlin
Hi,

A quick update on the i965 patch, dropping the Post-Sync operation.

Thanks a lot,

Lionel Landwerlin (2):
  i965: require post sync operation prior to ISP disable
  anv: emit stall at pixel scoreboard before ISP disable

 src/intel/vulkan/genX_cmd_buffer.c   | 9 -
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 9 -
 2 files changed, 16 insertions(+), 2 deletions(-)

--
2.17.0
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 2/2] anv: emit stall at pixel scoreboard before ISP disable

2018-05-08 Thread Lionel Landwerlin
We want to make sure that all indirect state data has been loaded into
the EUs before disabling the pointers.

Signed-off-by: Lionel Landwerlin 
Fixes: 78c125af3904c ("anv/gen10: Ignore push constant packets during context 
restore.")
---
 src/intel/vulkan/genX_cmd_buffer.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/intel/vulkan/genX_cmd_buffer.c 
b/src/intel/vulkan/genX_cmd_buffer.c
index 2882cf36506..526e18af108 100644
--- a/src/intel/vulkan/genX_cmd_buffer.c
+++ b/src/intel/vulkan/genX_cmd_buffer.c
@@ -1420,14 +1420,21 @@ genX(BeginCommandBuffer)(
  * context restore, so the mentioned hang doesn't happen. However,
  * software must program push constant commands for all stages prior to
  * rendering anything. So we flag them dirty in BeginCommandBuffer.
+ *
+ * Finally, we also make sure to stall at pixel scoreboard to make sure the
+ * constants have been loaded into the EUs prior to disable the push constants
+ * so that it doesn't hang a previous 3DPRIMITIVE.
  */
 static void
 emit_isp_disable(struct anv_cmd_buffer *cmd_buffer)
 {
anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
- pc.IndirectStatePointersDisable = true;
+ pc.StallAtPixelScoreboard = true;
  pc.CommandStreamerStallEnable = true;
}
+   anv_batch_emit(&cmd_buffer->batch, GENX(PIPE_CONTROL), pc) {
+ pc.IndirectStatePointersDisable = true;
+   }
 }
 
 VkResult
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


[Mesa-dev] [PATCH v2 1/2] i965: require post sync operation prior to ISP disable

2018-05-08 Thread Lionel Landwerlin
Invalidating the indirect state pointers might affect a previously
scheduled & still running 3DPRIMITIVE (causing page fault). So stall
on pixel scoreboard before that.

v2: Fix compile issue :(

v3: Stall on pixel scoreboard

v4: Drop the post sync operation (Lionel)

Signed-off-by: Lionel Landwerlin 
Fixes: ca19ee33d7d39 ("i965/gen10: Ignore push constant packets during context 
restore.")
Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=106243
---
 src/mesa/drivers/dri/i965/brw_pipe_control.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/mesa/drivers/dri/i965/brw_pipe_control.c 
b/src/mesa/drivers/dri/i965/brw_pipe_control.c
index 02278be6d62..879bfb660ed 100644
--- a/src/mesa/drivers/dri/i965/brw_pipe_control.c
+++ b/src/mesa/drivers/dri/i965/brw_pipe_control.c
@@ -349,14 +349,21 @@ gen7_emit_vs_workaround_flush(struct brw_context *brw)
  * context restore, so the mentioned hang doesn't happen. However,
  * software must program push constant commands for all stages prior to
  * rendering anything, so we flag them as dirty.
+ *
+ * Finally, we also make sure to stall at pixel scoreboard to make sure the
+ * constants have been loaded into the EUs prior to disable the push constants
+ * so that it doesn't hang a previous 3DPRIMITIVE.
  */
 void
 gen10_emit_isp_disable(struct brw_context *brw)
 {
brw_emit_pipe_control(brw,
- PIPE_CONTROL_ISP_DIS |
+ PIPE_CONTROL_STALL_AT_SCOREBOARD |
  PIPE_CONTROL_CS_STALL,
  NULL, 0, 0);
+   brw_emit_pipe_control(brw,
+ PIPE_CONTROL_ISP_DIS,
+ NULL, 0, 0);
 
brw->vs.base.push_constants_dirty = true;
brw->tcs.base.push_constants_dirty = true;
-- 
2.17.0

___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


Re: [Mesa-dev] Lowering viewport transformation in NIR

2018-05-08 Thread Eric Anholt
Alyssa Rosenzweig  writes:

> Hi all,
>
> Certain embedded GPUs do not implement coordinate transformation in
> hardware. Instead, section 12.5 "Coordinate Transformation" of the ES
> 3.2 specification is implemented in the vertex shader itself. Relevant
> examples include Midgard and vc4.
>
> To handle this, a lowering pass is needed to convert gl_Position writes
> to screen space writes. The vc4 driver lowers this in the backend IR;
> however, I don't think the pass needs to be specialised to the backend.
> For Midgard, I have written a NIR lowering pass to implement the same,
> which enables the lowered instructions themselves to be optimised.
>
> At the moment, this pass lives inside the (downstream) Midgard compiler.
> In the future, it will be necessary for the Bifrost compiler as well,
> should that use NIR. That said, Bifrost will share the same Gallium
> driver, so the pass could still live in the driver
> (src/gallium/drivers/panfrost).
>
> Should this pass be moved into common code (src/compiler/nir)? If so,
> what would the driver agnostic way of passing viewport parameters be?
> Both vc4 and Midgard currently use/will use special uniforms for this
> purpose. Similarly, is there a driver agnostic way of representing the
> transformed write? The Midgard pass emits `nir_intrinsic_store_output`
> for the final value, but I'm not sure if this is generalisable.

I don't think I can really use a generic lowering on vc4 and v3d, FWIW.
I need to store both the scaled and unscaled vertex coordinates in my
coordinate shader.

What might be nice for me would be the ability to lower output stores to
a store intrinsic with a totally custom offset (unrelated to any
VARYING_SLOT), and which doesn't get reordered relative to other store
intrinsics.  Right now I have to emit my stores in order in an epilogue
and then try to coalesce an ALU op into the store, rather than having
something in NIR that would be easier to coalesce.


signature.asc
Description: PGP signature
___
mesa-dev mailing list
mesa-dev@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/mesa-dev


  1   2   >