On Mon, May 7, 2018 at 5:30 PM, Scott D Phillips <scott.d.phill...@intel.com> wrote:

> ---
>  src/intel/vulkan/anv_allocator.c   | 16 +++++++++++++++-
>  src/intel/vulkan/anv_batch_chain.c | 27 +++++++++------------------
>  src/intel/vulkan/anv_device.c      | 32 ++++++++++++++++++++++++++++----
>  src/intel/vulkan/anv_private.h     | 16 ++++++++++++++++
>  src/intel/vulkan/anv_queue.c       |  2 +-
>  src/intel/vulkan/genX_blorp_exec.c |  6 ++++++
>  src/intel/vulkan/genX_cmd_buffer.c | 26 +++++++++++++++++++++-----
>  src/intel/vulkan/genX_query.c      |  6 ++++++
>  8 files changed, 102 insertions(+), 29 deletions(-)
>
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index 7d368f09c9e..a1a306817e0 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -985,6 +985,7 @@ anv_bo_pool_finish(struct anv_bo_pool *pool)
>           struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
>
>           anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
> +         anv_vma_free(pool->device, &link_copy.bo);
>           anv_gem_close(pool->device, link_copy.bo.gem_handle);
>           link = link_copy.next;
>        }
> @@ -1024,11 +1025,15 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
>
>     new_bo.flags = pool->bo_flags;
>
> +   if (!anv_vma_alloc(pool->device, &new_bo))
> +      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
> +
>     assert(new_bo.size == pow2_size);
>
>     new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
>     if (new_bo.map == MAP_FAILED) {
>        anv_gem_close(pool->device, new_bo.gem_handle);
> +      anv_vma_free(pool->device, &new_bo);
>        return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
>     }
>
> @@ -1072,8 +1077,10 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
>     for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
>        for (unsigned i = 0; i < 16; i++) {
>           struct anv_scratch_bo *bo = &pool->bos[i][s];
> -         if (bo->exists > 0)
> +         if (bo->exists > 0) {
> +            anv_vma_free(device, &bo->bo);
>              anv_gem_close(device, bo->bo.gem_handle);
> +         }
>        }
>     }
>  }
> @@ -1171,6 +1178,11 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
>     if (device->instance->physicalDevice.has_exec_async)
>        bo->bo.flags |= EXEC_OBJECT_ASYNC;
>
> +   if (device->instance->physicalDevice.use_softpin)
> +      bo->bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   anv_vma_alloc(device, &bo->bo);
> +
>     /* Set the exists last because it may be read by other threads */
>     __sync_synchronize();
>     bo->exists = true;
> @@ -1390,6 +1402,8 @@ anv_bo_cache_release(struct anv_device *device,
>     if (bo->bo.map)
>        anv_gem_munmap(bo->bo.map, bo->bo.size);
>
> +   anv_vma_free(device, bo);
> +
>     anv_gem_close(device, bo->bo.gem_handle);
>
>     /* Don't unlock until we've actually closed the BO.  The whole point of
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index eaee9afbd29..9b0cc984599 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -430,6 +430,7 @@ anv_batch_bo_list_clone(const struct list_head *list,
>                          struct list_head *new_list)
>  {
>     VkResult result = VK_SUCCESS;
> +   struct anv_device *device = cmd_buffer->device;
>
>     list_inithead(new_list);
>
> @@ -448,8 +449,14 @@ anv_batch_bo_list_clone(const struct list_head *list,
>            * as it will always be the last relocation in the list.
>            */
>           uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
> -         assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> -         prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> +         if (last_idx == -1) {
> +            write_reloc(device, prev_bbo->bo.map + prev_bbo->length -
> +                        (device->info.gen >= 8 ? 8 : 4), new_bbo->bo.offset,
> +                        false);
> +         } else {
> +            assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> +            prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> +         }
>

I'm not convinced this is correct.  It heavily depends on two things:

 1) That we will either have a reloc to the next batch_bo or no relocs
at all
 2) That we can find the address in the MI_BATCH_BUFFER_START by a fixed
calculation based on hardware generation

This code is fragile enough as is without adding yet more subtle
assumptions.

Maybe the thing to do here is to stash off some bit of information for
reliably finding the MI_BATCH_BUFFER_START, such as a pointer to the
command or to its jump address.

As for the "last_idx == -1" check itself, maybe that should just be a
"use_softpin" check.  Or, for that matter, you could just leave the
"chain_addr" field mentioned above NULL if softpin is not in use.

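To make this concrete, here's a rough and untested sketch of what I'm
thinking ("chain_addr" is just a strawman name for the new field):

   struct anv_batch_bo {
      /* ... existing fields ... */

      /* Address of the MI_BATCH_BUFFER_START jump dword(s) within
       * bo.map, or NULL when relocations are in use.
       */
      void *chain_addr;
   };

You would record it wherever the chaining MI_BATCH_BUFFER_START gets
emitted, and then the clone path above becomes:

   if (prev_bbo->chain_addr != NULL) {
      /* Softpin: patch the jump address in place. */
      write_reloc(device, prev_bbo->chain_addr, new_bbo->bo.offset, false);
   } else {
      assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
      prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
   }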

>        }
>
>        prev_bbo = new_bbo;
> @@ -1162,22 +1169,6 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
>        list->relocs[i].target_handle = list->reloc_bos[i]->index;
>  }
>
> -static void
> -write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
> -{
> -   unsigned reloc_size = 0;
> -   if (device->info.gen >= 8) {
> -      reloc_size = sizeof(uint64_t);
> -      *(uint64_t *)p = canonical_address(v);
> -   } else {
> -      reloc_size = sizeof(uint32_t);
> -      *(uint32_t *)p = v;
> -   }
> -
> -   if (flush && !device->info.has_llc)
> -      gen_flush_range(p, reloc_size);
> -}
> -
>  static void
>  adjust_relocations_from_state_pool(struct anv_state_pool *pool,
>                                     struct anv_reloc_list *relocs,
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index afb5b2a4f5d..5774086877d 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1326,6 +1326,11 @@ anv_device_init_trivial_batch(struct anv_device *device)
>     if (device->instance->physicalDevice.has_exec_async)
>        device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC;
>
> +   if (device->instance->physicalDevice.use_softpin)
> +      device->trivial_batch_bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   anv_vma_alloc(device, &device->trivial_batch_bo);
> +
>     void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle,
>                              0, 4096, 0);
>
> @@ -1607,7 +1612,8 @@ VkResult anv_CreateDevice(
>     uint64_t bo_flags =
>       (physical_device->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
>        (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
> -      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0);
> +      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0) |
> +      (physical_device->use_softpin ? EXEC_OBJECT_PINNED : 0);
>
>     anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);
>
> @@ -1615,9 +1621,7 @@ VkResult anv_CreateDevice(
>     if (result != VK_SUCCESS)
>        goto fail_batch_bo_pool;
>
> -   if (physical_device->use_softpin)
> -      bo_flags |= EXEC_OBJECT_PINNED;
> -   else
> +   if (!physical_device->use_softpin)
>        bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>
>     result = anv_state_pool_init(&device->dynamic_state_pool, device,
> @@ -1654,6 +1658,12 @@ VkResult anv_CreateDevice(
>     if (result != VK_SUCCESS)
>        goto fail_binding_table_pool;
>
> +   if (physical_device->use_softpin)
> +      device->workaround_bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   if (!anv_vma_alloc(device, &device->workaround_bo))
> +      goto fail_workaround_bo;
> +
>     anv_device_init_trivial_batch(device);
>
>     if (device->info.gen >= 10)
> @@ -1752,8 +1762,10 @@ void anv_DestroyDevice(
>     anv_scratch_pool_finish(device, &device->scratch_pool);
>
>     anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
> +   anv_vma_free(device, &device->workaround_bo);
>     anv_gem_close(device, device->workaround_bo.gem_handle);
>
> +   anv_vma_free(device, &device->trivial_batch_bo);
>     anv_gem_close(device, device->trivial_batch_bo.gem_handle);
>     if (device->info.gen >= 10)
>        anv_gem_close(device, device->hiz_clear_bo.gem_handle);
> @@ -2111,6 +2123,18 @@ VkResult anv_AllocateMemory(
>     if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses)
>        mem->bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>
> +   if (pdevice->use_softpin) {
> +      mem->bo->flags |= EXEC_OBJECT_PINNED;
> +      if (mem->bo->offset == -1) {
> +         if (!anv_vma_alloc(device, mem->bo)) {
> +            result = vk_errorf(device->instance, NULL,
> +                               VK_ERROR_OUT_OF_DEVICE_MEMORY,
> +                               "failed to allocate virtual address for BO");
> +            goto fail;
> +         }
> +      }
> +   }
> +
>     const struct wsi_memory_allocate_info *wsi_info =
>        vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
>     if (wsi_info && wsi_info->implicit_sync) {
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 6d9c7d4dfb3..52890f5614c 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1169,6 +1169,22 @@ struct anv_address {
>     uint32_t offset;
>  };
>
> +static inline void
> +write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
> +{
> +   unsigned reloc_size = 0;
> +   if (device->info.gen >= 8) {
> +      reloc_size = sizeof(uint64_t);
> +      *(uint64_t *)p = canonical_address(v);
> +   } else {
> +      reloc_size = sizeof(uint32_t);
> +      *(uint32_t *)p = v;
> +   }
> +
> +   if (flush && !device->info.has_llc)
> +      gen_flush_range(p, reloc_size);
> +}
> +
>  static inline uint64_t
>  _anv_combine_address(struct anv_batch *batch, void *location,
>                       const struct anv_address address, uint32_t delta)
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index b9ca189fddc..15c6e31b54b 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -81,7 +81,7 @@ anv_device_submit_simple_batch(struct anv_device *device,
>     exec2_objects[0].relocs_ptr = 0;
>     exec2_objects[0].alignment = 0;
>     exec2_objects[0].offset = bo.offset;
> -   exec2_objects[0].flags = 0;
> +   exec2_objects[0].flags = bo.flags;
>     exec2_objects[0].rsvd1 = 0;
>     exec2_objects[0].rsvd2 = 0;
>
> diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
> index 9023269d61b..ecca3928de5 100644
> --- a/src/intel/vulkan/genX_blorp_exec.c
> +++ b/src/intel/vulkan/genX_blorp_exec.c
> @@ -62,6 +62,12 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
>                          ss_offset, address.buffer, address.offset + delta);
>     if (result != VK_SUCCESS)
>        anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +   void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +      ss_offset;
> +   uint64_t val = ((struct anv_bo*)address.buffer)->offset + address.offset +
> +      delta;
> +   write_reloc(cmd_buffer->device, dest, val, false);
>

Eventually, I think we can make BLORP just not do relocations at all.  I've
got some ideas on how to do that, but this works for now.

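For reference, a rough sketch of the direction I have in mind (untested,
and assuming softpin means the BO address is fixed at allocation time):

   static void
   blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
                       struct blorp_address address, uint32_t delta)
   {
      struct anv_cmd_buffer *cmd_buffer = batch->driver_batch;
      struct anv_device *device = cmd_buffer->device;

      if (device->instance->physicalDevice.use_softpin) {
         /* The BO already has its final address; write it directly and
          * skip the relocation list entirely.
          */
         void *dest = device->surface_state_pool.block_pool.map + ss_offset;
         uint64_t val = ((struct anv_bo *)address.buffer)->offset +
                        address.offset + delta;
         write_reloc(device, dest, val, false);
         return;
      }

      VkResult result =
         anv_reloc_list_add(&cmd_buffer->surface_relocs,
                            &cmd_buffer->pool->alloc, ss_offset,
                            address.buffer, address.offset + delta);
      if (result != VK_SUCCESS)
         anv_batch_set_error(&cmd_buffer->batch, result);
   }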

>  }
>
>  #if GEN_GEN >= 7 && GEN_GEN < 10
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 2882cf36506..d6cbd915d88 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -167,15 +167,20 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
>  static void
>  add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
>                          struct anv_state state,
> -                        struct anv_bo *bo, uint32_t offset)
> +                        struct anv_bo *bo, uint32_t delta)
>  {
>     const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
>
> +   uint32_t offset = state.offset + isl_dev->ss.addr_offset;
>     VkResult result =
>       anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
> -                         state.offset + isl_dev->ss.addr_offset, bo, offset);
> +                         offset, bo, delta);
>
>     if (result != VK_SUCCESS)
>        anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +   void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +      offset;
> +   write_reloc(cmd_buffer->device, dest, bo->offset + delta, false);
>

This is sketchy.  I think it's safe, but only because we're always writing
the value that's already there.  Still, it makes me nervous.  The reason
blorp is safer above is that we know it always has a fresh surface state,
while this is one we may have filled out some time ago.

What we really want to be doing is just using the correct address when we
call isl_surf_fill_state.

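Something along these lines at fill time, in other words (sketch only;
"surface", "view", "bo", and the offset are placeholders for whatever the
caller already has, and I'm assuming the device's default MOCS here):

   /* With softpin, the BO's virtual address is already known when we
    * fill out the surface state, so bake the final address in up front
    * instead of patching the dword afterward.
    */
   isl_surf_fill_state(&device->isl_dev, state.map,
                       .surf = &surface->isl,
                       .view = &view,
                       .address = bo->offset + offset,
                       .mocs = device->default_mocs);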

>  }
>
>  static void
> @@ -192,24 +197,35 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
>                             image->planes[image_plane].bo, state.address);
>
>     if (state.aux_address) {
> +      uint32_t offset = state.state.offset + isl_dev->ss.aux_addr_offset;
>        VkResult result =
>           anv_reloc_list_add(&cmd_buffer->surface_relocs,
>                              &cmd_buffer->pool->alloc,
> -                            state.state.offset + isl_dev->ss.aux_addr_offset,
> +                            offset,
>                              image->planes[image_plane].bo, state.aux_address);
>        if (result != VK_SUCCESS)
>           anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +      void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +         offset;
> +      uint64_t val = image->planes[image_plane].bo->offset + state.aux_address;
> +      write_reloc(cmd_buffer->device, dest, val, false);
>     }
>
>     if (state.clear_address) {
> +      uint32_t offset = state.state.offset + isl_dev->ss.clear_color_state_offset;
>        VkResult result =
>           anv_reloc_list_add(&cmd_buffer->surface_relocs,
>                              &cmd_buffer->pool->alloc,
> -                            state.state.offset +
> -                            isl_dev->ss.clear_color_state_offset,
> +                            offset,
>                              image->planes[image_plane].bo, state.clear_address);
>        if (result != VK_SUCCESS)
>           anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +      void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +         offset;
> +      uint64_t val = image->planes[image_plane].bo->offset + state.clear_address;
> +      write_reloc(cmd_buffer->device, dest, val, false);
>     }
>  }
>
> diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
> index 4efcc57e475..30081b10881 100644
> --- a/src/intel/vulkan/genX_query.c
> +++ b/src/intel/vulkan/genX_query.c
> @@ -94,9 +94,14 @@ VkResult genX(CreateQueryPool)(
>     if (pdevice->supports_48bit_addresses)
>        pool->bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>
> +   if (pdevice->use_softpin)
> +      pool->bo.flags |= EXEC_OBJECT_PINNED;
> +
>     if (pdevice->has_exec_async)
>        pool->bo.flags |= EXEC_OBJECT_ASYNC;
>
> +   anv_vma_alloc(device, &pool->bo);
> +
>     /* For query pools, we set the caching mode to I915_CACHING_CACHED.  On LLC
>      * platforms, this does nothing.  On non-LLC platforms, this means snooping
>      * which comes at a slight cost.  However, the buffers aren't big, won't be
> @@ -129,6 +134,7 @@ void genX(DestroyQueryPool)(
>        return;
>
>     anv_gem_munmap(pool->bo.map, pool->bo.size);
> +   anv_vma_free(device, &pool->bo);
>     anv_gem_close(device, pool->bo.gem_handle);
>     vk_free2(&device->alloc, pAllocator, pool);
>  }
> --
> 2.14.3
>
>