On 2018-05-07 17:30:50, Scott D Phillips wrote:
> ---
>  src/intel/vulkan/anv_allocator.c   | 16 +++++++++++++++-
>  src/intel/vulkan/anv_batch_chain.c | 27 +++++++++------------------
>  src/intel/vulkan/anv_device.c      | 32 ++++++++++++++++++++++++++++----
>  src/intel/vulkan/anv_private.h     | 16 ++++++++++++++++
>  src/intel/vulkan/anv_queue.c       |  2 +-
>  src/intel/vulkan/genX_blorp_exec.c |  6 ++++++
>  src/intel/vulkan/genX_cmd_buffer.c | 26 +++++++++++++++++++++-----
>  src/intel/vulkan/genX_query.c      |  6 ++++++
>  8 files changed, 102 insertions(+), 29 deletions(-)
> 
> diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
> index 7d368f09c9e..a1a306817e0 100644
> --- a/src/intel/vulkan/anv_allocator.c
> +++ b/src/intel/vulkan/anv_allocator.c
> @@ -985,6 +985,7 @@ anv_bo_pool_finish(struct anv_bo_pool *pool)
>           struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
>  
>           anv_gem_munmap(link_copy.bo.map, link_copy.bo.size);
> +         anv_vma_free(pool->device, &link_copy.bo);
>           anv_gem_close(pool->device, link_copy.bo.gem_handle);
>           link = link_copy.next;
>        }
> @@ -1024,11 +1025,15 @@ anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo, uint32_t size)
>  
>     new_bo.flags = pool->bo_flags;
>  
> +   if (!anv_vma_alloc(pool->device, &new_bo))
> +      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
> +
>     assert(new_bo.size == pow2_size);
>  
>     new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pow2_size, 0);
>     if (new_bo.map == MAP_FAILED) {
>        anv_gem_close(pool->device, new_bo.gem_handle);
> +      anv_vma_free(pool->device, &new_bo);
>        return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
>     }
>  
> @@ -1072,8 +1077,10 @@ anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool
>     for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
>        for (unsigned i = 0; i < 16; i++) {
>           struct anv_scratch_bo *bo = &pool->bos[i][s];
> -         if (bo->exists > 0)
> +         if (bo->exists > 0) {
> +            anv_vma_free(device, &bo->bo);
>              anv_gem_close(device, bo->bo.gem_handle);
> +         }
>        }
>     }
>  }
> @@ -1171,6 +1178,11 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
>     if (device->instance->physicalDevice.has_exec_async)
>        bo->bo.flags |= EXEC_OBJECT_ASYNC;
>  
> +   if (device->instance->physicalDevice.use_softpin)
> +      bo->bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   anv_vma_alloc(device, &bo->bo);
> +
>     /* Set the exists last because it may be read by other threads */
>     __sync_synchronize();
>     bo->exists = true;
> @@ -1390,6 +1402,8 @@ anv_bo_cache_release(struct anv_device *device,
>     if (bo->bo.map)
>        anv_gem_munmap(bo->bo.map, bo->bo.size);
>  
> +   anv_vma_free(device, bo);
> +
>     anv_gem_close(device, bo->bo.gem_handle);
>  
>     /* Don't unlock until we've actually closed the BO.  The whole point of
> diff --git a/src/intel/vulkan/anv_batch_chain.c b/src/intel/vulkan/anv_batch_chain.c
> index eaee9afbd29..9b0cc984599 100644
> --- a/src/intel/vulkan/anv_batch_chain.c
> +++ b/src/intel/vulkan/anv_batch_chain.c
> @@ -430,6 +430,7 @@ anv_batch_bo_list_clone(const struct list_head *list,
>                          struct list_head *new_list)
>  {
>     VkResult result = VK_SUCCESS;
> +   struct anv_device *device = cmd_buffer->device;
>  
>     list_inithead(new_list);
>  
> @@ -448,8 +449,14 @@ anv_batch_bo_list_clone(const struct list_head *list,
>            * as it will always be the last relocation in the list.
>            */
>           uint32_t last_idx = prev_bbo->relocs.num_relocs - 1;
> -         assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> -         prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> +         if (last_idx == -1) {
> +            write_reloc(device, prev_bbo->bo.map + prev_bbo->length -
> +                        (device->info.gen >= 8 ? 8 : 4), new_bbo->bo.offset,
> +                        false);
> +         } else {
> +            assert(prev_bbo->relocs.reloc_bos[last_idx] == &bbo->bo);
> +            prev_bbo->relocs.reloc_bos[last_idx] = &new_bbo->bo;
> +         }

Would it be feasible to split this change out into its own patch? It
would be nice to have a commit message explaining it.

>        }
>  
>        prev_bbo = new_bbo;
> @@ -1162,22 +1169,6 @@ anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
>        list->relocs[i].target_handle = list->reloc_bos[i]->index;
>  }
>  
> -static void
> -write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
> -{
> -   unsigned reloc_size = 0;
> -   if (device->info.gen >= 8) {
> -      reloc_size = sizeof(uint64_t);
> -      *(uint64_t *)p = canonical_address(v);
> -   } else {
> -      reloc_size = sizeof(uint32_t);
> -      *(uint32_t *)p = v;
> -   }
> -
> -   if (flush && !device->info.has_llc)
> -      gen_flush_range(p, reloc_size);
> -}
> -
>  static void
>  adjust_relocations_from_state_pool(struct anv_state_pool *pool,
>                                     struct anv_reloc_list *relocs,
> diff --git a/src/intel/vulkan/anv_device.c b/src/intel/vulkan/anv_device.c
> index afb5b2a4f5d..5774086877d 100644
> --- a/src/intel/vulkan/anv_device.c
> +++ b/src/intel/vulkan/anv_device.c
> @@ -1326,6 +1331,11 @@ anv_device_init_trivial_batch(struct anv_device *device)
>     if (device->instance->physicalDevice.has_exec_async)
>        device->trivial_batch_bo.flags |= EXEC_OBJECT_ASYNC;
>  
> +   if (device->instance->physicalDevice.use_softpin)
> +      device->trivial_batch_bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   anv_vma_alloc(device, &device->trivial_batch_bo);
> +
>     void *map = anv_gem_mmap(device, device->trivial_batch_bo.gem_handle,
>                              0, 4096, 0);
>  
> @@ -1607,7 +1612,8 @@ VkResult anv_CreateDevice(
>     uint64_t bo_flags =
>       (physical_device->supports_48bit_addresses ? EXEC_OBJECT_SUPPORTS_48B_ADDRESS : 0) |
>        (physical_device->has_exec_async ? EXEC_OBJECT_ASYNC : 0) |
> -      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0);
> +      (physical_device->has_exec_capture ? EXEC_OBJECT_CAPTURE : 0) |
> +      (physical_device->use_softpin ? EXEC_OBJECT_PINNED : 0);
>  
>     anv_bo_pool_init(&device->batch_bo_pool, device, bo_flags);
>  
> @@ -1615,9 +1621,7 @@ VkResult anv_CreateDevice(
>     if (result != VK_SUCCESS)
>        goto fail_batch_bo_pool;
>  
> -   if (physical_device->use_softpin)
> -      bo_flags |= EXEC_OBJECT_PINNED;
> -   else
> +   if (!physical_device->use_softpin)
>        bo_flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>  
>     result = anv_state_pool_init(&device->dynamic_state_pool, device,
> @@ -1654,6 +1658,12 @@ VkResult anv_CreateDevice(
>     if (result != VK_SUCCESS)
>        goto fail_binding_table_pool;
>  
> +   if (physical_device->use_softpin)
> +      device->workaround_bo.flags |= EXEC_OBJECT_PINNED;
> +
> +   if (!anv_vma_alloc(device, &device->workaround_bo))
> +      goto fail_workaround_bo;
> +
>     anv_device_init_trivial_batch(device);
>  
>     if (device->info.gen >= 10)
> @@ -1752,8 +1762,10 @@ void anv_DestroyDevice(
>     anv_scratch_pool_finish(device, &device->scratch_pool);
>  
>     anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
> +   anv_vma_free(device, &device->workaround_bo);
>     anv_gem_close(device, device->workaround_bo.gem_handle);
>  
> +   anv_vma_free(device, &device->trivial_batch_bo);
>     anv_gem_close(device, device->trivial_batch_bo.gem_handle);
>     if (device->info.gen >= 10)
>        anv_gem_close(device, device->hiz_clear_bo.gem_handle);
> @@ -2111,6 +2123,18 @@ VkResult anv_AllocateMemory(
>     if (pdevice->memory.heaps[mem->type->heapIndex].supports_48bit_addresses)
>        mem->bo->flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>  
> +   if (pdevice->use_softpin) {
> +      mem->bo->flags |= EXEC_OBJECT_PINNED;
> +      if (mem->bo->offset == -1) {
> +         if (!anv_vma_alloc(device, mem->bo)) {
> +            result = vk_errorf(device->instance, NULL,
> +                               VK_ERROR_OUT_OF_DEVICE_MEMORY,
> +                               "failed to allocate virtual address for BO");
> +            goto fail;
> +         }
> +      }
> +   }
> +
>     const struct wsi_memory_allocate_info *wsi_info =
>        vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);
>     if (wsi_info && wsi_info->implicit_sync) {
> diff --git a/src/intel/vulkan/anv_private.h b/src/intel/vulkan/anv_private.h
> index 6d9c7d4dfb3..52890f5614c 100644
> --- a/src/intel/vulkan/anv_private.h
> +++ b/src/intel/vulkan/anv_private.h
> @@ -1169,6 +1169,22 @@ struct anv_address {
>     uint32_t offset;
>  };
>  
> +static inline void
> +write_reloc(const struct anv_device *device, void *p, uint64_t v, bool flush)
> +{
> +   unsigned reloc_size = 0;
> +   if (device->info.gen >= 8) {
> +      reloc_size = sizeof(uint64_t);
> +      *(uint64_t *)p = canonical_address(v);
> +   } else {
> +      reloc_size = sizeof(uint32_t);
> +      *(uint32_t *)p = v;
> +   }
> +
> +   if (flush && !device->info.has_llc)
> +      gen_flush_range(p, reloc_size);
> +}

The various points where write_reloc is now being called... is this so
that the address gets written directly, whereas before it would have
been handled by the relocations?

Would it be feasible to split this patch out?

Also, what about renaming this write_reloc function? I'm thinking
write_gpu_addr might better cover both the softpin and relocatable
scenarios.
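
For illustration, the rename would be purely mechanical; this is the
same body the patch moves into anv_private.h, just under the new name:

   static inline void
   write_gpu_addr(const struct anv_device *device, void *p, uint64_t v,
                  bool flush)
   {
      unsigned reloc_size = 0;
      if (device->info.gen >= 8) {
         /* Gen8+ uses 48-bit addresses, so write the full 64-bit,
          * canonicalized form. */
         reloc_size = sizeof(uint64_t);
         *(uint64_t *)p = canonical_address(v);
      } else {
         reloc_size = sizeof(uint32_t);
         *(uint32_t *)p = v;
      }

      /* On non-LLC platforms the CPU write has to be flushed out. */
      if (flush && !device->info.has_llc)
         gen_flush_range(p, reloc_size);
   }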

Another approach to the series might be to have a 'flip the switch'
patch at the end, but "anv: Add vma_heap allocators in anv_device"
allowed use_softpin to be set to true a few patches back.
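Purely as a sketch of that alternative (assuming the gating lives in
anv_physical_device_init and that a has_exec_softpin field mirrors the
other I915_PARAM checks), the final enabling commit could then be as
small as:

   /* Hypothetical 'flip the switch' commit: the whole series lands
    * with use_softpin forced to false, and only this last patch
    * turns it on. */
   device->use_softpin = device->has_exec_softpin &&
                         device->info.gen >= 8;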

Are you confident that the series will bisect cleanly with use_softpin
being flipped on in the middle?

-Jordan

> +
>  static inline uint64_t
>  _anv_combine_address(struct anv_batch *batch, void *location,
>                       const struct anv_address address, uint32_t delta)
> diff --git a/src/intel/vulkan/anv_queue.c b/src/intel/vulkan/anv_queue.c
> index b9ca189fddc..15c6e31b54b 100644
> --- a/src/intel/vulkan/anv_queue.c
> +++ b/src/intel/vulkan/anv_queue.c
> @@ -81,7 +81,7 @@ anv_device_submit_simple_batch(struct anv_device *device,
>     exec2_objects[0].relocs_ptr = 0;
>     exec2_objects[0].alignment = 0;
>     exec2_objects[0].offset = bo.offset;
> -   exec2_objects[0].flags = 0;
> +   exec2_objects[0].flags = bo.flags;
>     exec2_objects[0].rsvd1 = 0;
>     exec2_objects[0].rsvd2 = 0;
>  
> diff --git a/src/intel/vulkan/genX_blorp_exec.c b/src/intel/vulkan/genX_blorp_exec.c
> index 9023269d61b..ecca3928de5 100644
> --- a/src/intel/vulkan/genX_blorp_exec.c
> +++ b/src/intel/vulkan/genX_blorp_exec.c
> @@ -62,6 +62,12 @@ blorp_surface_reloc(struct blorp_batch *batch, uint32_t ss_offset,
>                           ss_offset, address.buffer, address.offset + delta);
>     if (result != VK_SUCCESS)
>        anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +   void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +      ss_offset;
> +   uint64_t val = ((struct anv_bo*)address.buffer)->offset + address.offset +
> +      delta;
> +   write_reloc(cmd_buffer->device, dest, val, false);
>  }
>  
>  #if GEN_GEN >= 7 && GEN_GEN < 10
> diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
> index 2882cf36506..d6cbd915d88 100644
> --- a/src/intel/vulkan/genX_cmd_buffer.c
> +++ b/src/intel/vulkan/genX_cmd_buffer.c
> @@ -167,15 +167,20 @@ genX(cmd_buffer_emit_state_base_address)(struct anv_cmd_buffer *cmd_buffer)
>  static void
>  add_surface_state_reloc(struct anv_cmd_buffer *cmd_buffer,
>                          struct anv_state state,
> -                        struct anv_bo *bo, uint32_t offset)
> +                        struct anv_bo *bo, uint32_t delta)
>  {
>     const struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
>  
> +   uint32_t offset = state.offset + isl_dev->ss.addr_offset;
>     VkResult result =
>        anv_reloc_list_add(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc,
> -                         state.offset + isl_dev->ss.addr_offset, bo, offset);
> +                         offset, bo, delta);
>     if (result != VK_SUCCESS)
>        anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +   void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +      offset;
> +   write_reloc(cmd_buffer->device, dest, bo->offset + delta, false);
>  }
>  
>  static void
> @@ -192,24 +197,35 @@ add_image_view_relocs(struct anv_cmd_buffer *cmd_buffer,
>                             image->planes[image_plane].bo, state.address);
>  
>     if (state.aux_address) {
> +      uint32_t offset = state.state.offset + isl_dev->ss.aux_addr_offset;
>        VkResult result =
>           anv_reloc_list_add(&cmd_buffer->surface_relocs,
>                              &cmd_buffer->pool->alloc,
> -                            state.state.offset + isl_dev->ss.aux_addr_offset,
> +                            offset,
>                              image->planes[image_plane].bo, 
> state.aux_address);
>        if (result != VK_SUCCESS)
>           anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +      void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +         offset;
> +      uint64_t val = image->planes[image_plane].bo->offset + state.aux_address;
> +      write_reloc(cmd_buffer->device, dest, val, false);
>     }
>  
>     if (state.clear_address) {
> +      uint32_t offset = state.state.offset + isl_dev->ss.clear_color_state_offset;
>        VkResult result =
>           anv_reloc_list_add(&cmd_buffer->surface_relocs,
>                              &cmd_buffer->pool->alloc,
> -                            state.state.offset +
> -                            isl_dev->ss.clear_color_state_offset,
> +                            offset,
>                              image->planes[image_plane].bo, 
> state.clear_address);
>        if (result != VK_SUCCESS)
>           anv_batch_set_error(&cmd_buffer->batch, result);
> +
> +      void *dest = cmd_buffer->device->surface_state_pool.block_pool.map +
> +         offset;
> +      uint64_t val = image->planes[image_plane].bo->offset + state.clear_address;
> +      write_reloc(cmd_buffer->device, dest, val, false);
>     }
>  }
>  
> diff --git a/src/intel/vulkan/genX_query.c b/src/intel/vulkan/genX_query.c
> index 4efcc57e475..30081b10881 100644
> --- a/src/intel/vulkan/genX_query.c
> +++ b/src/intel/vulkan/genX_query.c
> @@ -94,9 +94,14 @@ VkResult genX(CreateQueryPool)(
>     if (pdevice->supports_48bit_addresses)
>        pool->bo.flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>  
> +   if (pdevice->use_softpin)
> +      pool->bo.flags |= EXEC_OBJECT_PINNED;
> +
>     if (pdevice->has_exec_async)
>        pool->bo.flags |= EXEC_OBJECT_ASYNC;
>  
> +   anv_vma_alloc(device, &pool->bo);
> +
>     /* For query pools, we set the caching mode to I915_CACHING_CACHED.  On LLC
>      * platforms, this does nothing.  On non-LLC platforms, this means snooping
>      * which comes at a slight cost.  However, the buffers aren't big, won't be
> @@ -129,6 +134,7 @@ void genX(DestroyQueryPool)(
>        return;
>  
>     anv_gem_munmap(pool->bo.map, pool->bo.size);
> +   anv_vma_free(device, &pool->bo);
>     anv_gem_close(device, pool->bo.gem_handle);
>     vk_free2(&device->alloc, pAllocator, pool);
>  }
> -- 
> 2.14.3
> 