On Mon, Sep 17, 2018 at 07:42:38PM +0800, Christian König wrote:
> Am 17.09.2018 um 11:10 schrieb Huang Rui:
> > On Fri, Sep 14, 2018 at 03:42:57PM +0200, Christian König wrote:
> >> Don't grab the reservation lock any more and simplify the handling quite
> >> a bit.
> >>
> >> Signed-off-by: Christian König <christian.koe...@amd.com>
> >> ---
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 109 
> >> ++++++++---------------------
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.c |  46 ++++--------
> >>   drivers/gpu/drm/amd/amdgpu/amdgpu_object.h |   8 +--
> >>   3 files changed, 43 insertions(+), 120 deletions(-)
> >>
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> index 899342c6dfad..1cbc372964f8 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> >> @@ -2954,54 +2954,6 @@ static int amdgpu_device_ip_post_soft_reset(struct 
> >> amdgpu_device *adev)
> >>    return 0;
> >>   }
> >>   
> >> -/**
> >> - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers
> >> - *
> >> - * @adev: amdgpu_device pointer
> >> - * @ring: amdgpu_ring for the engine handling the buffer operations
> >> - * @bo: amdgpu_bo buffer whose shadow is being restored
> >> - * @fence: dma_fence associated with the operation
> >> - *
> >> - * Restores the VRAM buffer contents from the shadow in GTT.  Used to
> >> - * restore things like GPUVM page tables after a GPU reset where
> >> - * the contents of VRAM might be lost.
> >> - * Returns 0 on success, negative error code on failure.
> >> - */
> >> -static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device 
> >> *adev,
> >> -                                            struct amdgpu_ring *ring,
> >> -                                            struct amdgpu_bo *bo,
> >> -                                            struct dma_fence **fence)
> >> -{
> >> -  uint32_t domain;
> >> -  int r;
> >> -
> >> -  if (!bo->shadow)
> >> -          return 0;
> >> -
> >> -  r = amdgpu_bo_reserve(bo, true);
> >> -  if (r)
> >> -          return r;
> >> -  domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
> >> -  /* if bo has been evicted, then no need to recover */
> >> -  if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
> >> -          r = amdgpu_bo_validate(bo->shadow);
> >> -          if (r) {
> >> -                  DRM_ERROR("bo validate failed!\n");
> >> -                  goto err;
> >> -          }
> >> -
> >> -          r = amdgpu_bo_restore_from_shadow(adev, ring, bo,
> >> -                                           NULL, fence, true);
> >> -          if (r) {
> >> -                  DRM_ERROR("recover page table failed!\n");
> >> -                  goto err;
> >> -          }
> >> -  }
> >> -err:
> >> -  amdgpu_bo_unreserve(bo);
> >> -  return r;
> >> -}
> >> -
> >>   /**
> >>    * amdgpu_device_recover_vram - Recover some VRAM contents
> >>    *
> >> @@ -3010,16 +2962,15 @@ static int 
> >> amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev,
> >>    * Restores the contents of VRAM buffers from the shadows in GTT.  Used 
> >> to
> >>    * restore things like GPUVM page tables after a GPU reset where
> >>    * the contents of VRAM might be lost.
> >> - * Returns 0 on success, 1 on failure.
> >> + *
> >> + * Returns:
> >> + * 0 on success, negative error code on failure.
> >>    */
> >>   static int amdgpu_device_recover_vram(struct amdgpu_device *adev)
> >>   {
> >> -  struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> >> -  struct amdgpu_bo *bo, *tmp;
> >>    struct dma_fence *fence = NULL, *next = NULL;
> >> -  long r = 1;
> >> -  int i = 0;
> >> -  long tmo;
> >> +  struct amdgpu_bo *shadow;
> >> +  long r = 1, tmo;
> >>   
> >>    if (amdgpu_sriov_runtime(adev))
> >>            tmo = msecs_to_jiffies(8000);
> >> @@ -3028,44 +2979,40 @@ static int amdgpu_device_recover_vram(struct 
> >> amdgpu_device *adev)
> >>   
> >>    DRM_INFO("recover vram bo from shadow start\n");
> >>    mutex_lock(&adev->shadow_list_lock);
> >> -  list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) {
> >> -          next = NULL;
> >> -          amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next);
> >> +  list_for_each_entry(shadow, &adev->shadow_list, shadow_list) {
> >> +
> >> +          /* No need to recover an evicted BO */
> >> +          if (shadow->tbo.mem.mem_type != TTM_PL_TT ||
> >> +              shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM)
> >> +                  continue;
> >> +
> >> +          r = amdgpu_bo_restore_shadow(shadow, &next);
> >> +          if (r)
> >> +                  break;
> >> +
> >>            if (fence) {
> >>                    r = dma_fence_wait_timeout(fence, false, tmo);
> >> -                  if (r == 0)
> >> -                          pr_err("wait fence %p[%d] timeout\n", fence, i);
> >> -                  else if (r < 0)
> >> -                          pr_err("wait fence %p[%d] interrupted\n", 
> >> fence, i);
> >> -                  if (r < 1) {
> >> -                          dma_fence_put(fence);
> >> -                          fence = next;
> >> +                  dma_fence_put(fence);
> >> +                  fence = next;
> >> +                  if (r <= 0)
> >>                            break;
> >> -                  }
> >> -                  i++;
> >> +          } else {
> >> +                  fence = next;
> >>            }
> >> -
> >> -          dma_fence_put(fence);
> >> -          fence = next;
> >>    }
> >>    mutex_unlock(&adev->shadow_list_lock);
> >>   
> >> -  if (fence) {
> >> -          r = dma_fence_wait_timeout(fence, false, tmo);
> >> -          if (r == 0)
> >> -                  pr_err("wait fence %p[%d] timeout\n", fence, i);
> >> -          else if (r < 0)
> >> -                  pr_err("wait fence %p[%d] interrupted\n", fence, i);
> >> -
> >> -  }
> >> +  if (fence)
> >> +          tmo = dma_fence_wait_timeout(fence, false, tmo);
> >>    dma_fence_put(fence);
> >>   
> >> -  if (r > 0)
> >> -          DRM_INFO("recover vram bo from shadow done\n");
> >> -  else
> >> +  if (r <= 0 || tmo <= 0) {
> >>            DRM_ERROR("recover vram bo from shadow failed\n");
> >> +          return -EIO;
> >> +  }
> >>   
> >> -  return (r > 0) ? 0 : 1;
> >> +  DRM_INFO("recover vram bo from shadow done\n");
> >> +  return 0;
> >>   }
> >>   
> >>   /**
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> index 650c45c896f0..ca8a0b7a5ac3 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
> >> @@ -547,7 +547,7 @@ static int amdgpu_bo_create_shadow(struct 
> >> amdgpu_device *adev,
> >>    if (!r) {
> >>            bo->shadow->parent = amdgpu_bo_ref(bo);
> >>            mutex_lock(&adev->shadow_list_lock);
> >> -          list_add_tail(&bo->shadow_list, &adev->shadow_list);
> >> +          list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list);
> >>            mutex_unlock(&adev->shadow_list_lock);
> >>    }
> >>   
> >> @@ -679,13 +679,10 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
> >>   }
> >>   
> >>   /**
> >> - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
> >> - * @adev: amdgpu device object
> >> - * @ring: amdgpu_ring for the engine handling the buffer operations
> >> - * @bo: &amdgpu_bo buffer to be restored
> >> - * @resv: reservation object with embedded fence
> >> + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow
> >> + *
> >> + * @shadow: &amdgpu_bo shadow to be restored
> >>    * @fence: dma_fence associated with the operation
> >> - * @direct: whether to submit the job directly
> >>    *
> >>    * Copies a buffer object's shadow content back to the object.
> >>    * This is used for recovering a buffer from its shadow in case of a gpu
> >> @@ -694,36 +691,19 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
> >>    * Returns:
> >>    * 0 for success or a negative error code on failure.
> >>    */
> >> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> >> -                            struct amdgpu_ring *ring,
> >> -                            struct amdgpu_bo *bo,
> >> -                            struct reservation_object *resv,
> >> -                            struct dma_fence **fence,
> >> -                            bool direct)
> >> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence 
> >> **fence)
> >>   
> >>   {
> >> -  struct amdgpu_bo *shadow = bo->shadow;
> >> -  uint64_t bo_addr, shadow_addr;
> >> -  int r;
> >> -
> >> -  if (!shadow)
> >> -          return -EINVAL;
> >> -
> >> -  bo_addr = amdgpu_bo_gpu_offset(bo);
> >> -  shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
> >> -
> >> -  r = reservation_object_reserve_shared(bo->tbo.resv);
> >> -  if (r)
> >> -          goto err;
> >> +  struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev);
> >> +  struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> >> +  uint64_t shadow_addr, parent_addr;
> >>   
> > May I know why won't need the reservation_object_reserve_shared() here? Is
> > it because we only copy buffer from shadow parent bo instead of orignal bo?
> 
> The scheduler and delayed free thread are disabled and we wait for the 
> copy to finish before allowing any eviction to proceed.
> 
> Adding the BO as shared fence to the BO could actually be harmful 
> because TTM might already want to destroy it.
> 

Thanks, patch is Reviewed-by: Huang Rui <ray.hu...@amd.com>

> Regards,
> Christian.
> 
> >
> > Thanks,
> > Ray
> >
> >> -  r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr,
> >> -                         amdgpu_bo_size(bo), resv, fence,
> >> -                         direct, false);
> >> -  if (!r)
> >> -          amdgpu_bo_fence(bo, *fence, true);
> >> +  shadow_addr = amdgpu_bo_gpu_offset(shadow);
> >> +  parent_addr = amdgpu_bo_gpu_offset(shadow->parent);
> >>   
> >> -err:
> >> -  return r;
> >> +  return amdgpu_copy_buffer(ring, shadow_addr, parent_addr,
> >> +                            amdgpu_bo_size(shadow), NULL, fence,
> >> +                            true, false);
> >>   }
> >>   
> >>   /**
> >> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h 
> >> b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> index 64337ff2ad63..7d3312d0da11 100644
> >> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
> >> @@ -273,12 +273,8 @@ int amdgpu_bo_backup_to_shadow(struct amdgpu_device 
> >> *adev,
> >>                           struct reservation_object *resv,
> >>                           struct dma_fence **fence, bool direct);
> >>   int amdgpu_bo_validate(struct amdgpu_bo *bo);
> >> -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
> >> -                            struct amdgpu_ring *ring,
> >> -                            struct amdgpu_bo *bo,
> >> -                            struct reservation_object *resv,
> >> -                            struct dma_fence **fence,
> >> -                            bool direct);
> >> +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
> >> +                       struct dma_fence **fence);
> >>   uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
> >>                                        uint32_t domain);
> >>   
> >> -- 
> >> 2.14.1
> >>
> >> _______________________________________________
> >> amd-gfx mailing list
> >> amd-gfx@lists.freedesktop.org
> >> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
> 
_______________________________________________
amd-gfx mailing list
amd-gfx@lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx

Reply via email to