On 12/10/25 10:20, Jesse.Zhang wrote:
> This patch introduces a new function `amdgpu_sync_resv_usage` that allows
> syncing fences of a specific usage from a reservation object, replacing
> the hardcoded DMA_RESV_USAGE_READ in the original `amdgpu_sync_resv`.
> 
> The original `amdgpu_sync_resv` is modified to call the new function with
> DMA_RESV_USAGE_READ to maintain backward compatibility.
> 
> In `amdgpu_vm_bo_update`, we update the sync call to use
> `amdgpu_sync_resv_usage` with DMA_RESV_USAGE_BOOKKEEP.
> This ensures we properly sync with bookkeeping fences (e.g., related to
> memory eviction, migration) when updating VM mappings.

Clear NAK. VM updates should *never* sync to eviction fences.

This is clearly just a broken workaround and not fixing the underlying bug.

Regards,
Christian.

> 
> Suggested-by: Prike Liang <[email protected]>
> Signed-off-by: Jesse Zhang <[email protected]>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 49 ++++++++++++++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h |  3 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c   | 10 ++++-
>  3 files changed, 44 insertions(+), 18 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> index d6ae9974c952..9665eed65b5d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
> @@ -229,31 +229,30 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device 
> *adev,
>  }
>  
>  /**
> - * amdgpu_sync_resv - sync to a reservation object
> - *
> - * @adev: amdgpu device
> - * @sync: sync object to add fences from reservation object to
> - * @resv: reservation object with embedded fence
> - * @mode: how owner affects which fences we sync to
> - * @owner: owner of the planned job submission
> - *
> - * Sync to the fence
> + * amdgpu_sync_resv_usage - Sync fences of a specific usage from a 
> reservation object
> + * @adev: AMDGPU device
> + * @sync: Sync object
> + * @resv: Reservation object
> + * @mode: Sync mode (affects which fences are selected)
> + * @owner: Owner identifier
> + * @usage: Target fence usage (e.g., DMA_RESV_USAGE_BOOKKEEP)
> + *
> + * Collects fences of the specified usage from the reservation and adds them 
> to the sync object
>   */
> -int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> -                  struct dma_resv *resv, enum amdgpu_sync_mode mode,
> -                  void *owner)
> +int amdgpu_sync_resv_usage(struct amdgpu_device *adev, struct amdgpu_sync 
> *sync,
> +                        struct dma_resv *resv, enum amdgpu_sync_mode mode,
> +                        void *owner, uint64_t usage)
>  {
>       struct dma_resv_iter cursor;
>       struct dma_fence *f;
>       int r;
>  
> -     if (resv == NULL)
> +     if (!resv)
>               return -EINVAL;
> -     /* Implicitly sync only to KERNEL, WRITE and READ */
> -     dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) {
> +
> +     dma_resv_for_each_fence(&cursor, resv, usage, f) {
>               dma_fence_chain_for_each(f, f) {
>                       struct dma_fence *tmp = dma_fence_chain_contained(f);
> -
>                       if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) {
>                               r = amdgpu_sync_fence(sync, f, GFP_KERNEL);
>                               dma_fence_put(f);
> @@ -266,6 +265,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct 
> amdgpu_sync *sync,
>       return 0;
>  }
>  
> +/**
> + * amdgpu_sync_resv - sync to a reservation object
> + *
> + * @adev: amdgpu device
> + * @sync: sync object to add fences from reservation object to
> + * @resv: reservation object with embedded fence
> + * @mode: how owner affects which fences we sync to
> + * @owner: owner of the planned job submission
> + *
> + * Sync to the fence
> + */
> +int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
> +                  struct dma_resv *resv, enum amdgpu_sync_mode mode,
> +                  void *owner)
> +{
> +      return amdgpu_sync_resv_usage(adev, sync, resv, mode, owner, 
> DMA_RESV_USAGE_READ);
> +}
> +
>  /**
>   * amdgpu_sync_kfd - sync to KFD fences
>   *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> index 51eb4382c91e..1f875b1e9d2c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h
> @@ -52,6 +52,9 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct 
> dma_fence *f,
>  int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync,
>                    struct dma_resv *resv, enum amdgpu_sync_mode mode,
>                    void *owner);
> +int amdgpu_sync_resv_usage(struct amdgpu_device *adev, struct amdgpu_sync 
> *sync,
> +                          struct dma_resv *resv, enum amdgpu_sync_mode mode,
> +                          void *owner, uint64_t usage);
>  int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv);
>  struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
>                                    struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> index 0eccb31793ca..f470b7e5489a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> @@ -1313,8 +1313,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, 
> struct amdgpu_bo_va *bo_va,
>                       pages_addr = bo->tbo.ttm->dma_address;
>  
>               /* Implicitly sync to moving fences before mapping anything */
> -             r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
> -                                  AMDGPU_SYNC_EXPLICIT, vm);
> +             if (vm->update_funcs == &amdgpu_vm_sdma_funcs)
> +                     r = amdgpu_sync_resv_usage(adev, &sync, 
> bo->tbo.base.resv,
> +                                     AMDGPU_SYNC_EXPLICIT, vm,
> +                                     DMA_RESV_USAGE_BOOKKEEP);
> +             else
> +                     r = amdgpu_sync_resv_usage(adev, &sync, 
> bo->tbo.base.resv,
> +                                     AMDGPU_SYNC_EXPLICIT, vm,
> +                                     DMA_RESV_USAGE_READ);
>               if (r)
>                       goto error_free;
>       }

Reply via email to