On 12/10/25 10:20, Jesse.Zhang wrote: > This patch introduces a new function `amdgpu_sync_resv_usage` that allows > syncing fences of a specific usage from a reservation object, replacing > the hardcoded DMA_RESV_USAGE_READ in the original `amdgpu_sync_resv`. > > The original `amdgpu_sync_resv` is modified to call the new function with > DMA_RESV_USAGE_READ to maintain backward compatibility. > > In `amdgpu_vm_bo_update`, we update the sync call to use > `amdgpu_sync_resv_usage` with DMA_RESV_USAGE_BOOKKEEP. > This ensures we properly sync with bookkeeping fences (e.g., related to > memory eviction, migration) when updating VM mappings.
Clear NAK. VM updates should *never* sync to eviction fences. This is clearly just a broken workaround and not fixing the underlying bug. Regards, Christian. > > Suggested-by: Prike Liang <[email protected]> > Signed-off-by: Jesse Zhang <[email protected]> > --- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c | 49 ++++++++++++++++-------- > drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h | 3 ++ > drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 10 ++++- > 3 files changed, 44 insertions(+), 18 deletions(-) > > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > index d6ae9974c952..9665eed65b5d 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c > @@ -229,31 +229,30 @@ static bool amdgpu_sync_test_fence(struct amdgpu_device > *adev, > } > > /** > - * amdgpu_sync_resv - sync to a reservation object > - * > - * @adev: amdgpu device > - * @sync: sync object to add fences from reservation object to > - * @resv: reservation object with embedded fence > - * @mode: how owner affects which fences we sync to > - * @owner: owner of the planned job submission > - * > - * Sync to the fence > + * amdgpu_sync_resv_usage - Sync fences of a specific usage from a > reservation object > + * @adev: AMDGPU device > + * @sync: Sync object > + * @resv: Reservation object > + * @mode: Sync mode (affects which fences are selected) > + * @owner: Owner identifier > + * @usage: Target fence usage (e.g., DMA_RESV_USAGE_BOOKKEEP) > + * > + * Collects fences of the specified usage from the reservation and adds them > to the sync object > */ > -int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, > - struct dma_resv *resv, enum amdgpu_sync_mode mode, > - void *owner) > +int amdgpu_sync_resv_usage(struct amdgpu_device *adev, struct amdgpu_sync > *sync, > + struct dma_resv *resv, enum amdgpu_sync_mode mode, > + void *owner, uint64_t usage) > { > struct dma_resv_iter cursor; > struct dma_fence *f; > int r; > > - if (resv == NULL) > + if (!resv) > return -EINVAL; > - /* Implicitly sync only to KERNEL, WRITE and READ */ > - dma_resv_for_each_fence(&cursor, resv, DMA_RESV_USAGE_READ, f) { > + > + dma_resv_for_each_fence(&cursor, resv, usage, f) { > dma_fence_chain_for_each(f, f) { > struct dma_fence *tmp = dma_fence_chain_contained(f); > - > if (amdgpu_sync_test_fence(adev, mode, owner, tmp)) { > r = amdgpu_sync_fence(sync, f, GFP_KERNEL); > dma_fence_put(f); > @@ -266,6 +265,24 @@ int amdgpu_sync_resv(struct amdgpu_device *adev, struct > amdgpu_sync *sync, > return 0; > } > > +/** > + * amdgpu_sync_resv - sync to a reservation object > + * > + * @adev: amdgpu device > + * @sync: sync object to add fences from reservation object to > + * @resv: reservation object with embedded fence > + * @mode: how owner affects which fences we sync to > + * @owner: owner of the planned job submission > + * > + * Sync to the fence > + */ > +int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, > + struct dma_resv *resv, enum amdgpu_sync_mode mode, > + void *owner) > +{ > + return amdgpu_sync_resv_usage(adev, sync, resv, mode, owner, > DMA_RESV_USAGE_READ); > +} > + > /** > * amdgpu_sync_kfd - sync to KFD fences > * > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > index 51eb4382c91e..1f875b1e9d2c 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.h > @@ -52,6 +52,9 @@ int amdgpu_sync_fence(struct amdgpu_sync *sync, struct > dma_fence *f, > int amdgpu_sync_resv(struct amdgpu_device *adev, struct amdgpu_sync *sync, > struct dma_resv *resv, enum amdgpu_sync_mode mode, > void *owner); > +int amdgpu_sync_resv_usage(struct amdgpu_device *adev, struct amdgpu_sync > *sync, > + struct dma_resv *resv, enum amdgpu_sync_mode mode, > + void *owner, uint64_t usage); > int amdgpu_sync_kfd(struct amdgpu_sync *sync, struct dma_resv *resv); > struct dma_fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync, > struct amdgpu_ring *ring); > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > index 0eccb31793ca..f470b7e5489a 100644 > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c > @@ -1313,8 +1313,14 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, > struct amdgpu_bo_va *bo_va, > pages_addr = bo->tbo.ttm->dma_address; > > /* Implicitly sync to moving fences before mapping anything */ > - r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv, > - AMDGPU_SYNC_EXPLICIT, vm); > + if (vm->update_funcs == &amdgpu_vm_sdma_funcs) > + r = amdgpu_sync_resv_usage(adev, &sync, > bo->tbo.base.resv, > + AMDGPU_SYNC_EXPLICIT, vm, > + DMA_RESV_USAGE_BOOKKEEP); > + else > + r = amdgpu_sync_resv_usage(adev, &sync, > bo->tbo.base.resv, > + AMDGPU_SYNC_EXPLICIT, vm, > + DMA_RESV_USAGE_READ); > if (r) > goto error_free; > }
