On 1/25/2024 1:37 PM, Le Ma wrote:
> This patch is to eliminate interrupt warning below:
> 
>   "[drm] Fence fallback timer expired on ring sdma0.0".
> 
> An early vm pt clearing job is sent to SDMA ahead of interrupt enabled,
> introduced by patch below:
> 
>   - drm/amdkfd: Export DMABufs from KFD using GEM handles
> 
The drm client creation logic in this patch won't work well for dynamic
partition switch cases. Better add a ''Fixes' tag also.

This looks fine to me, needs to be checked by Felix anyway.

Thanks,
Lijo

> And re-locating the drm client creation following after drm_dev_register
> looks like a more proper flow.
> 
> v2: wrap the drm client creation
> 
> Signed-off-by: Le Ma <le...@amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c | 32 ++++++++++++++--------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h |  2 ++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c    |  4 +++
>  3 files changed, 27 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> index 77e263660288..41db030ddc4e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
> @@ -141,11 +141,31 @@ static void amdgpu_amdkfd_reset_work(struct work_struct 
> *work)
>  static const struct drm_client_funcs kfd_client_funcs = {
>       .unregister     = drm_client_release,
>  };
> +
> +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev)
> +{
> +     int ret;
> +
> +     if (!adev->kfd.init_complete)
> +             return 0;
> +
> +     ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd",
> +                           &kfd_client_funcs);
> +     if (ret) {
> +             dev_err(adev->dev, "Failed to init DRM client: %d\n",
> +                     ret);
> +             return ret;
> +     }
> +
> +     drm_client_register(&adev->kfd.client);
> +
> +     return 0;
> +}
> +
>  void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
>  {
>       int i;
>       int last_valid_bit;
> -     int ret;
>  
>       amdgpu_amdkfd_gpuvm_init_mem_limits();
>  
> @@ -164,12 +184,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device 
> *adev)
>                       .enable_mes = adev->enable_mes,
>               };
>  
> -             ret = drm_client_init(&adev->ddev, &adev->kfd.client, "kfd", 
> &kfd_client_funcs);
> -             if (ret) {
> -                     dev_err(adev->dev, "Failed to init DRM client: %d\n", 
> ret);
> -                     return;
> -             }
> -
>               /* this is going to have a few of the MSBs set that we need to
>                * clear
>                */
> @@ -208,10 +222,6 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device 
> *adev)
>  
>               adev->kfd.init_complete = kgd2kfd_device_init(adev->kfd.dev,
>                                                       &gpu_resources);
> -             if (adev->kfd.init_complete)
> -                     drm_client_register(&adev->kfd.client);
> -             else
> -                     drm_client_release(&adev->kfd.client);
>  
>               amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
>  
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> index 584a0cea5572..da175c384adf 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
> @@ -182,6 +182,8 @@ int amdgpu_queue_mask_bit_to_set_resource_bit(struct 
> amdgpu_device *adev,
>  struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
>                               struct mm_struct *mm,
>                               struct svm_range_bo *svm_bo);
> +
> +int amdgpu_amdkfd_drm_client_create(struct amdgpu_device *adev);
>  #if defined(CONFIG_DEBUG_FS)
>  int kfd_debugfs_kfd_mem_limits(struct seq_file *m, void *data);
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index 475bd59c9ac2..91d5d9435067 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -2255,6 +2255,10 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
>       if (ret)
>               goto err_pci;
>  
> +     ret = amdgpu_amdkfd_drm_client_create(adev);
> +     if (ret)
> +             goto err_pci;
> +
>       /*
>        * 1. don't init fbdev on hw without DCE
>        * 2. don't init fbdev if there are no connectors

Reply via email to