Re: [PATCH v5 2/7] drm/amdgpu: only use working sdma schedulers for ttm

2026-04-07 Thread Christian König
On 4/3/26 10:35, Pierre-Eric Pelloux-Prayer wrote:
> It's possible that some sdma instances aren't working so we shouldn't try
> to use them from TTM. To achieve this, delay the call to
> amdgpu_sdma_set_buffer_funcs_scheds after the rings have been tested, and
> then use the 'ready' property to decide if a sched should be used or not.
> 
> Note that currently it's not doing much, because if the ring helper fails
> for any ring, the whole sdma block init fails.
> 
> ---
> v5: check buffer_funcs_enabled from amdgpu_ttm_access_memory_sdma
> ---
> 
> Signed-off-by: Pierre-Eric Pelloux-Prayer 

Reviewed-by: Christian König 

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 23 ---
>  drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 10 --
>  drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  5 +++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  9 +++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  6 --
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  6 --
>  drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  9 +++--
>  drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   |  2 +-
>  drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c   |  9 +++--
>  drivers/gpu/drm/amd/amdgpu/si_dma.c  |  9 +++--
>  12 files changed, 68 insertions(+), 27 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c 
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 4ba7321b75e3..bd32113292ec 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -1560,7 +1560,7 @@ static int amdgpu_ttm_access_memory_sdma(struct 
> ttm_buffer_object *bo,
>   if (!adev->mman.sdma_access_ptr)
>   return -EACCES;
>  
> - if (!drm_dev_enter(adev_to_drm(adev), &idx))
> + if (!adev->mman.buffer_funcs_enabled || 
> !drm_dev_enter(adev_to_drm(adev), &idx))
>   return -ENODEV;
>  
>   if (write)
> @@ -2351,8 +2351,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct 
> amdgpu_device *adev, bool enable)
>   if (enable) {
>   struct drm_gpu_scheduler *sched;
>  
> - if (!adev->mman.num_buffer_funcs_scheds ||
> - !adev->mman.buffer_funcs_scheds[0]->ready) {
> + if (!adev->mman.num_buffer_funcs_scheds) {
>   dev_warn(adev->dev, "Not enabling DMA transfers for in 
> kernel use");
>   return;
>   }
> @@ -2734,20 +2733,30 @@ void amdgpu_sdma_set_buffer_funcs_scheds(struct 
> amdgpu_device *adev,
>  {
>   struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
>   struct drm_gpu_scheduler *sched;
> - int i;
> + int i, n;
>  
>   adev->mman.buffer_funcs = buffer_funcs;
>  
> - for (i = 0; i < adev->sdma.num_instances; i++) {
> + for (i = 0, n = 0; i < adev->sdma.num_instances; i++) {
>   if (adev->sdma.has_page_queue)
>   sched = &adev->sdma.instance[i].page.sched;
>   else
>   sched = &adev->sdma.instance[i].ring.sched;
> - adev->mman.buffer_funcs_scheds[i] = sched;
> +
> + if (!sched->ready)
> + continue;
> +
> + adev->mman.buffer_funcs_scheds[n++] = sched;
> + }
> +
> + if (n == 0) {
> + adev->mman.num_buffer_funcs_scheds = 0;
> + drm_warn(&adev->ddev, "No working sdma ring available\n");
> + return;
>   }
>  
>   adev->mman.num_buffer_funcs_scheds = hub->sdma_invalidation_workaround ?
> - 1 : adev->sdma.num_instances;
> + 1 : n;
>  }
>  
>  #if defined(CONFIG_DEBUG_FS)
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c 
> b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index 26276dcfd458..120da838ac28 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -939,7 +939,6 @@ static int cik_sdma_early_init(struct amdgpu_ip_block 
> *ip_block)
>  
>   cik_sdma_set_ring_funcs(adev);
>   cik_sdma_set_irq_funcs(adev);
> - cik_sdma_set_buffer_funcs(adev);
>   amdgpu_sdma_set_vm_pte_scheds(adev, &cik_sdma_vm_pte_funcs);
>  
>   return 0;
> @@ -1000,8 +999,15 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block 
> *ip_block)
>  static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
>  {
>   struct amdgpu_device *adev = ip_block->adev;
> + int r;
>  
> - return cik_sdma_start(adev);
> + r = cik_sdma_start(adev);
> + if (r)
> + return r;
> +
> + cik_sdma_set_buffer_funcs(adev);
> +
> + return 0;
>  }
>  
>  static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c 
> b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index c6a059ca59e5..93ec52c1f367 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -828,7 +828,

[PATCH v5 2/7] drm/amdgpu: only use working sdma schedulers for ttm

2026-04-03 Thread Pierre-Eric Pelloux-Prayer
It's possible that some SDMA instances aren't working, so we shouldn't try
to use them from TTM. To achieve this, delay the call to
amdgpu_sdma_set_buffer_funcs_scheds() until after the rings have been tested,
and then use the 'ready' property to decide whether a sched should be used.

Note that this currently has little effect, because if the ring helper fails
for any ring, the whole SDMA block init fails.

---
v5: check buffer_funcs_enabled from amdgpu_ttm_access_memory_sdma
---

Signed-off-by: Pierre-Eric Pelloux-Prayer 
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 23 ---
 drivers/gpu/drm/amd/amdgpu/cik_sdma.c| 10 --
 drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   |  5 +++--
 drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   |  5 +++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   |  9 +++--
 drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c |  6 --
 drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   |  6 --
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   |  9 +++--
 drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   |  2 +-
 drivers/gpu/drm/amd/amdgpu/sdma_v7_1.c   |  9 +++--
 drivers/gpu/drm/amd/amdgpu/si_dma.c  |  9 +++--
 12 files changed, 68 insertions(+), 27 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 4ba7321b75e3..bd32113292ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1560,7 +1560,7 @@ static int amdgpu_ttm_access_memory_sdma(struct ttm_buffer_object *bo,
if (!adev->mman.sdma_access_ptr)
return -EACCES;
 
-   if (!drm_dev_enter(adev_to_drm(adev), &idx))
+   if (!adev->mman.buffer_funcs_enabled || !drm_dev_enter(adev_to_drm(adev), &idx))
return -ENODEV;
 
if (write)
@@ -2351,8 +2351,7 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
if (enable) {
struct drm_gpu_scheduler *sched;
 
-   if (!adev->mman.num_buffer_funcs_scheds ||
-   !adev->mman.buffer_funcs_scheds[0]->ready) {
+   if (!adev->mman.num_buffer_funcs_scheds) {
dev_warn(adev->dev, "Not enabling DMA transfers for in kernel use");
return;
}
@@ -2734,20 +2733,30 @@ void amdgpu_sdma_set_buffer_funcs_scheds(struct amdgpu_device *adev,
 {
struct amdgpu_vmhub *hub = &adev->vmhub[AMDGPU_GFXHUB(0)];
struct drm_gpu_scheduler *sched;
-   int i;
+   int i, n;
 
adev->mman.buffer_funcs = buffer_funcs;
 
-   for (i = 0; i < adev->sdma.num_instances; i++) {
+   for (i = 0, n = 0; i < adev->sdma.num_instances; i++) {
if (adev->sdma.has_page_queue)
sched = &adev->sdma.instance[i].page.sched;
else
sched = &adev->sdma.instance[i].ring.sched;
-   adev->mman.buffer_funcs_scheds[i] = sched;
+
+   if (!sched->ready)
+   continue;
+
+   adev->mman.buffer_funcs_scheds[n++] = sched;
+   }
+
+   if (n == 0) {
+   adev->mman.num_buffer_funcs_scheds = 0;
+   drm_warn(&adev->ddev, "No working sdma ring available\n");
+   return;
}
 
adev->mman.num_buffer_funcs_scheds = hub->sdma_invalidation_workaround ?
-   1 : adev->sdma.num_instances;
+   1 : n;
 }
 
 #if defined(CONFIG_DEBUG_FS)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 26276dcfd458..120da838ac28 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -939,7 +939,6 @@ static int cik_sdma_early_init(struct amdgpu_ip_block *ip_block)
 
cik_sdma_set_ring_funcs(adev);
cik_sdma_set_irq_funcs(adev);
-   cik_sdma_set_buffer_funcs(adev);
amdgpu_sdma_set_vm_pte_scheds(adev, &cik_sdma_vm_pte_funcs);
 
return 0;
@@ -1000,8 +999,15 @@ static int cik_sdma_sw_fini(struct amdgpu_ip_block *ip_block)
 static int cik_sdma_hw_init(struct amdgpu_ip_block *ip_block)
 {
struct amdgpu_device *adev = ip_block->adev;
+   int r;
 
-   return cik_sdma_start(adev);
+   r = cik_sdma_start(adev);
+   if (r)
+   return r;
+
+   cik_sdma_set_buffer_funcs(adev);
+
+   return 0;
 }
 
 static int cik_sdma_hw_fini(struct amdgpu_ip_block *ip_block)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index c6a059ca59e5..93ec52c1f367 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -828,7 +828,6 @@ static int sdma_v2_4_early_init(struct amdgpu_ip_block 
*ip_block)
return r;
 
sdma_v2_4_set_ring_funcs(adev);
-   sdma_v2_4_set_buffer_funcs(adev);
amdgpu_sdma_set_vm_pte_scheds(adev, &sdma_v2_4_vm_p