On Tue, Sep 23, 2025 at 9:19 AM Christian König
<[email protected]> wrote:
>
> The Constant Engine found on gfx6-gfx10 HW has been a notorious source of
> problems.
>
> RADV never used it in the first place, radeonsi only used it for a few
> releases around 2017 for gfx6-gfx9 before dropping support for it as
> well.
>
> While investigating another problem I just recently found that submitting
> to the CE seems to have been completely broken on gfx9 for quite a while.
>
> Since nobody complained about that problem it most likely means that
> nobody is using any of the affected radeonsi versions on current Linux
> kernels any more.
>
> So to potentially phase out the support for the CE and eliminate another
> source of problems, block submitting CE IBs unless it is enabled again
> using a debug flag.
>
> Signed-off-by: Christian König <[email protected]>

Series is:
Reviewed-by: Alex Deucher <[email protected]>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 6 ++++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 +++++++-
>  3 files changed, 14 insertions(+), 1 deletion(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 2a0df4cabb99..6f5b4a0e0a34 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1290,6 +1290,7 @@ struct amdgpu_device {
>         bool                            debug_disable_gpu_ring_reset;
>         bool                            debug_vm_userptr;
>         bool                            debug_disable_ce_logs;
> +       bool                            debug_enable_ce_cs;
>
>         /* Protection for the following isolation structure */
>         struct mutex                    enforce_isolation_mutex;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index 744e6ff69814..322890e2c899 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
>         if (p->uf_bo && ring->funcs->no_user_fence)
>                 return -EINVAL;
>
> +       if (!p->adev->debug_enable_ce_cs &&
> +           chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
> +               dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use debug=0x400 to override\n");
> +               return -EINVAL;
> +       }
> +
>         if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
>             chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
>                 if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> index ece251cbe8c3..3b3fc734c0f8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
> @@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
>         AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
>         AMDGPU_DEBUG_SMU_POOL = BIT(7),
>         AMDGPU_DEBUG_VM_USERPTR = BIT(8),
> -       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
> +       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
> +       AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
>  };
>
>  unsigned int amdgpu_vram_limit = UINT_MAX;
> @@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
>                 pr_info("debug: disable kernel logs of correctable errors\n");
>                 adev->debug_disable_ce_logs = true;
>         }
> +
> +       if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
> +               pr_info("debug: allowing command submission to CE engine\n");
> +               adev->debug_enable_ce_cs = true;
> +       }
>  }
>
>  static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
> --
> 2.43.0
>

Reply via email to