The Constant Engine found on gfx6-gfx10 HW has been a notorious source of
problems.

RADV never used it in the first place; radeonsi only used it for a few
releases around 2017 for gfx6-gfx9 before dropping support for it as
well.

While investigating another problem, I recently found that submitting to
the CE seems to have been completely broken on gfx9 for quite a while.

Since nobody complained about that problem, it most likely means that
nobody is using any of the affected radeonsi versions on current Linux
kernels any more.

So to potentially phase out the support for the CE and eliminate another
source of problems, block submitting CE IBs unless submission is enabled
again using a debug flag.

Signed-off-by: Christian König <[email protected]>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 1 +
 drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c  | 6 ++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 8 +++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h 
b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 2a0df4cabb99..6f5b4a0e0a34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1290,6 +1290,7 @@ struct amdgpu_device {
        bool                            debug_disable_gpu_ring_reset;
        bool                            debug_vm_userptr;
        bool                            debug_disable_ce_logs;
+       bool                            debug_enable_ce_cs;
 
        /* Protection for the following isolation structure */
        struct mutex                    enforce_isolation_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 744e6ff69814..322890e2c899 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -364,6 +364,12 @@ static int amdgpu_cs_p2_ib(struct amdgpu_cs_parser *p,
        if (p->uf_bo && ring->funcs->no_user_fence)
                return -EINVAL;
 
+       if (!p->adev->debug_enable_ce_cs &&
+           chunk_ib->flags & AMDGPU_IB_FLAG_CE) {
+               dev_err_ratelimited(p->adev->dev, "CE CS is blocked, use 
debug=0x400 to override\n");
+               return -EINVAL;
+       }
+
        if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
            chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
                if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ece251cbe8c3..3b3fc734c0f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -144,7 +144,8 @@ enum AMDGPU_DEBUG_MASK {
        AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
        AMDGPU_DEBUG_SMU_POOL = BIT(7),
        AMDGPU_DEBUG_VM_USERPTR = BIT(8),
-       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
+       AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9),
+       AMDGPU_DEBUG_ENABLE_CE_CS = BIT(10)
 };
 
 unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2289,6 +2290,11 @@ static void amdgpu_init_debug_options(struct 
amdgpu_device *adev)
                pr_info("debug: disable kernel logs of correctable errors\n");
                adev->debug_disable_ce_logs = true;
        }
+
+       if (amdgpu_debug_mask & AMDGPU_DEBUG_ENABLE_CE_CS) {
+               pr_info("debug: allowing command submission to CE engine\n");
+               adev->debug_enable_ce_cs = true;
+       }
 }
 
 static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long 
flags)
-- 
2.43.0

Reply via email to