[PATCH 3/3] drm/radeon/kms: simplify & improve GPU reset V2
This simplifies and improves GPU reset for R1XX-R6XX hw. It's not 100% reliable; here are the results: - R1XX/R2XX works a bunch of times in a row, sometimes it seems it can work indefinitely - R3XX/R3XX the most unreliable one, sometimes you will be able to reset a few times, sometimes not even once - R5XX more reliable than previous hw, seems to work most of the time but once in a while it fails for no obvious reason (same status as the previous reset, just not the same happy ending) - R6XX/R7XX are a lot more reliable with this patch, still it seems that it can fail after a bunch of resets (a reset every 2 sec for 3 hours brings down the GPU & computer) This has been tested on various hw; for some odd reason I wasn't able to lock up RS480/RS690 (while they used to love locking up). Note that on R1XX-R5XX the cursor will disappear after a lockup (haven't checked why); switching to console and back to X will restore the cursor. Next step is to record the bogus command that led to the lockup. V2: Fix r6xx resume path to avoid reinitializing the blit module, use the gpu_lockup boolean to avoid entering an infinite waiting loop on fence while reinitializing the GPU. Signed-off-by: Jerome Glisse jgli...@redhat.com --- drivers/gpu/drm/radeon/r100.c | 180 +++ drivers/gpu/drm/radeon/r100d.h | 128 ++ drivers/gpu/drm/radeon/r300.c | 134 +++- drivers/gpu/drm/radeon/r300d.h | 47 - drivers/gpu/drm/radeon/r520.c |1 - drivers/gpu/drm/radeon/r600.c | 53 +- drivers/gpu/drm/radeon/r600_blit_kms.c |3 + drivers/gpu/drm/radeon/radeon.h|4 +- drivers/gpu/drm/radeon/radeon_asic.h | 12 +- drivers/gpu/drm/radeon/radeon_cs.c |4 - drivers/gpu/drm/radeon/radeon_device.c | 22 drivers/gpu/drm/radeon/radeon_fence.c | 13 ++- drivers/gpu/drm/radeon/radeon_gart.c |2 +- drivers/gpu/drm/radeon/rs400.c |2 - drivers/gpu/drm/radeon/rs600.c | 73 +- drivers/gpu/drm/radeon/rs600d.h| 46 drivers/gpu/drm/radeon/rs690.c |2 - drivers/gpu/drm/radeon/rv515.c | 90 drivers/gpu/drm/radeon/rv515d.h| 46 19 files changed, 508 insertions(+), 354 deletions(-) diff --git 
a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index 5594e71..a57939a 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) if (r100_debugfs_cp_init(rdev)) { DRM_ERROR(Failed to register debugfs file for CP !\n); } - /* Reset CP */ - tmp = RREG32(RADEON_CP_CSQ_STAT); - if ((tmp (1 31))) { - DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp); - WREG32(RADEON_CP_CSQ_MODE, 0); - WREG32(RADEON_CP_CSQ_CNTL, 0); - WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); - tmp = RREG32(RADEON_RBBM_SOFT_RESET); - mdelay(2); - WREG32(RADEON_RBBM_SOFT_RESET, 0); - tmp = RREG32(RADEON_RBBM_SOFT_RESET); - mdelay(2); - tmp = RREG32(RADEON_CP_CSQ_STAT); - if ((tmp (1 31))) { - DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp); - } - } else { - DRM_INFO(radeon: cp idle (0x%08X)\n, tmp); - } - if (!rdev-me_fw) { r = r100_cp_init_microcode(rdev); if (r) { @@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev) } } -int r100_cp_reset(struct radeon_device *rdev) -{ - uint32_t tmp; - bool reinit_cp; - int i; - - reinit_cp = rdev-cp.ready; - rdev-cp.ready = false; - WREG32(RADEON_CP_CSQ_MODE, 0); - WREG32(RADEON_CP_CSQ_CNTL, 0); - WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); - (void)RREG32(RADEON_RBBM_SOFT_RESET); - udelay(200); - WREG32(RADEON_RBBM_SOFT_RESET, 0); - /* Wait to prevent race in RBBM_STATUS */ - mdelay(1); - for (i = 0; i rdev-usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS); - if (!(tmp (1 16))) { - DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n, -tmp); - if (reinit_cp) { - return r100_cp_init(rdev, rdev-cp.ring_size); - } - return 0; - } - DRM_UDELAY(1); - } - tmp = RREG32(RADEON_RBBM_STATUS); - DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp); - return -1; -} - void r100_cp_commit(struct radeon_device *rdev) { WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr); @@ -1727,51 +1674,6 @@ int 
r100_mc_wait_for_idle(struct radeon_device
[PATCH 3/3] drm/radeon/kms: simplify & improve GPU reset
This simplifies and improves GPU reset for R1XX-R6XX hw. It's not 100% reliable; here are the results: - R1XX/R2XX works a bunch of times in a row, sometimes it seems it can work indefinitely - R3XX/R3XX the most unreliable one, sometimes you will be able to reset a few times, sometimes not even once - R5XX more reliable than previous hw, seems to work most of the time but once in a while it fails for no obvious reason (same status as the previous reset, just not the same happy ending) - R6XX/R7XX are a lot more reliable with this patch, still it seems that it can fail after a bunch of resets (a reset every 2 sec for 3 hours brings down the GPU & computer) This has been tested on various hw; for some odd reason I wasn't able to lock up RS480/RS690 (while they used to love locking up). Note that on R1XX-R5XX the cursor will disappear after a lockup (haven't checked why); switching to console and back to X will restore the cursor. Next step is to record the bogus command that led to the lockup. Signed-off-by: Jerome Glisse jgli...@redhat.com --- drivers/gpu/drm/radeon/r100.c | 180 +++ drivers/gpu/drm/radeon/r100d.h | 128 ++ drivers/gpu/drm/radeon/r300.c | 134 +++- drivers/gpu/drm/radeon/r300d.h | 47 - drivers/gpu/drm/radeon/r520.c |1 - drivers/gpu/drm/radeon/r600.c | 53 +- drivers/gpu/drm/radeon/radeon.h|4 +- drivers/gpu/drm/radeon/radeon_asic.h | 12 +- drivers/gpu/drm/radeon/radeon_device.c | 20 drivers/gpu/drm/radeon/radeon_fence.c |5 +- drivers/gpu/drm/radeon/radeon_gart.c |4 + drivers/gpu/drm/radeon/rs400.c |2 - drivers/gpu/drm/radeon/rs600.c | 73 +- drivers/gpu/drm/radeon/rs600d.h| 46 drivers/gpu/drm/radeon/rs690.c |2 - drivers/gpu/drm/radeon/rv515.c | 90 drivers/gpu/drm/radeon/rv515d.h| 46 17 files changed, 501 insertions(+), 346 deletions(-) diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c index f5b46a9..91e3b57 100644 --- a/drivers/gpu/drm/radeon/r100.c +++ b/drivers/gpu/drm/radeon/r100.c @@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size) if 
(r100_debugfs_cp_init(rdev)) { DRM_ERROR(Failed to register debugfs file for CP !\n); } - /* Reset CP */ - tmp = RREG32(RADEON_CP_CSQ_STAT); - if ((tmp (1 31))) { - DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp); - WREG32(RADEON_CP_CSQ_MODE, 0); - WREG32(RADEON_CP_CSQ_CNTL, 0); - WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); - tmp = RREG32(RADEON_RBBM_SOFT_RESET); - mdelay(2); - WREG32(RADEON_RBBM_SOFT_RESET, 0); - tmp = RREG32(RADEON_RBBM_SOFT_RESET); - mdelay(2); - tmp = RREG32(RADEON_CP_CSQ_STAT); - if ((tmp (1 31))) { - DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp); - } - } else { - DRM_INFO(radeon: cp idle (0x%08X)\n, tmp); - } - if (!rdev-me_fw) { r = r100_cp_init_microcode(rdev); if (r) { @@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev) } } -int r100_cp_reset(struct radeon_device *rdev) -{ - uint32_t tmp; - bool reinit_cp; - int i; - - reinit_cp = rdev-cp.ready; - rdev-cp.ready = false; - WREG32(RADEON_CP_CSQ_MODE, 0); - WREG32(RADEON_CP_CSQ_CNTL, 0); - WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP); - (void)RREG32(RADEON_RBBM_SOFT_RESET); - udelay(200); - WREG32(RADEON_RBBM_SOFT_RESET, 0); - /* Wait to prevent race in RBBM_STATUS */ - mdelay(1); - for (i = 0; i rdev-usec_timeout; i++) { - tmp = RREG32(RADEON_RBBM_STATUS); - if (!(tmp (1 16))) { - DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n, -tmp); - if (reinit_cp) { - return r100_cp_init(rdev, rdev-cp.ring_size); - } - return 0; - } - DRM_UDELAY(1); - } - tmp = RREG32(RADEON_RBBM_STATUS); - DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp); - return -1; -} - void r100_cp_commit(struct radeon_device *rdev) { WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr); @@ -1727,51 +1674,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev) return -1; } -void r100_gpu_init(struct radeon_device *rdev) -{ - /* TODO: anythings to do here ? pipes ? */ - r100_hdp_reset(rdev); -} - -void r100_hdp_reset(struct radeon_device *rdev) -{ - uint32_t tmp; - - tmp =