[PATCH 3/3] drm/radeon/kms: simplify & improve GPU reset V2

2010-03-09 Thread Jerome Glisse
This simplifies and improves GPU reset for R1XX-R6XX hw. It's
not 100% reliable; here are the results:
- R1XX/R2XX works a bunch of times in a row, sometimes it
  seems it can work indefinitely
- R3XX/R4XX the most unreliable one, sometimes you will be
  able to reset a few times, sometimes not even once
- R5XX more reliable than previous hw, seems to work most
  of the time but once in a while it fails for no obvious
  reason (same status as the previous reset, just not the same
  happy ending)
- R6XX/R7XX are a lot more reliable with this patch, still
  it seems that it can fail after a bunch of resets (a reset every
  2sec for 3 hours brings down the GPU & computer)

This has been tested on various hw; for some odd reason
I wasn't able to lock up RS480/RS690 (while they used to
love locking up).

Note that on R1XX-R5XX the cursor will disappear after a
lockup (I haven't checked why); switching to the console and
back to X will restore the cursor.

The next step is to record the bogus command that led to
the lockup.

V2: Fix the r6xx resume path to avoid reinitializing the blit
module; use the gpu_lockup boolean to avoid entering an
infinite waiting loop on a fence while reinitializing the GPU

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/r100.c  |  180 +++
 drivers/gpu/drm/radeon/r100d.h |  128 ++
 drivers/gpu/drm/radeon/r300.c  |  134 +++-
 drivers/gpu/drm/radeon/r300d.h |   47 -
 drivers/gpu/drm/radeon/r520.c  |1 -
 drivers/gpu/drm/radeon/r600.c  |   53 +-
 drivers/gpu/drm/radeon/r600_blit_kms.c |3 +
 drivers/gpu/drm/radeon/radeon.h|4 +-
 drivers/gpu/drm/radeon/radeon_asic.h   |   12 +-
 drivers/gpu/drm/radeon/radeon_cs.c |4 -
 drivers/gpu/drm/radeon/radeon_device.c |   22 
 drivers/gpu/drm/radeon/radeon_fence.c  |   13 ++-
 drivers/gpu/drm/radeon/radeon_gart.c   |2 +-
 drivers/gpu/drm/radeon/rs400.c |2 -
 drivers/gpu/drm/radeon/rs600.c |   73 +-
 drivers/gpu/drm/radeon/rs600d.h|   46 
 drivers/gpu/drm/radeon/rs690.c |2 -
 drivers/gpu/drm/radeon/rv515.c |   90 
 drivers/gpu/drm/radeon/rv515d.h|   46 
 19 files changed, 508 insertions(+), 354 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index 5594e71..a57939a 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned 
ring_size)
if (r100_debugfs_cp_init(rdev)) {
DRM_ERROR(Failed to register debugfs file for CP !\n);
}
-   /* Reset CP */
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp);
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp);
-   }
-   } else {
-   DRM_INFO(radeon: cp idle (0x%08X)\n, tmp);
-   }
-
if (!rdev-me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
@@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev)
}
 }
 
-int r100_cp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-   bool reinit_cp;
-   int i;
-
-   reinit_cp = rdev-cp.ready;
-   rdev-cp.ready = false;
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   (void)RREG32(RADEON_RBBM_SOFT_RESET);
-   udelay(200);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   /* Wait to prevent race in RBBM_STATUS */
-   mdelay(1);
-   for (i = 0; i  rdev-usec_timeout; i++) {
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   if (!(tmp  (1  16))) {
-   DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n,
-tmp);
-   if (reinit_cp) {
-   return r100_cp_init(rdev, rdev-cp.ring_size);
-   }
-   return 0;
-   }
-   DRM_UDELAY(1);
-   }
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp);
-   return -1;
-}
-
 void r100_cp_commit(struct radeon_device *rdev)
 {
WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr);
@@ -1727,51 +1674,6 @@ int r100_mc_wait_for_idle(struct radeon_device 

[PATCH 3/3] drm/radeon/kms: simplify & improve GPU reset

2010-03-05 Thread Jerome Glisse
This simplifies and improves GPU reset for R1XX-R6XX hw. It's
not 100% reliable; here are the results:
- R1XX/R2XX works a bunch of times in a row, sometimes it
  seems it can work indefinitely
- R3XX/R4XX the most unreliable one, sometimes you will be
  able to reset a few times, sometimes not even once
- R5XX more reliable than previous hw, seems to work most
  of the time but once in a while it fails for no obvious
  reason (same status as the previous reset, just not the same
  happy ending)
- R6XX/R7XX are a lot more reliable with this patch, still
  it seems that it can fail after a bunch of resets (a reset every
  2sec for 3 hours brings down the GPU & computer)

This has been tested on various hw; for some odd reason
I wasn't able to lock up RS480/RS690 (while they used to
love locking up).

Note that on R1XX-R5XX the cursor will disappear after a
lockup (I haven't checked why); switching to the console and
back to X will restore the cursor.

The next step is to record the bogus command that led to
the lockup.

Signed-off-by: Jerome Glisse jgli...@redhat.com
---
 drivers/gpu/drm/radeon/r100.c  |  180 +++
 drivers/gpu/drm/radeon/r100d.h |  128 ++
 drivers/gpu/drm/radeon/r300.c  |  134 +++-
 drivers/gpu/drm/radeon/r300d.h |   47 -
 drivers/gpu/drm/radeon/r520.c  |1 -
 drivers/gpu/drm/radeon/r600.c  |   53 +-
 drivers/gpu/drm/radeon/radeon.h|4 +-
 drivers/gpu/drm/radeon/radeon_asic.h   |   12 +-
 drivers/gpu/drm/radeon/radeon_device.c |   20 
 drivers/gpu/drm/radeon/radeon_fence.c  |5 +-
 drivers/gpu/drm/radeon/radeon_gart.c   |4 +
 drivers/gpu/drm/radeon/rs400.c |2 -
 drivers/gpu/drm/radeon/rs600.c |   73 +-
 drivers/gpu/drm/radeon/rs600d.h|   46 
 drivers/gpu/drm/radeon/rs690.c |2 -
 drivers/gpu/drm/radeon/rv515.c |   90 
 drivers/gpu/drm/radeon/rv515d.h|   46 
 17 files changed, 501 insertions(+), 346 deletions(-)

diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index f5b46a9..91e3b57 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -659,26 +659,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned 
ring_size)
if (r100_debugfs_cp_init(rdev)) {
DRM_ERROR(Failed to register debugfs file for CP !\n);
}
-   /* Reset CP */
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp busy (0x%08X) resetting\n, tmp);
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   tmp = RREG32(RADEON_RBBM_SOFT_RESET);
-   mdelay(2);
-   tmp = RREG32(RADEON_CP_CSQ_STAT);
-   if ((tmp  (1  31))) {
-   DRM_INFO(radeon: cp reset failed (0x%08X)\n, tmp);
-   }
-   } else {
-   DRM_INFO(radeon: cp idle (0x%08X)\n, tmp);
-   }
-
if (!rdev-me_fw) {
r = r100_cp_init_microcode(rdev);
if (r) {
@@ -781,39 +761,6 @@ void r100_cp_disable(struct radeon_device *rdev)
}
 }
 
-int r100_cp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-   bool reinit_cp;
-   int i;
-
-   reinit_cp = rdev-cp.ready;
-   rdev-cp.ready = false;
-   WREG32(RADEON_CP_CSQ_MODE, 0);
-   WREG32(RADEON_CP_CSQ_CNTL, 0);
-   WREG32(RADEON_RBBM_SOFT_RESET, RADEON_SOFT_RESET_CP);
-   (void)RREG32(RADEON_RBBM_SOFT_RESET);
-   udelay(200);
-   WREG32(RADEON_RBBM_SOFT_RESET, 0);
-   /* Wait to prevent race in RBBM_STATUS */
-   mdelay(1);
-   for (i = 0; i  rdev-usec_timeout; i++) {
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   if (!(tmp  (1  16))) {
-   DRM_INFO(CP reset succeed (RBBM_STATUS=0x%08X)\n,
-tmp);
-   if (reinit_cp) {
-   return r100_cp_init(rdev, rdev-cp.ring_size);
-   }
-   return 0;
-   }
-   DRM_UDELAY(1);
-   }
-   tmp = RREG32(RADEON_RBBM_STATUS);
-   DRM_ERROR(Failed to reset CP (RBBM_STATUS=0x%08X)!\n, tmp);
-   return -1;
-}
-
 void r100_cp_commit(struct radeon_device *rdev)
 {
WREG32(RADEON_CP_RB_WPTR, rdev-cp.wptr);
@@ -1727,51 +1674,6 @@ int r100_mc_wait_for_idle(struct radeon_device *rdev)
return -1;
 }
 
-void r100_gpu_init(struct radeon_device *rdev)
-{
-   /* TODO: anythings to do here ? pipes ? */
-   r100_hdp_reset(rdev);
-}
-
-void r100_hdp_reset(struct radeon_device *rdev)
-{
-   uint32_t tmp;
-
-   tmp =