I've updated drm.watchdog.2.patch http://marc.theaimsgroup.com/?l=dri-devel&m=108551485018672&w=2 to the latest DRM CVS, and it works together with "ati.unlock.1.patch" and "ati.drm-r300-version.1.patch" http://marc.theaimsgroup.com/?l=dri-devel&m=108551675805810&w=2 when the UT2003/2004 lockups ('Shock Rifle') arise.
But the 3D engine won't run again until a hard reset takes place ;-( Any newly started 3D app is stopped after an empty window pops up. Running "setenv R200_DEBUG sanity" followed by "progs/demos> ./ipers >& log" gave some weird numbers: R200_VS_LIGHT_ATTENUATION_ADDR[2] <-- 1.000 *** NEW MAX (prev 0.000) R200_SS_LIGHT_RANGE_CUTOFF_SQRD <-- 340282346638528859811704183484516925440.000 *** NEW MAX (prev 0.000) Any hints? -Dieter
diff -ru drm.orig/linux/drm_drv.h drm/linux/drm_drv.h --- drm.orig/linux/drm_drv.h 2004-09-13 12:46:47.000000000 +0200 +++ drm/linux/drm_drv.h 2004-09-13 21:39:40.657676794 +0200 @@ -185,6 +185,8 @@ MODULE_PARM( drm_opts, "s" ); MODULE_LICENSE("GPL and additional rights"); +static void drm_lock_watchdog( unsigned long __data ); + static int DRM(setup)( drm_device_t *dev ) { int i; @@ -323,6 +325,7 @@ down( &dev->struct_sem ); del_timer( &dev->timer ); + del_timer_sync( &dev->lock.watchdog ); if ( dev->devname ) { DRM(free)( dev->devname, strlen( dev->devname ) + 1, @@ -433,6 +436,7 @@ if ( dev->lock.hw_lock ) { dev->sigdata.lock = dev->lock.hw_lock = NULL; /* SHM removed */ dev->lock.filp = NULL; + dev->lock.dontbreak = 1; wake_up_interruptible( &dev->lock.lock_queue ); } up( &dev->struct_sem ); @@ -530,6 +534,10 @@ goto error_out; } + init_timer( &dev->lock.watchdog ); + dev->lock.watchdog.data = (unsigned long) dev; + dev->lock.watchdog.function = drm_lock_watchdog; + if ((dev->minor = DRM(stub_register)(DRIVER_NAME, &DRM(fops),dev)) < 0) { retcode = -EPERM; @@ -888,6 +896,11 @@ if (dev->fn_tbl.release) dev->fn_tbl.release(dev, filp); + /* Avoid potential race where the watchdog callback is still + * running when filp is freed. + */ + del_timer_sync( &dev->lock.watchdog ); + DRM(lock_free)( dev, &dev->lock.hw_lock->lock, _DRM_LOCKING_CONTEXT(dev->lock.hw_lock->lock) ); @@ -910,6 +923,7 @@ } if ( DRM(lock_take)( &dev->lock.hw_lock->lock, DRM_KERNEL_CONTEXT ) ) { + dev->lock.dontbreak = 1; dev->lock.filp = filp; dev->lock.lock_time = jiffies; atomic_inc( &dev->counts[_DRM_STAT_LOCKS] ); @@ -1060,6 +1074,40 @@ return retcode; } + +/** + * Lock watchdog callback function. + * + * Whenever a privileged client must sleep on the lock waitqueue + * in the LOCK ioctl, the watchdog timer is started. + * When the UNLOCK ioctl is called, the timer is stopped. + * + * When the watchdog timer expires, the process holding the lock + * is killed. 
Privileged clients set lock.dontbreak and are exempt + * from this rule. + */ +static void drm_lock_watchdog( unsigned long __data ) +{ + drm_device_t *dev = (drm_device_t *)__data; + drm_file_t *priv; + + if ( !dev->lock.filp ) { + DRM_DEBUG( "held by kernel\n" ); + return; + } + + if ( dev->lock.dontbreak ) { + DRM_DEBUG( "privileged lock\n" ); + return; + } + + priv = dev->lock.filp->private_data; + DRM_DEBUG( "Kill pid=%d\n", priv->pid ); + + kill_proc( priv->pid, SIGKILL, 1 ); +} + + /** * Lock ioctl. * @@ -1079,6 +1127,7 @@ DECLARE_WAITQUEUE( entry, current ); drm_lock_t lock; int ret = 0; + int privileged = capable( CAP_SYS_ADMIN ); ++priv->lock_count; @@ -1109,6 +1158,7 @@ } if ( DRM(lock_take)( &dev->lock.hw_lock->lock, lock.context ) ) { + dev->lock.dontbreak = privileged; dev->lock.filp = filp; dev->lock.lock_time = jiffies; atomic_inc( &dev->counts[_DRM_STAT_LOCKS] ); @@ -1116,6 +1166,14 @@ } /* Contention */ + + if ( privileged ) { + if ( !timer_pending( &dev->lock.watchdog ) ) { + DRM_DEBUG( "Starting lock watchdog\n" ); + mod_timer( &dev->lock.watchdog, jiffies + 5 * HZ ); + } + } + schedule(); if ( signal_pending( current ) ) { ret = -ERESTARTSYS; @@ -1180,8 +1238,12 @@ return -EINVAL; } + DRM_DEBUG( "\n" ); + atomic_inc( &dev->counts[_DRM_STAT_UNLOCKS] ); + del_timer_sync( &dev->lock.watchdog ); + if (dev->fn_tbl.kernel_context_switch_unlock) dev->fn_tbl.kernel_context_switch_unlock(dev); else diff -ru drm.orig/linux/drmP.h drm/linux/drmP.h --- drm.orig/linux/drmP.h 2004-09-05 17:42:45.000000000 +0200 +++ drm/linux/drmP.h 2004-09-13 16:22:16.000000000 +0200 @@ -403,6 +403,8 @@ struct file *filp; /**< File descr of lock holder (0=kernel) */ wait_queue_head_t lock_queue; /**< Queue of blocked processes */ unsigned long lock_time; /**< Time of last lock in jiffies */ + struct timer_list watchdog; /**< Watchdog timer to kill runaway processes */ + int dontbreak; /**< Even watchdog honours the current lock */ } drm_lock_data_t; /**