diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/semaphore.h amate-linux-2.4.22/include/asm-i386/semaphore.h
--- ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/semaphore.h	Thu Nov 13 16:54:48 2003
+++ amate-linux-2.4.22/include/asm-i386/semaphore.h	Thu Dec 11 15:02:50 2003
@@ -116,6 +116,7 @@
 #if WAITQUEUE_DEBUG
 	CHECK_MAGIC(sem->__magic);
 #endif
+	check_lock_count();
 
 	__asm__ __volatile__(
 		"# atomic down operation\n\t"
@@ -142,6 +143,7 @@
 #if WAITQUEUE_DEBUG
 	CHECK_MAGIC(sem->__magic);
 #endif
+	check_lock_count();
 
 	__asm__ __volatile__(
 		"# atomic interruptible down operation\n\t"
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/smplock.h amate-linux-2.4.22/include/asm-i386/smplock.h
--- ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/smplock.h	Thu Nov 13 16:54:48 2003
+++ amate-linux-2.4.22/include/asm-i386/smplock.h	Thu Dec 11 15:02:50 2003
@@ -22,7 +22,7 @@
 #define release_kernel_lock(task, cpu) \
 do { \
 	if (task->lock_depth >= 0) \
-		spin_unlock(&kernel_flag); \
+		_raw_spin_unlock(&kernel_flag); \
 	release_irqlock(cpu); \
 	__sti(); \
 } while (0)
@@ -33,7 +33,7 @@
 #define reacquire_kernel_lock(task) \
 do { \
 	if (task->lock_depth >= 0) \
-		spin_lock(&kernel_flag); \
+		_raw_spin_lock(&kernel_flag); \
 } while (0)
 
 
@@ -48,7 +48,7 @@
 {
 #if 1
 	if (!++current->lock_depth)
-		spin_lock(&kernel_flag);
+		_raw_spin_lock(&kernel_flag);
 #else
 	__asm__ __volatile__(
 		"incl %1\n\t"
@@ -66,7 +66,7 @@
 		out_of_line_bug();
 #if 1
 	if (--current->lock_depth < 0)
-		spin_unlock(&kernel_flag);
+		_raw_spin_unlock(&kernel_flag);
 #else
 	__asm__ __volatile__(
 		"decl %1\n\t"
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/spinlock.h amate-linux-2.4.22/include/asm-i386/spinlock.h
--- ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/spinlock.h	Thu Nov 13 16:54:48 2003
+++ amate-linux-2.4.22/include/asm-i386/spinlock.h	Thu Dec 11 15:02:50 2003
@@ -6,6 +6,9 @@
 #include <asm/page.h>
 #include <linux/config.h>
 
+#include <linux/cache.h> /* for __cacheline_aligned */
+#include <asm/system.h> /* for local_irq_* */
+
 extern int printk(const char * fmt, ...)
 	__attribute__ ((format (printf, 1, 2)));
 
@@ -23,12 +26,18 @@
  * Your basic SMP spinlocks, allowing only a single CPU anywhere
  */
 
-typedef struct {
+struct _spinlock {
 	volatile unsigned int lock;
 #if SPINLOCK_DEBUG
 	unsigned magic;
+	struct _spinlock *next;
+	char *file;
+	int line;
+	int cpu;
 #endif
-} spinlock_t;
+};
+
+typedef struct _spinlock spinlock_t;
 
 #define SPINLOCK_MAGIC	0xdead4ead
 
@@ -77,7 +86,7 @@
 		:"=m" (lock->lock) : : "memory"
 
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	if (lock->magic != SPINLOCK_MAGIC)
@@ -97,7 +106,7 @@
 		:"=q" (oldval), "=m" (lock->lock) \
 		:"0" (oldval) : "memory"
 
-static inline void spin_unlock(spinlock_t *lock)
+static inline void _raw_spin_unlock(spinlock_t *lock)
 {
 	char oldval = 1;
 #if SPINLOCK_DEBUG
@@ -113,7 +122,7 @@
 
 #endif
 
-static inline int spin_trylock(spinlock_t *lock)
+static inline int _raw_spin_trylock(spinlock_t *lock)
 {
 	char oldval;
 	__asm__ __volatile__(
@@ -123,7 +132,7 @@
 	return oldval > 0;
 }
 
-static inline void spin_lock(spinlock_t *lock)
+static inline void _raw_spin_lock(spinlock_t *lock)
 {
 #if SPINLOCK_DEBUG
 	__label__ here;
@@ -138,6 +147,106 @@
 		:"=m" (lock->lock) : : "memory");
 }
 
+#if SPINLOCK_DEBUG
+
+extern spinlock_t *lock_stack; /* start of spinlock "stack" */
+extern spinlock_t lock_stack_lock; /* spinlock for spinlock stack :-) */
+extern int cpu_lock_count[] __cacheline_aligned; /* per-cpu lock counter */
+
+#define NEED_PROCESSOR_ID 1
+extern int __processor_id(void);
+extern void bad_lock_count(char *file, int line);
+
+static inline void spin_unlock(spinlock_t *lock) /* debug wrapper: untrack @lock, then really release it */
+{
+	spinlock_t **l; /* cursor over the global singly-linked held-lock stack */
+	unsigned long flags;
+	int cpu = __processor_id();
+	/* irqs off so the stack walk and the counter update are atomic on this CPU */
+	local_irq_save(flags);
+	_raw_spin_lock(&lock_stack_lock);
+	for(l = &lock_stack; *l != lock; l = &(*l)->next)
+		BUG_ON(*l == NULL); /* reached end of stack: @lock was never tracked (unbalanced unlock) */
+	*l = (*l)->next; /* unlink @lock from the stack */
+	_raw_spin_unlock(&lock_stack_lock);
+	cpu_lock_count[cpu]--; /* NOTE(review): assumes the lock is released on the CPU that took it -- confirm */
+	local_irq_restore(flags);
+	/* clear the "on stack" marker before anyone can re-take the lock */
+	lock->next = NULL;
+	_raw_spin_unlock(lock);
+}
+
+static inline void _spin_lock_fl(spinlock_t *lock, char *file, int line) /* debug spin_lock: record acquisition site, push onto the held-lock stack */
+{
+	unsigned long flags;
+	int cpu = __processor_id();
+	/* take the real lock first; the tracking fields are written while it is held */
+	_raw_spin_lock(lock);
+	BUG_ON(lock->next != NULL); /* must have been cleared by the matching spin_unlock */
+	lock->file = file;
+	lock->line = line;
+	lock->cpu = cpu;
+	/* irqs off so the counter and the stack push are atomic on this CPU */
+	local_irq_save(flags);
+	cpu_lock_count[cpu]++;
+	_raw_spin_lock(&lock_stack_lock);
+	lock->next = lock_stack; /* push @lock onto the global stack */
+	lock_stack = lock;
+	_raw_spin_unlock(&lock_stack_lock);
+	local_irq_restore(flags);
+}
+
+#define spin_lock(l) _spin_lock_fl(l, __FILE__, __LINE__) /* capture the caller's file/line */
+
+static inline int _spin_trylock_fl(spinlock_t *lock, char *file, int line) /* debug spin_trylock: track only on success; returns nonzero iff acquired */
+{
+	unsigned long flags;
+	int locked = _raw_spin_trylock(lock);
+	int cpu = __processor_id();
+
+	if(!locked)
+		return locked; /* lock not taken: nothing to track */
+
+	BUG_ON(lock->next != NULL); /* must have been cleared by the matching spin_unlock */
+	lock->file = file;
+	lock->line = line;
+	lock->cpu = cpu;
+	/* irqs off so the counter and the stack push are atomic on this CPU */
+	local_irq_save(flags);
+	cpu_lock_count[cpu]++;
+	_raw_spin_lock(&lock_stack_lock);
+	lock->next = lock_stack; /* push @lock onto the global stack */
+	lock_stack = lock;
+	_raw_spin_unlock(&lock_stack_lock);
+	local_irq_restore(flags);
+
+	return locked;
+}
+
+#define spin_trylock(l) _spin_trylock_fl(l, __FILE__, __LINE__) /* capture the caller's file/line */
+
+#define check_lock_count()					\
+do {								\
+	if(unlikely(cpu_lock_count[__processor_id()] != 0))	\
+		bad_lock_count(__FILE__, __LINE__);		\
+} while(0) /* complain if this CPU still holds tracked spinlocks */
+
+#define check_fs_and_lock_count()		\
+do {						\
+	if(!segment_eq(get_fs(), KERNEL_DS))	\
+		check_lock_count();		\
+} while(0) /* as above, but skipped for set_fs(KERNEL_DS) kernel-internal accesses */
+
+#else /* !SPINLOCK_DEBUG */
+
+/* debugging disabled: plain lock primitives and no-op checks */
+#define spin_unlock _raw_spin_unlock
+#define spin_lock _raw_spin_lock
+#define spin_trylock _raw_spin_trylock
+#define check_lock_count() do { } while(0)
+#define check_fs_and_lock_count() do { } while(0)
+
+#endif
+
 
 /*
  * Read-write spinlocks, allowing multiple readers
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/uaccess.h amate-linux-2.4.22/include/asm-i386/uaccess.h
--- ../saffroy/linux-2.4.22-kdb-orig/include/asm-i386/uaccess.h	Thu Nov 13 16:54:48 2003
+++ amate-linux-2.4.22/include/asm-i386/uaccess.h	Thu Dec 11 15:02:51 2003
@@ -174,6 +174,7 @@
  */
 #define get_user(x,ptr)							\
 ({	int __ret_gu,__val_gu;						\
+	check_fs_and_lock_count();					\
 	switch(sizeof (*(ptr))) {					\
 	case 1:  __get_user_x(1,__ret_gu,__val_gu,ptr); break;		\
 	case 2:  __get_user_x(2,__ret_gu,__val_gu,ptr); break;		\
@@ -209,7 +210,8 @@
  * Returns zero on success, or -EFAULT on error.
  */
 #define put_user(x,ptr)							\
-  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+({	check_fs_and_lock_count();					\
+  __put_user_check((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))); })
 
 
 /**
@@ -232,8 +234,9 @@
  * Returns zero on success, or -EFAULT on error.
  * On error, the variable @x is set to zero.
  */
-#define __get_user(x,ptr) \
-  __get_user_nocheck((x),(ptr),sizeof(*(ptr)))
+#define __get_user(x,ptr)				\
+({	check_fs_and_lock_count();			\
+  __get_user_nocheck((x),(ptr),sizeof(*(ptr))); })
 
 
 /**
@@ -255,8 +258,9 @@
  *
  * Returns zero on success, or -EFAULT on error.
  */
-#define __put_user(x,ptr) \
-  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr)))
+#define __put_user(x,ptr)						\
+({	check_fs_and_lock_count();					\
+  __put_user_nocheck((__typeof__(*(ptr)))(x),(ptr),sizeof(*(ptr))); })
 
 #define __put_user_nocheck(x,ptr,size)			\
 ({							\
@@ -709,9 +713,10 @@
  * On success, this will be zero.
  */
 #define copy_to_user(to,from,n)				\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_to_user((to),(from),(n)) :	\
-	 __generic_copy_to_user((to),(from),(n)))
+	({ check_fs_and_lock_count();			\
+	   __builtin_constant_p(n) ?			\
+	   __constant_copy_to_user((to),(from),(n)) :	\
+	   __generic_copy_to_user((to),(from),(n)); })
 
 /**
  * copy_from_user: - Copy a block of data from user space.
@@ -730,9 +735,10 @@
  * data to the requested size using zero bytes.
  */
 #define copy_from_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_from_user((to),(from),(n)) :	\
-	 __generic_copy_from_user((to),(from),(n)))
+	({ check_fs_and_lock_count();			\
+	   __builtin_constant_p(n) ?			\
+	   __constant_copy_from_user((to),(from),(n)) :	\
+	   __generic_copy_from_user((to),(from),(n)); })
 
 /**
  * __copy_to_user: - Copy a block of data into user space, with less checking.
@@ -748,10 +754,11 @@
  * Returns number of bytes that could not be copied.
  * On success, this will be zero.
  */
-#define __copy_to_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_to_user_nocheck((to),(from),(n)) :	\
-	 __generic_copy_to_user_nocheck((to),(from),(n)))
+#define __copy_to_user(to,from,n)				\
+	({ check_fs_and_lock_count();				\
+	   __builtin_constant_p(n) ?				\
+	   __constant_copy_to_user_nocheck((to),(from),(n)) :	\
+	   __generic_copy_to_user_nocheck((to),(from),(n)); })
 
 /**
  * __copy_from_user: - Copy a block of data from user space, with less checking.
@@ -770,10 +777,11 @@
  * If some data could not be copied, this function will pad the copied
  * data to the requested size using zero bytes.
  */
-#define __copy_from_user(to,from,n)			\
-	(__builtin_constant_p(n) ?			\
-	 __constant_copy_from_user_nocheck((to),(from),(n)) :	\
-	 __generic_copy_from_user_nocheck((to),(from),(n)))
+#define __copy_from_user(to,from,n)					\
+	({ check_fs_and_lock_count();					\
+	   __builtin_constant_p(n) ?					\
+	   __constant_copy_from_user_nocheck((to),(from),(n)) :		\
+	   __generic_copy_from_user_nocheck((to),(from),(n)); })
 
 long strncpy_from_user(char *dst, const char *src, long count);
 long __strncpy_from_user(char *dst, const char *src, long count);
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/kernel/ksyms.c amate-linux-2.4.22/kernel/ksyms.c
--- ../saffroy/linux-2.4.22-kdb-orig/kernel/ksyms.c	Wed Aug 27 16:27:01 2003
+++ amate-linux-2.4.22/kernel/ksyms.c	Thu Dec 11 14:49:59 2003
@@ -609,6 +609,15 @@
 /* debug */
 EXPORT_SYMBOL(dump_stack);
 
+/* spinlock debug */
+#ifdef CONFIG_DEBUG_SPINLOCK
+EXPORT_SYMBOL(lock_stack);
+EXPORT_SYMBOL(lock_stack_lock);
+EXPORT_SYMBOL(__processor_id);
+EXPORT_SYMBOL(cpu_lock_count);
+EXPORT_SYMBOL(bad_lock_count);
+#endif
+
 /* To match ksyms with System.map */
 extern const char _end[];
 EXPORT_SYMBOL(_end);
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/kernel/sched.c amate-linux-2.4.22/kernel/sched.c
--- ../saffroy/linux-2.4.22-kdb-orig/kernel/sched.c	Wed Aug 27 16:27:01 2003
+++ amate-linux-2.4.22/kernel/sched.c	Fri Dec 12 12:18:13 2003
@@ -125,6 +125,71 @@
 
 #endif
 
+#if SPINLOCK_DEBUG
+
+spinlock_t *lock_stack = NULL; /* start of spinlock "stack" */
+spinlock_t lock_stack_lock = SPIN_LOCK_UNLOCKED; /* spinlock for spinlock stack :-) */
+int cpu_lock_count[NR_CPUS] __cacheline_aligned = { }; /* per-cpu lock counter */
+
+#ifdef NEED_PROCESSOR_ID /* see <asm-i386/spinlock.h> */
+int __processor_id(void) /* out-of-line CPU id, for headers that cannot use smp_processor_id() */
+{
+	return current->processor; /* 2.4: ->processor is the CPU this task is running on */
+}
+#endif
+
+void bad_lock_count(char *file, int line) /* report + recover when a CPU schedules/sleeps while holding tracked spinlocks */
+{
+	int cpu = current->processor;
+	int count = cpu_lock_count[cpu];
+	spinlock_t **l;
+	unsigned long flags;
+	static char buf[1024]; /* report assembled here, printed only after the locks are dropped */
+	int buflen = 0;
+	static int entered = 0; /* NOTE(review): plain int, not atomic; two CPUs could both pass -- acceptable for a debug aid */
+
+	if(oops_in_progress)
+		/* avoid false bug reports while in BUG() */
+		return;
+	if(entered)
+		/* one report is ok, more is hard to handle gracefully */
+		return;
+	entered = 1;
+
+	printk("Scheduling on CPU #%d with lock count %d at %s:%d!\n",
+	       cpu, count, file, line);
+
+	/* show info on held spinlocks */
+	local_irq_save(flags);
+	_raw_spin_lock(&lock_stack_lock);
+	for(l = &lock_stack; *l != NULL; ) {
+		/* no printk here: printk itself takes spinlocks => deadlock on lock_stack_lock */
+		buflen += snprintf(&buf[buflen], sizeof(buf)-buflen,
+				   "Lock 0x%p owned on CPU #%d by %s:%d%s.\n",
+				   *l, (*l)->cpu, (*l)->file, (*l)->line, ((*l)->cpu == cpu ? ", releasing" : ""));
+		if(buflen > (int)sizeof(buf) - 1)
+			buflen = sizeof(buf) - 1; /* snprintf returns the would-be length; clamp so later offsets stay in bounds */
+		if((*l)->cpu == cpu) {
+			/* release this lock */
+			spinlock_t *lock = *l;
+			*l = (*l)->next;
+			lock->next = NULL;
+			cpu_lock_count[cpu]--;
+			_raw_spin_unlock(lock);
+		} else {
+			l = &(*l)->next;
+		}
+	}
+	_raw_spin_unlock(&lock_stack_lock);
+	local_irq_restore(flags);
+	buf[buflen] = '\0';
+	if(buflen)
+		printk("%s", buf); /* never use buf as the format string: recorded file names may contain '%' */
+	BUG();
+}
+
+#endif /* SPINLOCK_DEBUG */
+
 void scheduling_functions_start_here(void) { }
 
 /*
@@ -564,6 +629,8 @@
 		BUG();
 	}
 
+	check_lock_count();
+
 	release_kernel_lock(prev, this_cpu);
 
 	/*
diff -x '*.o' -x '.*' -ru ../saffroy/linux-2.4.22-kdb-orig/mm/slab.c amate-linux-2.4.22/mm/slab.c
--- ../saffroy/linux-2.4.22-kdb-orig/mm/slab.c	Fri Jun 13 16:51:39 2003
+++ amate-linux-2.4.22/mm/slab.c	Tue Jan  6 14:58:20 2004
@@ -1237,6 +1237,8 @@
 		if (cachep->gfpflags & GFP_DMA)
 			BUG();
 	}
+	if (flags & __GFP_WAIT)
+		check_lock_count();
 }
 
 static inline void * kmem_cache_alloc_one_tail (kmem_cache_t *cachep,
