Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> > Could you remove the trylock patch and see how this one fares? We may need
> > both but this should avoid taking the slub_lock around any possible alloc of
> > sysfs.
> It's a bit tricky

Hmmm... Yes, that version was against 4-mm1 instead of after the defrag 
patchset. The difference is only the "ops" parameter...

Rediff to apply after defrag patchset.

SLUB: Move sysfs operations outside of slub_lock

Sysfs can do a gazillion things when called. Make sure that we do
not call any sysfs functions while holding the slub_lock. Let sysfs
fend for itself locking wise.

Just protect the essentials: The modifications to the slab lists
and the ref counters of the slabs.

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

---
 mm/slub.c |   34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 13:47:32.0 -0700
+++ slub/mm/slub.c  2007-06-08 13:48:07.0 -0700
@@ -2193,12 +2193,13 @@ void kmem_cache_destroy(struct kmem_cach
s->refcount--;
if (!s->refcount) {
list_del(>list);
+   up_write(_lock);
if (kmem_cache_close(s))
WARN_ON(1);
sysfs_slab_remove(s);
kfree(s);
-   }
-   up_write(_lock);
+   } else
+   up_write(_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2956,26 +2957,33 @@ struct kmem_cache *kmem_cache_create(con
 */
s->objsize = max(s->objsize, (int)size);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
+   up_write(_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s && kmem_cache_open(s, GFP_KERNEL, name,
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
size, align, flags, ctor, ops)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
list_add(>list, _caches);
+   up_write(_lock);
raise_kswapd_order(s->order);
-   } else
-   kfree(s);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
}
up_write(_lock);
-   return s;
 
 err:
-   up_write(_lock);
if (flags & SLAB_PANIC)
panic("Cannot create slabcache %s\n", name);
else
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

Christoph Lameter pisze:

On Fri, 8 Jun 2007, Christoph Lameter wrote:


On Fri, 8 Jun 2007, Michal Piotrowski wrote:


Yes, it does. Thanks!
Ahhh... That leads to the discovery of more sysfs problems. I need to make 
sure not to be holding locks while calling into sysfs. More cleanup...


Could you remove the trylock patch and see how this one fares? We may need 
both but this should avoid taking the slub_lock around any possible alloc 
of sysfs.





It's a bit tricky

cat ../sd2.patch | patch -p1
patching file mm/slub.c
Hunk #1 succeeded at 2194 (offset 15 lines).
Hunk #2 FAILED at 2653.
1 out of 2 hunks FAILED -- saving rejects to file mm/slub.c.rej
[EMAIL PROTECTED] linux-work3]$ cat mm/slub.c.rej
***
*** 2652,2677 
*/
   s->objsize = max(s->objsize, (int)size);
   s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
   if (sysfs_slab_alias(s, name))
   goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s && kmem_cache_open(s, GFP_KERNEL, name,
   size, align, flags, ctor)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
   list_add(>list, _caches);
   raise_kswapd_order(s->order);
-   } else
-   kfree(s);
   }
   up_write(_lock);
-   return s;

 err:
-   up_write(_lock);
   if (flags & SLAB_PANIC)
   panic("Cannot create slabcache %s\n", name);
   else
--- 2653,2685 
*/
   s->objsize = max(s->objsize, (int)size);
   s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
+   up_write(_lock);
+
   if (sysfs_slab_alias(s, name))
   goto err;
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
   size, align, flags, ctor)) {
   list_add(>list, _caches);
+   up_write(_lock);
   raise_kswapd_order(s->order);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
   }
   up_write(_lock);

 err:
   if (flags & SLAB_PANIC)
   panic("Cannot create slabcache %s\n", name);
   else

Regards,
Michal

--
"Najbardziej brakowało mi twojego milczenia."
-- Andrzej Sapkowski "Coś więcej"
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Christoph Lameter wrote:

> On Fri, 8 Jun 2007, Michal Piotrowski wrote:
> 
> > Yes, it does. Thanks!
> 
> Ahhh... That leads to the discovery of more sysfs problems. I need to make 
> sure not to be holding locks while calling into sysfs. More cleanup...

Could you remove the trylock patch and see how this one fares? We may need 
both but this should avoid taking the slub_lock around any possible alloc 
of sysfs.


SLUB: Move sysfs operations outside of slub_lock

Sysfs can do a gazillion things when called. Make sure that we do
not call any sysfs functions while holding the slub_lock. Let sysfs
fend for itself locking wise.

Just protect the essentials: The modifications to the slab lists
and the ref counters of the slabs.

Signed-off-by: Christoph Lameter <[EMAIL PROTECTED]>

---
 mm/slub.c |   34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 12:21:56.0 -0700
+++ slub/mm/slub.c  2007-06-08 12:30:23.0 -0700
@@ -2179,12 +2179,13 @@ void kmem_cache_destroy(struct kmem_cach
s->refcount--;
if (!s->refcount) {
list_del(>list);
+   up_write(_lock);
if (kmem_cache_close(s))
WARN_ON(1);
sysfs_slab_remove(s);
kfree(s);
-   }
-   up_write(_lock);
+   } else
+   up_write(_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2637,26 +2638,33 @@ struct kmem_cache *kmem_cache_create(con
 */
s->objsize = max(s->objsize, (int)size);
s->inuse = max_t(int, s->inuse, ALIGN(size, sizeof(void *)));
+   up_write(_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s && kmem_cache_open(s, GFP_KERNEL, name,
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
size, align, flags, ctor)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
list_add(>list, _caches);
+   up_write(_lock);
raise_kswapd_order(s->order);
-   } else
-   kfree(s);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
}
up_write(_lock);
-   return s;
 
 err:
-   up_write(_lock);
if (flags & SLAB_PANIC)
panic("Cannot create slabcache %s\n", name);
else
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> 0xc1081630 is in list_locations (mm/slub.c:3388).
> 3383struct page *page;
> 3384
> 3385if (!atomic_read(>nr_slabs))
> 3386continue;
> 3387
> 3388spin_lock_irqsave(>list_lock, flags);
> 3389list_for_each_entry(page, >partial, lru)
> 3390process_slab(, s, page, alloc);
> 3391list_for_each_entry(page, >full, lru)
> 3392process_slab(, s, page, alloc);


Yes, process_slab needs some temporary data to generate the lists of 
calling functions etc., and that is a GFP_TEMPORARY alloc.

Does this fix it?

---
 mm/slub.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 12:35:56.0 -0700
+++ slub/mm/slub.c  2007-06-08 12:37:32.0 -0700
@@ -2930,7 +2930,7 @@ static int alloc_loc_track(struct loc_tr
 
order = get_order(sizeof(struct location) * max);
 
-   l = (void *)__get_free_pages(GFP_TEMPORARY, order);
+   l = (void *)__get_free_pages(GFP_ATOMIC, order);
 
if (!l)
return 0;
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

Christoph Lameter pisze:

On Fri, 8 Jun 2007, Michal Piotrowski wrote:


Yes, it does. Thanks!


Ahhh... That leads to the discovery of more sysfs problems. I need to make 
sure not to be holding locks while calling into sysfs. More cleanup...





sysfs... I forgot about my sysfs test case

#! /bin/sh

for i in `find /sys/ -type f`
do
   echo "wyświetlam $i"
   sudo cat $i > /dev/null
#sleep 1s
done

[ 2816.175573] BUG: sleeping function called from invalid context at 
mm/page_alloc.c:1547
[ 2816.183578] in_atomic():1, irqs_disabled():1
[ 2816.187946] 1 lock held by cat/12586:
[ 2816.191705]  #0:  (>list_lock){++..}, at: [] 
list_locations+0x3d/0x26b

l *list_locations+0x3d
0xc1081630 is in list_locations (mm/slub.c:3388).
3383struct page *page;
3384
3385if (!atomic_read(>nr_slabs))
3386continue;
3387
3388spin_lock_irqsave(>list_lock, flags);
3389list_for_each_entry(page, >partial, lru)
3390process_slab(, s, page, alloc);
3391list_for_each_entry(page, >full, lru)
3392process_slab(, s, page, alloc);


[ 2816.199571] irq event stamp: 11526
[ 2816.203054] hardirqs last  enabled at (11525): [] 
on_each_cpu+0x3b/0x71
[ 2816.210689] hardirqs last disabled at (11526): [] 
_spin_lock_irqsave+0x13/0x6e
[ 2816.218910] softirqs last  enabled at (11236): [] 
__do_softirq+0xdf/0xe5
[ 2816.226635] softirqs last disabled at (11229): [] 
do_softirq+0x68/0x11f

l *on_each_cpu+0x3b
0xc102adbd is in on_each_cpu (include/asm/irqflags.h:36).
31  asm volatile("cli": : :"memory");
32  }
33
34  static inline void native_irq_enable(void)
35  {
36  asm volatile("sti": : :"memory");
37  }
38
39  static inline void native_safe_halt(void)
40  {

l *_spin_lock_irqsave+0x13
0xc125d241 is in _spin_lock_irqsave (kernel/spinlock.c:84).
79  unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
80  {
81  unsigned long flags;
82
83  local_irq_save(flags);
84  preempt_disable();
85  spin_acquire(>dep_map, 0, 0, _RET_IP_);
86  /*
87   * On lockdep we dont want the hand-coded irq-enable of
88   * _raw_spin_lock_flags() code, because lockdep assumes

l *__do_softirq+0xdf
0xc102b5dd is in __do_softirq (kernel/softirq.c:252).
247
248 trace_softirq_exit();
249
250 account_system_vtime(current);
251 _local_bh_enable();
252 }
253
254 #ifndef __ARCH_HAS_DO_SOFTIRQ
255
256 asmlinkage void do_softirq(void)

l *do_softirq+0x68
0xc1006d65 is in do_softirq (arch/i386/kernel/irq.c:222).
217 irqctx->tinfo.previous_esp = current_stack_pointer;
218
219 /* build the stack frame on the softirq stack */
220 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
221
222 asm volatile(
223 "   xchgl   %%ebx,%%esp \n"
224 "   call__do_softirq\n"
225 "   movl%%ebx,%%esp \n"
226 : "=b"(isp)


[ 2816.234235]  [] dump_trace+0x63/0x1eb
[ 2816.23]  [] show_trace_log_lvl+0x1a/0x2f
[ 2816.244211]  [] show_trace+0x12/0x14
[ 2816.248757]  [] dump_stack+0x16/0x18
[ 2816.253288]  [] __might_sleep+0xce/0xd5
[ 2816.258046]  [] __alloc_pages+0x33/0x324
[ 2816.262968]  [] __get_free_pages+0x55/0x66
[ 2816.268060]  [] process_slab+0x1bd/0x299
[ 2816.272988]  [] list_locations+0x57/0x26b
[ 2816.277981]  [] free_calls_show+0x22/0x29
[ 2816.282965]  [] slab_attr_show+0x1c/0x20
[ 2816.287891]  [] sysfs_read_file+0x94/0x105
[ 2816.293018]  [] vfs_read+0xcf/0x158
[ 2816.297539]  [] sys_read+0x3d/0x72
[ 2816.301910]  [] syscall_call+0x7/0xb
[ 2816.306486]  [] 0xb7f30410
[ 2816.310165]  ===
[ 2818.826341] BUG: sleeping function called from invalid context at 
mm/page_alloc.c:1547
[ 2818.834388] in_atomic():1, irqs_disabled():1
[ 2818.838751] 1 lock held by cat/12635:
[ 2818.842506]  #0:  (>list_lock){++..}, at: [] 
list_locations+0x3d/0x26b
[ 2818.850460] irq event stamp: 11494
[ 2818.853908] hardirqs last  enabled at (11493): [] 
on_each_cpu+0x3b/0x71
[ 2818.861505] hardirqs last disabled at (11494): [] 
_spin_lock_irqsave+0x13/0x6e
[ 2818.869831] softirqs last  enabled at (11258): [] 
__do_softirq+0xdf/0xe5
[ 2818.877576] softirqs last disabled at (11215): [] 
do_softirq+0x68/0x11f
[ 2818.885217]  [] dump_trace+0x63/0x1eb
[ 2818.889893]  [] show_trace_log_lvl+0x1a/0x2f
[ 2818.895112]  [] show_trace+0x12/0x14
[ 2818.899667]  [] dump_stack+0x16/0x18
[ 2818.904232]  [] __might_sleep+0xce/0xd5
[ 2818.909046]  [] __alloc_pages+0x33/0x324
[ 2818.913956]  [] __get_free_pages+0x55/0x66
[ 2818.919022]  [] process_slab+0x1bd/0x299
[ 2818.923923]  [] list_locations+0x57/0x26b
[ 

Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> Yes, it does. Thanks!

Ahhh... That leads to the discovery of more sysfs problems. I need to make 
sure not to be holding locks while calling into sysfs. More cleanup...

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> bash shared mapping + your script in a loop
> while true;  do sudo ./run.sh; done > res3.txt

H... Seems to be triggered from the reclaim path kmem_cache_defrag 
rather than the manual triggered one from the script. Taking the slub_lock 
on the reclaim path is an issue it seems.

Maybe we need to do a trylock in kmem_cache_defrag to defuse the 
situation? This is after all an optimization so we can bug out.

Does this fix it?

---
 mm/slub.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 11:12:40.0 -0700
+++ slub/mm/slub.c  2007-06-08 11:14:34.0 -0700
@@ -2738,7 +2738,9 @@ int kmem_cache_defrag(int percent, int n
unsigned long pages = 0;
void *scratch;
 
-   down_read(_lock);
+   if (!down_read_trylock(_lock))
+   return 0;
+
list_for_each_entry(s, _caches, list) {
 
/*


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

bash shared mapping + your script in a loop
while true;  do sudo ./run.sh; done > res3.txt


[ 2866.154597] ===
[ 2866.162384] [ INFO: possible circular locking dependency detected ]
[ 2866.168698] 2.6.22-rc4-mm2 #1
[ 2866.171671] ---
[ 2866.177972] bash-shared-map/3245 is trying to acquire lock:
[ 2866.183566]  (slub_lock){}, at: [] kmem_cache_defrag+0x18/0xb3

l *kmem_cache_defrag+0x18
0xc1082510 is in kmem_cache_defrag (mm/slub.c:2742).
2737struct kmem_cache *s;
2738unsigned long pages = 0;
2739void *scratch;
2740
2741down_read(_lock);
2742list_for_each_entry(s, _caches, list) {
2743
2744/*
2745 * The slab cache must have defrag methods.
2746 */


[ 2866.190800] 
[ 2866.190801] but task is already holding lock:

[ 2866.196746]  (>i_alloc_sem){--..}, at: [] 
notify_change+0xdf/0x2ec

l *notify_change+0xdf
0xc1098b07 is in notify_change (fs/attr.c:145).
140 return 0;
141
142 if (ia_valid & ATTR_SIZE)
143 down_write(>d_inode->i_alloc_sem);
144
145 if (inode->i_op && inode->i_op->setattr) {
146 error = security_inode_setattr(dentry, attr);
147 if (!error)
148 error = inode->i_op->setattr(dentry, attr);
149 } else {


[ 2866.204761] 
[ 2866.204762] which lock already depends on the new lock.
[ 2866.204764] 
[ 2866.213058] 
[ 2866.213060] the existing dependency chain (in reverse order) is:
[ 2866.220630] 
[ 2866.220631] -> #2 (>i_alloc_sem){--..}:

[ 2866.226784][] add_lock_to_list+0x67/0x8b
[ 2866.232525][] __lock_acquire+0xb02/0xd36
[ 2866.238315][] lock_acquire+0x9e/0xb8
[ 2866.243702][] down_write+0x3e/0x77
[ 2866.248914][] notify_change+0xdf/0x2ec
[ 2866.254542][] do_truncate+0x60/0x79
[ 2866.259927][] may_open+0x1db/0x240
[ 2866.265165][] open_namei+0x2d6/0x6bb
[ 2866.270602][] do_filp_open+0x26/0x3b
[ 2866.275996][] do_sys_open+0x5d/0xed
[ 2866.281382][] sys_open+0x1c/0x1e
[ 2866.286508][] sysenter_past_esp+0x5f/0x99
[ 2866.292428][] 0xb7f9d410
[ 2866.296819][] 0x
[ 2866.301177] 
[ 2866.301178] -> #1 (_inode_imutex_key){--..}:

[ 2866.307632][] add_lock_to_list+0x67/0x8b
[ 2866.313425][] __lock_acquire+0xb02/0xd36
[ 2866.319164][] lock_acquire+0x9e/0xb8
[ 2866.324576][] __mutex_lock_slowpath+0x107/0x369
[ 2866.331008][] mutex_lock+0x1c/0x1f
[ 2866.336314][] create_dir+0x1e/0x1c2
[ 2866.341682][] sysfs_create_dir+0x60/0x7b
[ 2866.347396][] kobject_shadow_add+0xd7/0x189
[ 2866.353499][] kobject_add+0xa/0xc
[ 2866.358685][] sysfs_slab_add+0x10c/0x152
[ 2866.364374][] kmem_cache_create+0x13a/0x1d4
[ 2866.370442][] fasync_init+0x2e/0x37
[ 2866.375818][] kernel_init+0x14e/0x2bf
[ 2866.381351][] kernel_thread_helper+0x7/0x10
[ 2866.387419][] 0x
[ 2866.391843] 
[ 2866.391845] -> #0 (slub_lock){}:

[ 2866.397022][] print_circular_bug_tail+0x2e/0x68
[ 2866.403359][] __lock_acquire+0x9ee/0xd36
[ 2866.409080][] lock_acquire+0x9e/0xb8
[ 2866.414466][] down_read+0x3d/0x74
[ 2866.419635][] kmem_cache_defrag+0x18/0xb3
[ 2866.425540][] shrink_slab+0x1ca/0x1d5
[ 2866.431002][] try_to_free_pages+0x178/0x224
[ 2866.437044][] __alloc_pages+0x1cd/0x324
[ 2866.442794][] find_or_create_page+0x5c/0xa6
[ 2866.448817][] ext3_truncate+0xbb/0x83b
[ 2866.454411][] vmtruncate+0x11a/0x140
[ 2866.459762][] inode_setattr+0x5c/0x137
[ 2866.465286][] ext3_setattr+0x19c/0x1f8
[ 2866.470835][] notify_change+0x139/0x2ec
[ 2866.476514][] do_truncate+0x60/0x79
[ 2866.481822][] do_sys_ftruncate+0x135/0x150
[ 2866.487778][] sys_ftruncate64+0x1b/0x1d
[ 2866.493405][] syscall_call+0x7/0xb
[ 2866.498599][] 0xb7f10410
[ 2866.502913][] 0x
[ 2866.507201] 
[ 2866.507203] other info that might help us debug this:
[ 2866.507204] 
[ 2866.515363] 2 locks held by bash-shared-map/3245:

[ 2866.520151]  #0:  (>i_mutex){--..}, at: [] 
mutex_lock+0x1c/0x1f
[ 2866.527826]  #1:  (>i_alloc_sem){--..}, at: [] 
notify_change+0xdf/0x2ec
[ 2866.536158] 
[ 2866.536160] stack backtrace:

[ 2866.540597]  [] dump_trace+0x63/0x1eb
[ 2866.545187]  [] show_trace_log_lvl+0x1a/0x2f
[ 2866.550426]  [] show_trace+0x12/0x14
[ 2866.555005]  [] dump_stack+0x16/0x18
[ 2866.559552]  [] print_circular_bug_tail+0x5f/0x68
[ 2866.565216]  [] __lock_acquire+0x9ee/0xd36
[ 2866.570264]  [] lock_acquire+0x9e/0xb8
[ 2866.574991]  [] down_read+0x3d/0x74
[ 2866.579487]  [] kmem_cache_defrag+0x18/0xb3
[ 

Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> Hi Christoph,
> 
> On 07/06/07, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:
> > Will show up shortly at
> http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/
> 
> I tried to apply this patchset, but without success. I tried
> 2.6.22-rc4-mm2, 2.6.22-rc4, 2.6.22-rc4-git2, 2.6.22-rc3...

Yeah, it's against 2.6.22-rc4-mm1, and 2.6.22-rc4-mm2 changes kernel/sysctl.c 
so that the defrag trigger patch fails. Sigh.

I added kernel versions below slab-defrag so that you can find the correct 
version for your kernel.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> Hi Christoph,
> 
> On 07/06/07, [EMAIL PROTECTED] <[EMAIL PROTECTED]> wrote:
> > Will show up shortly at
> http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/
> 
> I tried to apply this patchset, but without success. I tried
> 2.6.22-rc4-mm2, 2.6.22-rc4, 2.6.22-rc4-git2, 2.6.22-rc3...

What was the problem?

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

 Hi Christoph,
 
 On 07/06/07, [EMAIL PROTECTED] [EMAIL PROTECTED] wrote:
  Will show up shortly at
 http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/
 
 I tried to apply this patchset, but without success. I tried
 2.6.22-rc4-mm2, 2.6.22-rc4, 2.6.22-rc4-git2, 2.6.22-rc3...

What was the problem?

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

 Hi Christoph,
 
 On 07/06/07, [EMAIL PROTECTED] [EMAIL PROTECTED] wrote:
  Will show up shortly at
 http://ftp.kernel.org/pub/linux/kernel/people/christoph/slab-defrag/
 
 I tried to apply this patchset, but without success. I tried
 2.6.22-rc4-mm2, 2.6.22-rc4, 2.6.22-rc4-git2, 2.6.22-rc3...

Yeah, it's against 2.6.22-rc4-mm1, and 2.6.22-rc4-mm2 changes kernel/sysctl.c 
so that the defrag trigger patch fails. Sigh.

I added kernel versions below slab-defrag so that you can find the correct 
version for your kernel.
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

> bash shared mapping + your script in a loop
> while true;  do sudo ./run.sh; done > res3.txt

H... Seems to be triggered from the reclaim path kmem_cache_defrag 
rather than the manual triggered one from the script. Taking the slub_lock 
on the reclaim path is an issue it seems.

Maybe we need to do a trylock in kmem_cache_defrag to defuse the 
situation? This is after all an optimization so we can bug out.

Does this fix it?

---
 mm/slub.c |4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 11:12:40.0 -0700
+++ slub/mm/slub.c  2007-06-08 11:14:34.0 -0700
@@ -2738,7 +2738,9 @@ int kmem_cache_defrag(int percent, int n
unsigned long pages = 0;
void *scratch;
 
-   down_read(slub_lock);
+   if (!down_read_trylock(slub_lock))
+   return 0;
+
list_for_each_entry(s, slab_caches, list) {
 
/*


-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

bash shared mapping + your script in a loop
while true;  do sudo ./run.sh; done > res3.txt


[ 2866.154597] ===
[ 2866.162384] [ INFO: possible circular locking dependency detected ]
[ 2866.168698] 2.6.22-rc4-mm2 #1
[ 2866.171671] ---
[ 2866.177972] bash-shared-map/3245 is trying to acquire lock:
[ 2866.183566]  (slub_lock){}, at: [c0482510] kmem_cache_defrag+0x18/0xb3

l *kmem_cache_defrag+0x18
0xc1082510 is in kmem_cache_defrag (mm/slub.c:2742).
2737struct kmem_cache *s;
2738unsigned long pages = 0;
2739void *scratch;
2740
2741down_read(slub_lock);
2742list_for_each_entry(s, slab_caches, list) {
2743
2744/*
2745 * The slab cache must have defrag methods.
2746 */


[ 2866.190800] 
[ 2866.190801] but task is already holding lock:

[ 2866.196746]  (inode-i_alloc_sem){--..}, at: [c0498b07] 
notify_change+0xdf/0x2ec

l *notify_change+0xdf
0xc1098b07 is in notify_change (fs/attr.c:145).
140 return 0;
141
142 if (ia_valid  ATTR_SIZE)
143 down_write(dentry-d_inode-i_alloc_sem);
144
145 if (inode-i_op  inode-i_op-setattr) {
146 error = security_inode_setattr(dentry, attr);
147 if (!error)
148 error = inode-i_op-setattr(dentry, attr);
149 } else {


[ 2866.204761] 
[ 2866.204762] which lock already depends on the new lock.
[ 2866.204764] 
[ 2866.213058] 
[ 2866.213060] the existing dependency chain (in reverse order) is:
[ 2866.220630] 
[ 2866.220631] - #2 (inode-i_alloc_sem){--..}:

[ 2866.226784][c0441df1] add_lock_to_list+0x67/0x8b
[ 2866.232525][c0444bb9] __lock_acquire+0xb02/0xd36
[ 2866.238315][c0444e8b] lock_acquire+0x9e/0xb8
[ 2866.243702][c043c0c5] down_write+0x3e/0x77
[ 2866.248914][c0498b07] notify_change+0xdf/0x2ec
[ 2866.254542][c0484161] do_truncate+0x60/0x79
[ 2866.259927][c048d5fe] may_open+0x1db/0x240
[ 2866.265165][c048fbbd] open_namei+0x2d6/0x6bb
[ 2866.270602][c0483a5d] do_filp_open+0x26/0x3b
[ 2866.275996][c0483acf] do_sys_open+0x5d/0xed
[ 2866.281382][c0483b97] sys_open+0x1c/0x1e
[ 2866.286508][c0404182] sysenter_past_esp+0x5f/0x99
[ 2866.292428][b7f9d410] 0xb7f9d410
[ 2866.296819][] 0x
[ 2866.301177] 
[ 2866.301178] - #1 (sysfs_inode_imutex_key){--..}:

[ 2866.307632][c0441df1] add_lock_to_list+0x67/0x8b
[ 2866.313425][c0444bb9] __lock_acquire+0xb02/0xd36
[ 2866.319164][c0444e8b] lock_acquire+0x9e/0xb8
[ 2866.324576][c065b745] __mutex_lock_slowpath+0x107/0x369
[ 2866.331008][c065b9c3] mutex_lock+0x1c/0x1f
[ 2866.336314][c04c2609] create_dir+0x1e/0x1c2
[ 2866.341682][c04c280d] sysfs_create_dir+0x60/0x7b
[ 2866.347396][c050a335] kobject_shadow_add+0xd7/0x189
[ 2866.353499][c050a3f1] kobject_add+0xa/0xc
[ 2866.358685][c0480f00] sysfs_slab_add+0x10c/0x152
[ 2866.364374][c048111b] kmem_cache_create+0x13a/0x1d4
[ 2866.370442][c083415d] fasync_init+0x2e/0x37
[ 2866.375818][c0824542] kernel_init+0x14e/0x2bf
[ 2866.381351][c0404e7b] kernel_thread_helper+0x7/0x10
[ 2866.387419][] 0x
[ 2866.391843] 
[ 2866.391845] - #0 (slub_lock){}:

[ 2866.397022][c0442b04] print_circular_bug_tail+0x2e/0x68
[ 2866.403359][c0444aa5] __lock_acquire+0x9ee/0xd36
[ 2866.409080][c0444e8b] lock_acquire+0x9e/0xb8
[ 2866.414466][c043bfff] down_read+0x3d/0x74
[ 2866.419635][c0482510] kmem_cache_defrag+0x18/0xb3
[ 2866.425540][c046c7ac] shrink_slab+0x1ca/0x1d5
[ 2866.431002][c046cc1d] try_to_free_pages+0x178/0x224
[ 2866.437044][c046824f] __alloc_pages+0x1cd/0x324
[ 2866.442794][c0465282] find_or_create_page+0x5c/0xa6
[ 2866.448817][c04c9379] ext3_truncate+0xbb/0x83b
[ 2866.454411][c0472470] vmtruncate+0x11a/0x140
[ 2866.459762][c049894d] inode_setattr+0x5c/0x137
[ 2866.465286][c04caafb] ext3_setattr+0x19c/0x1f8
[ 2866.470835][c0498b61] notify_change+0x139/0x2ec
[ 2866.476514][c0484161] do_truncate+0x60/0x79
[ 2866.481822][c04842af] do_sys_ftruncate+0x135/0x150
[ 2866.487778][c04842e5] sys_ftruncate64+0x1b/0x1d
[ 2866.493405][c040420c] syscall_call+0x7/0xb
[ 2866.498599][b7f10410] 0xb7f10410
[ 2866.502913][] 0x
[ 2866.507201] 
[ 2866.507203] other info that might help us debug this:
[ 2866.507204] 
[ 2866.515363] 2 locks held by bash-shared-map/3245:

[ 2866.520151]  #0:  (inode-i_mutex){--..}, at: [c065b9c3] 
mutex_lock+0x1c/0x1f
[ 2866.527826]  #1:  (inode-i_alloc_sem){--..}, at: [c0498b07] 
notify_change+0xdf/0x2ec
[ 

Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

 Yes, it does. Thanks!

Ahhh... That leads to the discovery of more sysfs problems. I need to make 
sure not to be holding locks while calling into sysfs. More cleanup...

-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

 0xc1081630 is in list_locations (mm/slub.c:3388).
 3383struct page *page;
 3384
 3385if (!atomic_read(n-nr_slabs))
 3386continue;
 3387
 3388spin_lock_irqsave(n-list_lock, flags);
 3389list_for_each_entry(page, n-partial, lru)
 3390process_slab(t, s, page, alloc);
 3391list_for_each_entry(page, n-full, lru)
 3392process_slab(t, s, page, alloc);


Yes, process_slab needs some temporary data to generate the lists of 
calling functions etc., and that is a GFP_TEMPORARY alloc.

Does this fix it?

---
 mm/slub.c |2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 12:35:56.0 -0700
+++ slub/mm/slub.c  2007-06-08 12:37:32.0 -0700
@@ -2930,7 +2930,7 @@ static int alloc_loc_track(struct loc_tr
 
order = get_order(sizeof(struct location) * max);
 
-   l = (void *)__get_free_pages(GFP_TEMPORARY, order);
+   l = (void *)__get_free_pages(GFP_ATOMIC, order);
 
if (!l)
return 0;
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Christoph Lameter wrote:

> On Fri, 8 Jun 2007, Michal Piotrowski wrote:
> 
> > Yes, it does. Thanks!
> 
> Ahhh... That leads to the discovery of more sysfs problems. I need to make 
> sure not to be holding locks while calling into sysfs. More cleanup...

Could you remove the trylock patch and see how this one fares? We may need 
both but this should avoid taking the slub_lock around any possible alloc 
of sysfs.


SLUB: Move sysfs operations outside of slub_lock

Sysfs can do a gazillion things when called. Make sure that we do
not call any sysfs functions while holding the slub_lock. Let sysfs
fend for itself locking wise.

Just protect the essentials: The modifications to the slab lists
and the ref counters of the slabs.

Signed-off-by: Christoph Lameter [EMAIL PROTECTED]

---
 mm/slub.c |   34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 12:21:56.0 -0700
+++ slub/mm/slub.c  2007-06-08 12:30:23.0 -0700
@@ -2179,12 +2179,13 @@ void kmem_cache_destroy(struct kmem_cach
s-refcount--;
if (!s-refcount) {
list_del(s-list);
+   up_write(slub_lock);
if (kmem_cache_close(s))
WARN_ON(1);
sysfs_slab_remove(s);
kfree(s);
-   }
-   up_write(slub_lock);
+   } else
+   up_write(slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2637,26 +2638,33 @@ struct kmem_cache *kmem_cache_create(con
 */
s-objsize = max(s-objsize, (int)size);
s-inuse = max_t(int, s-inuse, ALIGN(size, sizeof(void *)));
+   up_write(slub_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s  kmem_cache_open(s, GFP_KERNEL, name,
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
size, align, flags, ctor)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
list_add(s-list, slab_caches);
+   up_write(slub_lock);
raise_kswapd_order(s-order);
-   } else
-   kfree(s);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
}
up_write(slub_lock);
-   return s;
 
 err:
-   up_write(slub_lock);
if (flags  SLAB_PANIC)
panic(Cannot create slabcache %s\n, name);
else
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

Christoph Lameter pisze:

On Fri, 8 Jun 2007, Christoph Lameter wrote:


On Fri, 8 Jun 2007, Michal Piotrowski wrote:


Yes, it does. Thanks!
Ahhh... That leads to the discovery of more sysfs problems. I need to make
sure not to be holding locks while calling into sysfs. More cleanup...


Could you remove the trylock patch and see how this one fares? We may need 
both but this should avoid taking the slub_lock around any possible alloc 
of sysfs.





It's a bit tricky

cat ../sd2.patch | patch -p1
patching file mm/slub.c
Hunk #1 succeeded at 2194 (offset 15 lines).
Hunk #2 FAILED at 2653.
1 out of 2 hunks FAILED -- saving rejects to file mm/slub.c.rej
[EMAIL PROTECTED] linux-work3]$ cat mm/slub.c.rej
***
*** 2652,2677 
*/
   s-objsize = max(s-objsize, (int)size);
   s-inuse = max_t(int, s-inuse, ALIGN(size, sizeof(void *)));
   if (sysfs_slab_alias(s, name))
   goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s  kmem_cache_open(s, GFP_KERNEL, name,
   size, align, flags, ctor)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
   list_add(s-list, slab_caches);
   raise_kswapd_order(s-order);
-   } else
-   kfree(s);
   }
   up_write(slub_lock);
-   return s;

 err:
-   up_write(slub_lock);
   if (flags  SLAB_PANIC)
   panic(Cannot create slabcache %s\n, name);
   else
--- 2653,2685 
*/
   s-objsize = max(s-objsize, (int)size);
   s-inuse = max_t(int, s-inuse, ALIGN(size, sizeof(void *)));
+   up_write(slub_lock);
+
   if (sysfs_slab_alias(s, name))
   goto err;
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
   size, align, flags, ctor)) {
   list_add(s-list, slab_caches);
+   up_write(slub_lock);
   raise_kswapd_order(s-order);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
   }
   up_write(slub_lock);

 err:
   if (flags  SLAB_PANIC)
   panic(Cannot create slabcache %s\n, name);
   else

Regards,
Michal

--
Najbardziej brakowało mi twojego milczenia.
-- Andrzej Sapkowski Coś więcej
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Michal Piotrowski

Christoph Lameter pisze:

On Fri, 8 Jun 2007, Michal Piotrowski wrote:


Yes, it does. Thanks!


Ahhh... That leads to the discovery of more sysfs problems. I need to make
sure not to be holding locks while calling into sysfs. More cleanup...





sysfs... I forgot about my sysfs test case

#! /bin/sh
# sysfs read test: read every regular file below /sys/ once as root so that
# each attribute's show handler runs. The file contents are discarded; only
# the name of the file being read is printed.

for i in $(find /sys/ -type f)
do
   echo wyświetlam "$i"
   # Discard the contents; reading the file is the whole point of the test.
   sudo cat "$i" > /dev/null
#sleep 1s
done

[ 2816.175573] BUG: sleeping function called from invalid context at 
mm/page_alloc.c:1547
[ 2816.183578] in_atomic():1, irqs_disabled():1
[ 2816.187946] 1 lock held by cat/12586:
[ 2816.191705]  #0:  (n-list_lock){++..}, at: [c0481630] 
list_locations+0x3d/0x26b

l *list_locations+0x3d
0xc1081630 is in list_locations (mm/slub.c:3388).
3383struct page *page;
3384
3385if (!atomic_read(n-nr_slabs))
3386continue;
3387
3388spin_lock_irqsave(n-list_lock, flags);
3389list_for_each_entry(page, n-partial, lru)
3390process_slab(t, s, page, alloc);
3391list_for_each_entry(page, n-full, lru)
3392process_slab(t, s, page, alloc);


[ 2816.199571] irq event stamp: 11526
[ 2816.203054] hardirqs last  enabled at (11525): [c042adbd] 
on_each_cpu+0x3b/0x71
[ 2816.210689] hardirqs last disabled at (11526): [c065d241] 
_spin_lock_irqsave+0x13/0x6e
[ 2816.218910] softirqs last  enabled at (11236): [c042b5dd] 
__do_softirq+0xdf/0xe5
[ 2816.226635] softirqs last disabled at (11229): [c0406d65] 
do_softirq+0x68/0x11f

l *on_each_cpu+0x3b
0xc102adbd is in on_each_cpu (include/asm/irqflags.h:36).
31  asm volatile(cli: : :memory);
32  }
33
34  static inline void native_irq_enable(void)
35  {
36  asm volatile(sti: : :memory);
37  }
38
39  static inline void native_safe_halt(void)
40  {

l *_spin_lock_irqsave+0x13
0xc125d241 is in _spin_lock_irqsave (kernel/spinlock.c:84).
79  unsigned long __lockfunc _spin_lock_irqsave(spinlock_t *lock)
80  {
81  unsigned long flags;
82
83  local_irq_save(flags);
84  preempt_disable();
85  spin_acquire(lock-dep_map, 0, 0, _RET_IP_);
86  /*
87   * On lockdep we dont want the hand-coded irq-enable of
88   * _raw_spin_lock_flags() code, because lockdep assumes

l *__do_softirq+0xdf
0xc102b5dd is in __do_softirq (kernel/softirq.c:252).
247
248 trace_softirq_exit();
249
250 account_system_vtime(current);
251 _local_bh_enable();
252 }
253
254 #ifndef __ARCH_HAS_DO_SOFTIRQ
255
256 asmlinkage void do_softirq(void)

l *do_softirq+0x68
0xc1006d65 is in do_softirq (arch/i386/kernel/irq.c:222).
217 irqctx-tinfo.previous_esp = current_stack_pointer;
218
219 /* build the stack frame on the softirq stack */
220 isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
221
222 asm volatile(
223xchgl   %%ebx,%%esp \n
224call__do_softirq\n
225movl%%ebx,%%esp \n
226 : =b(isp)


[ 2816.234235]  [c04052ad] dump_trace+0x63/0x1eb
[ 2816.23]  [c040544f] show_trace_log_lvl+0x1a/0x2f
[ 2816.244211]  [c040608d] show_trace+0x12/0x14
[ 2816.248757]  [c04060a5] dump_stack+0x16/0x18
[ 2816.253288]  [c041eef1] __might_sleep+0xce/0xd5
[ 2816.258046]  [c04680b5] __alloc_pages+0x33/0x324
[ 2816.262968]  [c04683fb] __get_free_pages+0x55/0x66
[ 2816.268060]  [c0481517] process_slab+0x1bd/0x299
[ 2816.272988]  [c048164a] list_locations+0x57/0x26b
[ 2816.277981]  [c0481880] free_calls_show+0x22/0x29
[ 2816.282965]  [c047e702] slab_attr_show+0x1c/0x20
[ 2816.287891]  [c04c1bd9] sysfs_read_file+0x94/0x105
[ 2816.293018]  [c048580b] vfs_read+0xcf/0x158
[ 2816.297539]  [c0485c71] sys_read+0x3d/0x72
[ 2816.301910]  [c040420c] syscall_call+0x7/0xb
[ 2816.306486]  [b7f30410] 0xb7f30410
[ 2816.310165]  ===
[ 2818.826341] BUG: sleeping function called from invalid context at 
mm/page_alloc.c:1547
[ 2818.834388] in_atomic():1, irqs_disabled():1
[ 2818.838751] 1 lock held by cat/12635:
[ 2818.842506]  #0:  (n-list_lock){++..}, at: [c0481630] 
list_locations+0x3d/0x26b
[ 2818.850460] irq event stamp: 11494
[ 2818.853908] hardirqs last  enabled at (11493): [c042adbd] 
on_each_cpu+0x3b/0x71
[ 2818.861505] hardirqs last disabled at (11494): [c065d241] 
_spin_lock_irqsave+0x13/0x6e
[ 2818.869831] softirqs last  enabled at (11258): [c042b5dd] 
__do_softirq+0xdf/0xe5
[ 2818.877576] softirqs last disabled at (11215): [c0406d65] 
do_softirq+0x68/0x11f
[ 2818.885217]  [c04052ad] dump_trace+0x63/0x1eb
[ 2818.889893]  [c040544f] show_trace_log_lvl+0x1a/0x2f
[ 2818.895112]  [c040608d] show_trace+0x12/0x14
[ 2818.899667]  [c04060a5] dump_stack+0x16/0x18
[ 

Re: [patch 00/12] Slab defragmentation V3

2007-06-08 Thread Christoph Lameter
On Fri, 8 Jun 2007, Michal Piotrowski wrote:

  Could you remove the trylock patch and see how this one fares? We may need
  both but this should avoid taking the slub_lock around any possible alloc of
  sysfs.
 It's a bit tricky

Hmmm... Yes, that version was against 4-mm1 instead of after the defrag
patchset. The difference is only the "ops" parameter...

Rediff to apply after defrag patchset.

SLUB: Move sysfs operations outside of slub_lock

Sysfs can do a gazillion things when called. Make sure that we do
not call any sysfs functions while holding the slub_lock. Let sysfs
fend for itself locking wise.

Just protect the essentials: The modifications to the slab lists
and the ref counters of the slabs.

Signed-off-by: Christoph Lameter [EMAIL PROTECTED]

---
 mm/slub.c |   34 +-
 1 file changed, 21 insertions(+), 13 deletions(-)

Index: slub/mm/slub.c
===
--- slub.orig/mm/slub.c 2007-06-08 13:47:32.0 -0700
+++ slub/mm/slub.c  2007-06-08 13:48:07.0 -0700
@@ -2193,12 +2193,13 @@ void kmem_cache_destroy(struct kmem_cach
s-refcount--;
if (!s-refcount) {
list_del(s-list);
+   up_write(slub_lock);
if (kmem_cache_close(s))
WARN_ON(1);
sysfs_slab_remove(s);
kfree(s);
-   }
-   up_write(slub_lock);
+   } else
+   up_write(slub_lock);
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -2956,26 +2957,33 @@ struct kmem_cache *kmem_cache_create(con
 */
s-objsize = max(s-objsize, (int)size);
s-inuse = max_t(int, s-inuse, ALIGN(size, sizeof(void *)));
+   up_write(slub_lock);
+
if (sysfs_slab_alias(s, name))
goto err;
-   } else {
-   s = kmalloc(kmem_size, GFP_KERNEL);
-   if (s  kmem_cache_open(s, GFP_KERNEL, name,
+
+   return s;
+   }
+
+   s = kmalloc(kmem_size, GFP_KERNEL);
+   if (s) {
+   if (kmem_cache_open(s, GFP_KERNEL, name,
size, align, flags, ctor, ops)) {
-   if (sysfs_slab_add(s)) {
-   kfree(s);
-   goto err;
-   }
list_add(s-list, slab_caches);
+   up_write(slub_lock);
raise_kswapd_order(s-order);
-   } else
-   kfree(s);
+
+   if (sysfs_slab_add(s))
+   goto err;
+
+   return s;
+
+   }
+   kfree(s);
}
up_write(slub_lock);
-   return s;
 
 err:
-   up_write(slub_lock);
if (flags  SLAB_PANIC)
panic(Cannot create slabcache %s\n, name);
else
-
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/