Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 18, 2008 7:26 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote: > On Fri, 18 Jan 2008 18:34:55 +0800, > "Dave Young" <[EMAIL PROTECTED]> wrote: > > > > On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote: > > > On Fri, 18 Jan 2008 10:19:33 +0100, > > > Cornelia Huck <[EMAIL PROTECTED]> wrote: > > > > > > > > > > > > > 1314 if (IS_ERR(new_parent_kobj)) { > > > > > 1315 error = PTR_ERR(new_parent_kobj); > > > > > 1316 put_device(new_parent); > > > > > 1317 goto out; > > > > > 1318 } > > > > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, > > > > > 1320 new_parent ? new_parent->bus_id : ""); > > > > > 1321 error = kobject_move(>kobj, new_parent_kobj); > > > > > 1322 if (error) { > > > > > 1323 put_device(new_parent); > > > > > > > > > > imagine new_parent is NULL, then the new_parent_kobj should be put > > > > > > > > No, we would need a put_device_parent() (crappy name) which puts the > > > > reference iff get_device_parent() grabbed it. > > > > > > And looking at Greg's patchset, it has cleanup_device_parent(), which > > > does just that. But it is only called in device_del(), not when > > > device_move() has errors. > > > > > > (get_device_parent() also always returns a pointer to a kobject or > > > NULL, so we can get rid of those IS_ERR() checks in setup_parent() and > > > device_move() as well.) > > > > > > > Hmm, thanks. > > I will be offline during weekend, but I will still check the > > device_move and other code if I have time. > > Just hacked together the following against Greg's tree: > > COMPLETELY UNTESTED DO NOT APPLY > > --- > drivers/base/core.c | 33 - > 1 files changed, 16 insertions(+), 17 deletions(-) > > --- linux-2.6.orig/drivers/base/core.c > +++ linux-2.6/drivers/base/core.c > @@ -553,6 +553,8 @@ static struct kobject *get_device_parent > } > > static inline void cleanup_device_parent(struct device *dev) {} > +static inline void cleanup_glue_dir(struct device *dev, > + struct kobject *kobj) {} > #else > static struct kobject *virtual_device_parent(struct device *dev) > { > @@ -617,27 +619,27 @@ static struct kobject *get_device_parent > return NULL; > } > > -static void cleanup_device_parent(struct device *dev) > +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj) > { > - struct kobject *glue_dir = dev->kobj.parent; > - > /* see if we live in a "glue" directory */ > - if (!dev->class || glue_dir->kset != >class->class_dirs) > + if (!dev->class || kobj->kset != >class->class_dirs) > return; > > - kobject_put(glue_dir); > + kobject_put(kobj); > +} > + > +static void cleanup_device_parent(struct device *dev) > +{ > + cleanup_glue_dir(dev, dev->kobj.parent); > } > #endif > > -static int setup_parent(struct device *dev, struct device *parent) > +static void setup_parent(struct device *dev, struct device *parent) > { > struct kobject *kobj; > kobj = get_device_parent(dev, parent); > - if (IS_ERR(kobj)) > - return PTR_ERR(kobj); > if (kobj) > dev->kobj.parent = kobj; > - return 0; > } > > static int device_add_class_symlinks(struct device *dev) > @@ -782,9 +784,7 @@ int device_add(struct device *dev) > pr_debug("device: '%s': %s\n", dev->bus_id, __FUNCTION__); > > parent = get_device(dev->parent); > - error = setup_parent(dev, parent); > - if (error) > - goto Error; > + setup_parent(dev, parent); > > /* first, register with generic layer. */ > error = kobject_add(>kobj, dev->kobj.parent, "%s", dev->bus_id); > @@ -862,6 +862,7 @@ int device_add(struct device *dev) > kobject_uevent(>kobj, KOBJ_REMOVE); > kobject_del(>kobj); > Error: > + cleanup_device_parent(dev); > if (parent) > put_device(parent); > goto Done; > @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru > > new_parent = get_device(new_parent); > new_parent_kobj = get_device_parent (dev, new_parent); > - if (IS_ERR(new_parent_kobj)) { > - error = PTR_ERR(new_parent_kobj); > - put_device(new_parent); > - goto out; > - } > + > pr_debug("device: '%s': %s: moving to '%s'\n", dev->bus_id, > __FUNCTION__, new_parent ? new_parent->bus_id : ""); > error = kobject_move(>kobj, new_parent_kobj); > if (error) { > + cleanup_glue_dir(dev, new_parent_kobj); > put_device(new_parent); > goto out; > } > @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru > klist_add_tail(>knode_parent, >
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 18, 2008 7:26 PM, Cornelia Huck [EMAIL PROTECTED] wrote: On Fri, 18 Jan 2008 18:34:55 +0800, Dave Young [EMAIL PROTECTED] wrote: On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote: On Fri, 18 Jan 2008 10:19:33 +0100, Cornelia Huck [EMAIL PROTECTED] wrote: 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. And looking at Greg's patchset, it has cleanup_device_parent(), which does just that. But it is only called in device_del(), not when device_move() has errors. (get_device_parent() also always returns a pointer to a kobject or NULL, so we can get rid of those IS_ERR() checks in setup_parent() and device_move() as well.) Hmm, thanks. I will be offline during weekend, but I will still check the device_move and other code if I have time. Just hacked together the following against Greg's tree: COMPLETELY UNTESTED DO NOT APPLY --- drivers/base/core.c | 33 - 1 files changed, 16 insertions(+), 17 deletions(-) --- linux-2.6.orig/drivers/base/core.c +++ linux-2.6/drivers/base/core.c @@ -553,6 +553,8 @@ static struct kobject *get_device_parent } static inline void cleanup_device_parent(struct device *dev) {} +static inline void cleanup_glue_dir(struct device *dev, + struct kobject *kobj) {} #else static struct kobject *virtual_device_parent(struct device *dev) { @@ -617,27 +619,27 @@ static struct kobject *get_device_parent return NULL; } -static void cleanup_device_parent(struct device *dev) +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj) { - struct kobject *glue_dir = dev-kobj.parent; - /* see if we live in a glue directory */ - if (!dev-class || glue_dir-kset != dev-class-class_dirs) + if (!dev-class || kobj-kset != dev-class-class_dirs) return; - kobject_put(glue_dir); + kobject_put(kobj); +} + +static void cleanup_device_parent(struct device *dev) +{ + cleanup_glue_dir(dev, dev-kobj.parent); } #endif -static int setup_parent(struct device *dev, struct device *parent) +static void setup_parent(struct device *dev, struct device *parent) { struct kobject *kobj; kobj = get_device_parent(dev, parent); - if (IS_ERR(kobj)) - return PTR_ERR(kobj); if (kobj) dev-kobj.parent = kobj; - return 0; } static int device_add_class_symlinks(struct device *dev) @@ -782,9 +784,7 @@ int device_add(struct device *dev) pr_debug(device: '%s': %s\n, dev-bus_id, __FUNCTION__); parent = get_device(dev-parent); - error = setup_parent(dev, parent); - if (error) - goto Error; + setup_parent(dev, parent); /* first, register with generic layer. */ error = kobject_add(dev-kobj, dev-kobj.parent, %s, dev-bus_id); @@ -862,6 +862,7 @@ int device_add(struct device *dev) kobject_uevent(dev-kobj, KOBJ_REMOVE); kobject_del(dev-kobj); Error: + cleanup_device_parent(dev); if (parent) put_device(parent); goto Done; @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru new_parent = get_device(new_parent); new_parent_kobj = get_device_parent (dev, new_parent); - if (IS_ERR(new_parent_kobj)) { - error = PTR_ERR(new_parent_kobj); - put_device(new_parent); - goto out; - } + pr_debug(device: '%s': %s: moving to '%s'\n, dev-bus_id, __FUNCTION__, new_parent ? new_parent-bus_id : NULL); error = kobject_move(dev-kobj, new_parent_kobj); if (error) { + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru klist_add_tail(dev-knode_parent, old_parent-klist_children); } + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } Looks good. -- To unsubscribe from this list: send
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 18:34:55 +0800, "Dave Young" <[EMAIL PROTECTED]> wrote: > On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote: > > On Fri, 18 Jan 2008 10:19:33 +0100, > > Cornelia Huck <[EMAIL PROTECTED]> wrote: > > > > > > > > > > 1314 if (IS_ERR(new_parent_kobj)) { > > > > 1315 error = PTR_ERR(new_parent_kobj); > > > > 1316 put_device(new_parent); > > > > 1317 goto out; > > > > 1318 } > > > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, > > > > 1320 new_parent ? new_parent->bus_id : ""); > > > > 1321 error = kobject_move(>kobj, new_parent_kobj); > > > > 1322 if (error) { > > > > 1323 put_device(new_parent); > > > > > > > > imagine new_parent is NULL, then the new_parent_kobj should be put > > > > > > No, we would need a put_device_parent() (crappy name) which puts the > > > reference iff get_device_parent() grabbed it. > > > > And looking at Greg's patchset, it has cleanup_device_parent(), which > > does just that. But it is only called in device_del(), not when > > device_move() has errors. > > > > (get_device_parent() also always returns a pointer to a kobject or > > NULL, so we can get rid of those IS_ERR() checks in setup_parent() and > > device_move() as well.) > > > > Hmm, thanks. > I will be offline during weekend, but I will still check the > device_move and other code if I have time. Just hacked together the following against Greg's tree: COMPLETELY UNTESTED DO NOT APPLY --- drivers/base/core.c | 33 - 1 files changed, 16 insertions(+), 17 deletions(-) --- linux-2.6.orig/drivers/base/core.c +++ linux-2.6/drivers/base/core.c @@ -553,6 +553,8 @@ static struct kobject *get_device_parent } static inline void cleanup_device_parent(struct device *dev) {} +static inline void cleanup_glue_dir(struct device *dev, + struct kobject *kobj) {} #else static struct kobject *virtual_device_parent(struct device *dev) { @@ -617,27 +619,27 @@ static struct kobject *get_device_parent return NULL; } -static void cleanup_device_parent(struct device *dev) +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj) { - struct kobject *glue_dir = dev->kobj.parent; - /* see if we live in a "glue" directory */ - if (!dev->class || glue_dir->kset != >class->class_dirs) + if (!dev->class || kobj->kset != >class->class_dirs) return; - kobject_put(glue_dir); + kobject_put(kobj); +} + +static void cleanup_device_parent(struct device *dev) +{ + cleanup_glue_dir(dev, dev->kobj.parent); } #endif -static int setup_parent(struct device *dev, struct device *parent) +static void setup_parent(struct device *dev, struct device *parent) { struct kobject *kobj; kobj = get_device_parent(dev, parent); - if (IS_ERR(kobj)) - return PTR_ERR(kobj); if (kobj) dev->kobj.parent = kobj; - return 0; } static int device_add_class_symlinks(struct device *dev) @@ -782,9 +784,7 @@ int device_add(struct device *dev) pr_debug("device: '%s': %s\n", dev->bus_id, __FUNCTION__); parent = get_device(dev->parent); - error = setup_parent(dev, parent); - if (error) - goto Error; + setup_parent(dev, parent); /* first, register with generic layer. */ error = kobject_add(>kobj, dev->kobj.parent, "%s", dev->bus_id); @@ -862,6 +862,7 @@ int device_add(struct device *dev) kobject_uevent(>kobj, KOBJ_REMOVE); kobject_del(>kobj); Error: + cleanup_device_parent(dev); if (parent) put_device(parent); goto Done; @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru new_parent = get_device(new_parent); new_parent_kobj = get_device_parent (dev, new_parent); - if (IS_ERR(new_parent_kobj)) { - error = PTR_ERR(new_parent_kobj); - put_device(new_parent); - goto out; - } + pr_debug("device: '%s': %s: moving to '%s'\n", dev->bus_id, __FUNCTION__, new_parent ? new_parent->bus_id : ""); error = kobject_move(>kobj, new_parent_kobj); if (error) { + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru klist_add_tail(>knode_parent, _parent->klist_children); } + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote: > On Fri, 18 Jan 2008 10:19:33 +0100, > Cornelia Huck <[EMAIL PROTECTED]> wrote: > > > > > > > 1314 if (IS_ERR(new_parent_kobj)) { > > > 1315 error = PTR_ERR(new_parent_kobj); > > > 1316 put_device(new_parent); > > > 1317 goto out; > > > 1318 } > > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, > > > 1320 new_parent ? new_parent->bus_id : ""); > > > 1321 error = kobject_move(>kobj, new_parent_kobj); > > > 1322 if (error) { > > > 1323 put_device(new_parent); > > > > > > imagine new_parent is NULL, then the new_parent_kobj should be put > > > > No, we would need a put_device_parent() (crappy name) which puts the > > reference iff get_device_parent() grabbed it. > > And looking at Greg's patchset, it has cleanup_device_parent(), which > does just that. But it is only called in device_del(), not when > device_move() has errors. > > (get_device_parent() also always returns a pointer to a kobject or > NULL, so we can get rid of those IS_ERR() checks in setup_parent() and > device_move() as well.) > Hmm, thanks. I will be offline during weekend, but I will still check the device_move and other code if I have time. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 10:19:33 +0100, Cornelia Huck <[EMAIL PROTECTED]> wrote: > > > > 1314 if (IS_ERR(new_parent_kobj)) { > > 1315 error = PTR_ERR(new_parent_kobj); > > 1316 put_device(new_parent); > > 1317 goto out; > > 1318 } > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, > > 1320 new_parent ? new_parent->bus_id : ""); > > 1321 error = kobject_move(>kobj, new_parent_kobj); > > 1322 if (error) { > > 1323 put_device(new_parent); > > > > imagine new_parent is NULL, then the new_parent_kobj should be put > > No, we would need a put_device_parent() (crappy name) which puts the > reference iff get_device_parent() grabbed it. And looking at Greg's patchset, it has cleanup_device_parent(), which does just that. But it is only called in device_del(), not when device_move() has errors. (get_device_parent() also always returns a pointer to a kobject or NULL, so we can get rid of those IS_ERR() checks in setup_parent() and device_move() as well.) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 11:37:21 +0800, "Dave Young" <[EMAIL PROTECTED]> wrote: > Lets see the device_move function, seems there's some problems in it: > > 1302 int device_move(struct device *dev, struct device *new_parent) > 1303 { > 1304 int error; > 1305 struct device *old_parent; > 1306 struct kobject *new_parent_kobj; > 1307 > 1308 dev = get_device(dev); > 1309 if (!dev) > 1310 return -EINVAL; > 1311 > 1312 new_parent = get_device(new_parent); > 1313 new_parent_kobj = get_device_parent (dev, new_parent); > > Here could get kobject reference Eww. get_device_parent() may inflate the refcount in one case for !CONFIG_SYSFS_DEPRECATED, but often won't. (And the function is named confusingly, since it hints that we always get a reference, which we don't.) > > 1314 if (IS_ERR(new_parent_kobj)) { > 1315 error = PTR_ERR(new_parent_kobj); > 1316 put_device(new_parent); > 1317 goto out; > 1318 } > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, > 1320 new_parent ? new_parent->bus_id : ""); > 1321 error = kobject_move(>kobj, new_parent_kobj); > 1322 if (error) { > 1323 put_device(new_parent); > > imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. > > 1324 goto out; > 1325 } > 1326 old_parent = dev->parent; > 1327 dev->parent = new_parent; > 1328 if (old_parent) > 1329 klist_remove(>knode_parent); > 1330 if (new_parent) > 1331 klist_add_tail(>knode_parent, > _parent->klist_children); > 1332 if (!dev->class) > 1333 goto out_put; > > Why not put new_parent | new_parent_kobj? Because that is the good case :) > > 1334 error = device_move_class_links(dev, old_parent, new_parent); > 1335 if (error) { > 1336 /* We ignore errors on cleanup since we're hosed > anyway... */ > 1337 device_move_class_links(dev, new_parent, old_parent); > 1338 if (!kobject_move(>kobj, _parent->kobj)) { > 1339 if (new_parent) > 1340 klist_remove(>knode_parent); > 1341 if (old_parent) > 1342 klist_add_tail(>knode_parent, > 1343 > _parent->klist_children); > 1344 } > 1345 put_device(new_parent); > > Same doubt as above We'd need put_device_parent() or whatever here as well, I guess. > > 1346 goto out; > 1347 } > 1348 out_put: > 1349 put_device(old_parent); > 1350 out: > 1351 put_device(dev); > 1352 return error; > 1353 } > > Hope I'm wrong, but if it's indeed bugs, I will send a patch about this. There are more problems, I'm afraid :( setup_parent() calls get_device_parent() as well, so device_add() has the same problems on error cleanup... I'll take a look at it if I find some time, but I'm afraid I'll not be able to do so before next week. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 11:37:21 +0800, Dave Young [EMAIL PROTECTED] wrote: Lets see the device_move function, seems there's some problems in it: 1302 int device_move(struct device *dev, struct device *new_parent) 1303 { 1304 int error; 1305 struct device *old_parent; 1306 struct kobject *new_parent_kobj; 1307 1308 dev = get_device(dev); 1309 if (!dev) 1310 return -EINVAL; 1311 1312 new_parent = get_device(new_parent); 1313 new_parent_kobj = get_device_parent (dev, new_parent); Here could get kobject reference Eww. get_device_parent() may inflate the refcount in one case for !CONFIG_SYSFS_DEPRECATED, but often won't. (And the function is named confusingly, since it hints that we always get a reference, which we don't.) 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. 1324 goto out; 1325 } 1326 old_parent = dev-parent; 1327 dev-parent = new_parent; 1328 if (old_parent) 1329 klist_remove(dev-knode_parent); 1330 if (new_parent) 1331 klist_add_tail(dev-knode_parent, new_parent-klist_children); 1332 if (!dev-class) 1333 goto out_put; Why not put new_parent | new_parent_kobj? Because that is the good case :) 1334 error = device_move_class_links(dev, old_parent, new_parent); 1335 if (error) { 1336 /* We ignore errors on cleanup since we're hosed anyway... */ 1337 device_move_class_links(dev, new_parent, old_parent); 1338 if (!kobject_move(dev-kobj, old_parent-kobj)) { 1339 if (new_parent) 1340 klist_remove(dev-knode_parent); 1341 if (old_parent) 1342 klist_add_tail(dev-knode_parent, 1343 old_parent-klist_children); 1344 } 1345 put_device(new_parent); Same doubt as above We'd need put_device_parent() or whatever here as well, I guess. 1346 goto out; 1347 } 1348 out_put: 1349 put_device(old_parent); 1350 out: 1351 put_device(dev); 1352 return error; 1353 } Hope I'm wrong, but if it's indeed bugs, I will send a patch about this. There are more problems, I'm afraid :( setup_parent() calls get_device_parent() as well, so device_add() has the same problems on error cleanup... I'll take a look at it if I find some time, but I'm afraid I'll not be able to do so before next week. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote: On Fri, 18 Jan 2008 10:19:33 +0100, Cornelia Huck [EMAIL PROTECTED] wrote: 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. And looking at Greg's patchset, it has cleanup_device_parent(), which does just that. But it is only called in device_del(), not when device_move() has errors. (get_device_parent() also always returns a pointer to a kobject or NULL, so we can get rid of those IS_ERR() checks in setup_parent() and device_move() as well.) Hmm, thanks. I will be offline during weekend, but I will still check the device_move and other code if I have time. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 10:19:33 +0100, Cornelia Huck [EMAIL PROTECTED] wrote: 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. And looking at Greg's patchset, it has cleanup_device_parent(), which does just that. But it is only called in device_del(), not when device_move() has errors. (get_device_parent() also always returns a pointer to a kobject or NULL, so we can get rid of those IS_ERR() checks in setup_parent() and device_move() as well.) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Fri, 18 Jan 2008 18:34:55 +0800, Dave Young [EMAIL PROTECTED] wrote: On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote: On Fri, 18 Jan 2008 10:19:33 +0100, Cornelia Huck [EMAIL PROTECTED] wrote: 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put No, we would need a put_device_parent() (crappy name) which puts the reference iff get_device_parent() grabbed it. And looking at Greg's patchset, it has cleanup_device_parent(), which does just that. But it is only called in device_del(), not when device_move() has errors. (get_device_parent() also always returns a pointer to a kobject or NULL, so we can get rid of those IS_ERR() checks in setup_parent() and device_move() as well.) Hmm, thanks. I will be offline during weekend, but I will still check the device_move and other code if I have time. Just hacked together the following against Greg's tree: COMPLETELY UNTESTED DO NOT APPLY --- drivers/base/core.c | 33 - 1 files changed, 16 insertions(+), 17 deletions(-) --- linux-2.6.orig/drivers/base/core.c +++ linux-2.6/drivers/base/core.c @@ -553,6 +553,8 @@ static struct kobject *get_device_parent } static inline void cleanup_device_parent(struct device *dev) {} +static inline void cleanup_glue_dir(struct device *dev, + struct kobject *kobj) {} #else static struct kobject *virtual_device_parent(struct device *dev) { @@ -617,27 +619,27 @@ static struct kobject *get_device_parent return NULL; } -static void cleanup_device_parent(struct device *dev) +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj) { - struct kobject *glue_dir = dev-kobj.parent; - /* see if we live in a glue directory */ - if (!dev-class || glue_dir-kset != dev-class-class_dirs) + if (!dev-class || kobj-kset != dev-class-class_dirs) return; - kobject_put(glue_dir); + kobject_put(kobj); +} + +static void cleanup_device_parent(struct device *dev) +{ + cleanup_glue_dir(dev, dev-kobj.parent); } #endif -static int setup_parent(struct device *dev, struct device *parent) +static void setup_parent(struct device *dev, struct device *parent) { struct kobject *kobj; kobj = get_device_parent(dev, parent); - if (IS_ERR(kobj)) - return PTR_ERR(kobj); if (kobj) dev-kobj.parent = kobj; - return 0; } static int device_add_class_symlinks(struct device *dev) @@ -782,9 +784,7 @@ int device_add(struct device *dev) pr_debug(device: '%s': %s\n, dev-bus_id, __FUNCTION__); parent = get_device(dev-parent); - error = setup_parent(dev, parent); - if (error) - goto Error; + setup_parent(dev, parent); /* first, register with generic layer. */ error = kobject_add(dev-kobj, dev-kobj.parent, %s, dev-bus_id); @@ -862,6 +862,7 @@ int device_add(struct device *dev) kobject_uevent(dev-kobj, KOBJ_REMOVE); kobject_del(dev-kobj); Error: + cleanup_device_parent(dev); if (parent) put_device(parent); goto Done; @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru new_parent = get_device(new_parent); new_parent_kobj = get_device_parent (dev, new_parent); - if (IS_ERR(new_parent_kobj)) { - error = PTR_ERR(new_parent_kobj); - put_device(new_parent); - goto out; - } + pr_debug(device: '%s': %s: moving to '%s'\n, dev-bus_id, __FUNCTION__, new_parent ? new_parent-bus_id : NULL); error = kobject_move(dev-kobj, new_parent_kobj); if (error) { + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru klist_add_tail(dev-knode_parent, old_parent-klist_children); } + cleanup_glue_dir(dev, new_parent_kobj); put_device(new_parent); goto out; } -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 17, 2008 7:42 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote: > On Thu, 17 Jan 2008 16:15:04 +0800, > Dave Young <[EMAIL PROTECTED]> wrote: > > > On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote: > > > On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > > > Hi, > > > > > > > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: > > > > > > > > > The rfcomm tty device will possibly retain even when conn is down, > > > > > and sysfs doesn't support zombie device moving, so this patch > > > > > move the tty device before conn device is destroyed. > > > > > > > > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]> > > > > > > > > This seems to work, both the oops and the hang are gone. I get these > > > > messages in syslog when the Bluetooth link hangs and I want to kill pppd > > > > with "poff": > > > > > > > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 > > > > to become free. Usage count = 1 > > > > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 > > > > to become free. Usage count = 1 > > Might be helpful to try with CONFIG_DEBUG_DRIVER=y and > CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped. > > > > > > > > > But a "killall -9 pppd" seems to help and then the re-connect (after the > > > > phone got power-cycled) works. > > > > > > Weird, I guess "device_move(dev, NULL) two times" cause the problem. > > > > > > Anyway, device_move should check the old_parent and new_parent , if > > > they equal to each other then just return. > > sysfs_move_dir() does this (to avoid a loop later in the function). > Don't know if that's a good thing to check at a higher level as well, > because the calling code should really know where their devices are. > > Anyway, if this "move in place" exposes a refcounting bug, we must fix > it. Lets see the device_move function, seems there's some problems in it: 1302 int device_move(struct device *dev, struct device *new_parent) 1303 { 1304 int error; 1305 struct device *old_parent; 1306 struct kobject *new_parent_kobj; 1307 1308 dev = get_device(dev); 1309 if (!dev) 1310 return -EINVAL; 1311 1312 new_parent = get_device(new_parent); 1313 new_parent_kobj = get_device_parent (dev, new_parent); Here could get kobject reference 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id, 1320 new_parent ? new_parent->bus_id : ""); 1321 error = kobject_move(>kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put 1324 goto out; 1325 } 1326 old_parent = dev->parent; 1327 dev->parent = new_parent; 1328 if (old_parent) 1329 klist_remove(>knode_parent); 1330 if (new_parent) 1331 klist_add_tail(>knode_parent, _parent->klist_children); 1332 if (!dev->class) 1333 goto out_put; Why not put new_parent | new_parent_kobj? 1334 error = device_move_class_links(dev, old_parent, new_parent); 1335 if (error) { 1336 /* We ignore errors on cleanup since we're hosed anyway... */ 1337 device_move_class_links(dev, new_parent, old_parent); 1338 if (!kobject_move(>kobj, _parent->kobj)) { 1339 if (new_parent) 1340 klist_remove(>knode_parent); 1341 if (old_parent) 1342 klist_add_tail(>knode_parent, 1343 _parent->klist_children); 1344 } 1345 put_device(new_parent); Same doubt as above 1346 goto out; 1347 } 1348 out_put: 1349 put_device(old_parent); 1350 out: 1351 put_device(dev); 1352 return error; 1353 } Hope I'm wrong, but if it's indeed bugs, I will send a patch about this. > > > > > > > Am I right? > > > > Could you apply this patch as well to test? Thanks. > > > > diff -upr linux/net/bluetooth/rfcomm/tty.c > > linux.new/net/bluetooth/rfcomm/tty.c > > --- linux/net/bluetooth/rfcomm/tty.c 2008-01-17 16:09:34.0 +0800 > > +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-17 16:10:22.0 > > +0800 > > @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_ > > BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, > > dev->opened); > > > > if (--dev->opened == 0) { > > - device_move(dev->tty_dev, NULL); > > + if (dev->tty_dev->parent) > > + device_move(dev->tty_dev, NULL); > > > > /* Close DLC and dettach TTY */ > >
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, 17 Jan 2008 16:15:04 +0800, Dave Young <[EMAIL PROTECTED]> wrote: > On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote: > > On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > > Hi, > > > > > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: > > > > > > > The rfcomm tty device will possibly retain even when conn is down, > > > > and sysfs doesn't support zombie device moving, so this patch > > > > move the tty device before conn device is destroyed. > > > > > > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]> > > > > > > This seems to work, both the oops and the hang are gone. I get these > > > messages in syslog when the Bluetooth link hangs and I want to kill pppd > > > with "poff": > > > > > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to > > > become free. Usage count = 1 > > > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to > > > become free. Usage count = 1 Might be helpful to try with CONFIG_DEBUG_DRIVER=y and CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped. > > > > > > But a "killall -9 pppd" seems to help and then the re-connect (after the > > > phone got power-cycled) works. > > > > Weird, I guess "device_move(dev, NULL) two times" cause the problem. > > > > Anyway, device_move should check the old_parent and new_parent , if > > they equal to each other then just return. sysfs_move_dir() does this (to avoid a loop later in the function). Don't know if that's a good thing to check at a higher level as well, because the calling code should really know where their devices are. Anyway, if this "move in place" exposes a refcounting bug, we must fix it. > > > > Am I right? > > Could you apply this patch as well to test? Thanks. > > diff -upr linux/net/bluetooth/rfcomm/tty.c > linux.new/net/bluetooth/rfcomm/tty.c > --- linux/net/bluetooth/rfcomm/tty.c 2008-01-17 16:09:34.0 +0800 > +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-17 16:10:22.0 > +0800 > @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_ > BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, > dev->opened); > > if (--dev->opened == 0) { > - device_move(dev->tty_dev, NULL); > + if (dev->tty_dev->parent) > + device_move(dev->tty_dev, NULL); > > /* Close DLC and dettach TTY */ > rfcomm_dlc_close(dev->dlc, 0); > Avoiding to move devices when there is nothing to be moved nevertheless sounds like a good idea :) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote: > On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > Hi, > > > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: > > > > > The rfcomm tty device will possibly retain even when conn is down, > > > and sysfs doesn't support zombie device moving, so this patch > > > move the tty device before conn device is destroyed. > > > > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]> > > > > This seems to work, both the oops and the hang are gone. I get these > > messages in syslog when the Bluetooth link hangs and I want to kill pppd > > with "poff": > > > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to > > become free. Usage count = 1 > > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to > > become free. Usage count = 1 > > > > But a "killall -9 pppd" seems to help and then the re-connect (after the > > phone got power-cycled) works. > > Weird, I guess "device_move(dev, NULL) two times" cause the problem. > > Anyway, device_move should check the old_parent and new_parent , if > they equal to each other then just return. > > Am I right? Could you apply this patch as well to test? Thanks. diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c --- linux/net/bluetooth/rfcomm/tty.c2008-01-17 16:09:34.0 +0800 +++ linux.new/net/bluetooth/rfcomm/tty.c2008-01-17 16:10:22.0 +0800 @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_ BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, dev->opened); if (--dev->opened == 0) { - device_move(dev->tty_dev, NULL); + if (dev->tty_dev->parent) + device_move(dev->tty_dev, NULL); /* Close DLC and dettach TTY */ rfcomm_dlc_close(dev->dlc, 0); > > > > > > > Gabor > > > > -- > > - > > MTA SZTAKI Computer and Automation Research Institute > > Hungarian Academy of Sciences > > - > > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, 17 Jan 2008 16:15:04 +0800, Dave Young [EMAIL PROTECTED] wrote: On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote: On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote: Hi, On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: The rfcomm tty device will possibly retain even when conn is down, and sysfs doesn't support zombie device moving, so this patch move the tty device before conn device is destroyed. Signed-off-by: Dave Young [EMAIL PROTECTED] This seems to work, both the oops and the hang are gone. I get these messages in syslog when the Bluetooth link hangs and I want to kill pppd with poff: Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Might be helpful to try with CONFIG_DEBUG_DRIVER=y and CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped. But a killall -9 pppd seems to help and then the re-connect (after the phone got power-cycled) works. Weird, I guess device_move(dev, NULL) two times cause the problem. Anyway, device_move should check the old_parent and new_parent , if they equal to each other then just return. sysfs_move_dir() does this (to avoid a loop later in the function). Don't know if that's a good thing to check at a higher level as well, because the calling code should really know where their devices are. Anyway, if this move in place exposes a refcounting bug, we must fix it. Am I right? Could you apply this patch as well to test? Thanks. diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c --- linux/net/bluetooth/rfcomm/tty.c 2008-01-17 16:09:34.0 +0800 +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-17 16:10:22.0 +0800 @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_ BT_DBG(tty %p dev %p dlc %p opened %d, tty, dev, dev-dlc, dev-opened); if (--dev-opened == 0) { - device_move(dev-tty_dev, NULL); + if (dev-tty_dev-parent) + device_move(dev-tty_dev, NULL); /* Close DLC and dettach TTY */ rfcomm_dlc_close(dev-dlc, 0); Avoiding to move devices when there is nothing to be moved nevertheless sounds like a good idea :) -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 17, 2008 7:42 PM, Cornelia Huck [EMAIL PROTECTED] wrote: On Thu, 17 Jan 2008 16:15:04 +0800, Dave Young [EMAIL PROTECTED] wrote: On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote: On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote: Hi, On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: The rfcomm tty device will possibly retain even when conn is down, and sysfs doesn't support zombie device moving, so this patch move the tty device before conn device is destroyed. Signed-off-by: Dave Young [EMAIL PROTECTED] This seems to work, both the oops and the hang are gone. I get these messages in syslog when the Bluetooth link hangs and I want to kill pppd with poff: Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Might be helpful to try with CONFIG_DEBUG_DRIVER=y and CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped. But a killall -9 pppd seems to help and then the re-connect (after the phone got power-cycled) works. Weird, I guess device_move(dev, NULL) two times cause the problem. Anyway, device_move should check the old_parent and new_parent , if they equal to each other then just return. sysfs_move_dir() does this (to avoid a loop later in the function). Don't know if that's a good thing to check at a higher level as well, because the calling code should really know where their devices are. Anyway, if this move in place exposes a refcounting bug, we must fix it. Lets see the device_move function, seems there's some problems in it: 1302 int device_move(struct device *dev, struct device *new_parent) 1303 { 1304 int error; 1305 struct device *old_parent; 1306 struct kobject *new_parent_kobj; 1307 1308 dev = get_device(dev); 1309 if (!dev) 1310 return -EINVAL; 1311 1312 new_parent = get_device(new_parent); 1313 new_parent_kobj = get_device_parent (dev, new_parent); Here could get kobject reference 1314 if (IS_ERR(new_parent_kobj)) { 1315 error = PTR_ERR(new_parent_kobj); 1316 put_device(new_parent); 1317 goto out; 1318 } 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id, 1320 new_parent ? new_parent-bus_id : NULL); 1321 error = kobject_move(dev-kobj, new_parent_kobj); 1322 if (error) { 1323 put_device(new_parent); imagine new_parent is NULL, then the new_parent_kobj should be put 1324 goto out; 1325 } 1326 old_parent = dev-parent; 1327 dev-parent = new_parent; 1328 if (old_parent) 1329 klist_remove(dev-knode_parent); 1330 if (new_parent) 1331 klist_add_tail(dev-knode_parent, new_parent-klist_children); 1332 if (!dev-class) 1333 goto out_put; Why not put new_parent | new_parent_kobj? 1334 error = device_move_class_links(dev, old_parent, new_parent); 1335 if (error) { 1336 /* We ignore errors on cleanup since we're hosed anyway... */ 1337 device_move_class_links(dev, new_parent, old_parent); 1338 if (!kobject_move(dev-kobj, old_parent-kobj)) { 1339 if (new_parent) 1340 klist_remove(dev-knode_parent); 1341 if (old_parent) 1342 klist_add_tail(dev-knode_parent, 1343 old_parent-klist_children); 1344 } 1345 put_device(new_parent); Same doubt as above 1346 goto out; 1347 } 1348 out_put: 1349 put_device(old_parent); 1350 out: 1351 put_device(dev); 1352 return error; 1353 } Hope I'm wrong, but if it's indeed bugs, I will send a patch about this. Am I right? Could you apply this patch as well to test? Thanks. diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c --- linux/net/bluetooth/rfcomm/tty.c 2008-01-17 16:09:34.0 +0800 +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-17 16:10:22.0 +0800 @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_ BT_DBG(tty %p dev %p dlc %p opened %d, tty, dev, dev-dlc, dev-opened); if (--dev-opened == 0) { - device_move(dev-tty_dev, NULL); + if (dev-tty_dev-parent) + device_move(dev-tty_dev, NULL); /* Close DLC and dettach TTY */ rfcomm_dlc_close(dev-dlc, 0); Avoiding to move devices when there is nothing to be moved nevertheless sounds like a good idea :) -- To unsubscribe from this list: send the line
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > Hi, > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: > > > The rfcomm tty device will possibly retain even when conn is down, > > and sysfs doesn't support zombie device moving, so this patch > > move the tty device before conn device is destroyed. > > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]> > > This seems to work, both the oops and the hang are gone. I get these > messages in syslog when the Bluetooth link hangs and I want to kill pppd > with "poff": > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to > become free. Usage count = 1 > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to > become free. Usage count = 1 > > But a "killall -9 pppd" seems to help and then the re-connect (after the > phone got power-cycled) works. Weird, I guess "device_move(dev, NULL) two times" cause the problem. Anyway, device_move should check the old_parent and new_parent , if they equal to each other then just return. Am I right? > > > Gabor > > -- > - > MTA SZTAKI Computer and Automation Research Institute > Hungarian Academy of Sciences > - > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hi, On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: > The rfcomm tty device will possibly retain even when conn is down, > and sysfs doesn't support zombie device moving, so this patch > move the tty device before conn device is destroyed. > > Signed-off-by: Dave Young <[EMAIL PROTECTED]> This seems to work, both the oops and the hang are gone. I get these messages in syslog when the Bluetooth link hangs and I want to kill pppd with "poff": Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 But a "killall -9 pppd" seems to help and then the re-connect (after the phone got power-cycled) works. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote: Hi, On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote: The rfcomm tty device will possibly retain even when conn is down, and sysfs doesn't support zombie device moving, so this patch move the tty device before conn device is destroyed. Signed-off-by: Dave Young [EMAIL PROTECTED] This seems to work, both the oops and the hang are gone. I get these messages in syslog when the Bluetooth link hangs and I want to kill pppd with poff: Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to become free. Usage count = 1 But a killall -9 pppd seems to help and then the re-connect (after the phone got power-cycled) works. Weird, I guess device_move(dev, NULL) two times cause the problem. Anyway, device_move should check the old_parent and new_parent , if they equal to each other then just return. Am I right? Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 15, 2008 at 09:57:41AM +0800, Dave Young wrote: > On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote: > > On Mon, 14 Jan 2008 15:05:19 +0800, > > "Dave Young" <[EMAIL PROTECTED]> wrote: > > > > > On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > > > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: > > > > > > > > > For bluetooth device_move, the only child device of hci_conn dev is > > > > > the rfcomm tty_dev. How about the following patch, please verify : > > > > > > > > There is now no oops, instead the keyboard becomes almost completely > > > > unresponsible when I switch off & on the phone. The mouse still works > > > > (tested both with X and with the VGA console), but terminal input and VT > > > > switching is dead. On the VGA console, even scrolling using Shift+PgUp > > > > stops working. > > > > > > > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then > > > > init complains about hung processes and that it can not umount the file > > > > systems and cannot stop the RAID arrays. Once it still rebooted though, > > > > the second time it got hung after trying to umount the filesystems, and > > > > I had to use Alt+SysRq+b. > > > > > > > > If I can choose then I prefer the Oops... > > > > > > > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync > > > > Jan 11 23:46:35 twister kernel: Emergency Sync complete > > > > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State > > > > Jan 11 23:46:42 twister kernel: taskPC stack > > > > pid father > > > > Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 > > > >5 2 > > > > Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 > > > > 8100ae7f2600 8100bf84fd00 > > > > Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 > > > > 80652ae0 00010100 > > > > Jan 11 23:46:42 twister kernel: 7fff > > > > 7fff 0002 > > > > Jan 11 23:46:42 twister kernel: Call Trace: > > > > Jan 11 23:46:42 twister kernel: [] > > > > schedule_timeout+0x1e/0xad > > > > Jan 11 23:46:42 twister kernel: [] dput+0x26/0x103 > > > > Jan 11 23:46:42 twister kernel: [] > > > > sysfs_move_dir+0x1ee/0x1fd > > > > Jan 11 23:46:42 twister kernel: [] > > > > wait_for_common+0xc4/0x129 > > > > Jan 11 23:46:42 twister kernel: [] > > > > default_wake_function+0x0/0xe > > > > Jan 11 23:46:42 twister kernel: [] > > > > klist_del+0x15/0x2e > > > > Jan 11 23:46:42 twister kernel: [] > > > > device_move+0x80/0x111 > > > > Jan 11 23:46:42 twister kernel: [] > > > > :bluetooth:hci_conn_move_child+0x0/0xf > > > > Jan 11 23:46:42 twister kernel: [] > > > > :bluetooth:hci_conn_move_child+0xb/0xf > > > > Jan 11 23:46:42 twister kernel: [] > > > > device_for_each_child+0x22/0x4d > > > > The problem here is that device_for_each_child() will elevate the > > klist_node's refcount while the callback function calls device_move(). > > device_move() calls klist_remove() on the same node, which will do a > > klist_del() and then wait for the refcount to drop to 0... > > My wrong, it is the problem, thanks. > > > > > > > Jan 11 23:46:42 twister kernel: [] > > > > :bluetooth:del_conn+0x0/0x22 > > > > Jan 11 23:46:42 twister kernel: [] > > > > :bluetooth:del_conn+0x19/0x22 > > > > Jan 11 23:46:42 twister kernel: [] > > > > run_workqueue+0x74/0xee > > > > Jan 11 23:46:42 twister kernel: [] > > > > worker_thread+0x0/0xe7 > > > > Jan 11 23:46:42 twister kernel: [] > > > > worker_thread+0xda/0xe7 > > > > Jan 11 23:46:42 twister kernel: [] > > > > autoremove_wake_function+0x0/0x2e > > > > Jan 11 23:46:42 twister kernel: [] > > > > worker_thread+0x0/0xe7 > > > > Jan 11 23:46:42 twister kernel: [] kthread+0x47/0x73 > > > > Jan 11 23:46:42 twister kernel: [] child_rip+0xa/0x12 > > > > Jan 11 23:46:42 twister kernel: [] kthread+0x0/0x73 > > > > Jan 11 23:46:42 twister kernel: [] child_rip+0x0/0x12 > > > > Jan 11 23:46:42 twister kernel: > > > > Jan 11 23:46:42 twister kernel: cat D 7fff 0 > > > > 3978 3965 > > > > Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 > > > > 8100ae7f2900 81009e857ca8 > > > > Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 > > > > 80652ae0 000100c0 > > > > Jan 11 23:46:42 twister kernel: 7fff > > > > 7fff 0002 > > > > Jan 11 23:46:42 twister kernel: Call Trace: > > > > Jan 11 23:46:42 twister kernel: [] > > > > schedule_timeout+0x1e/0xad > > > > Jan 11 23:46:42 twister kernel: [] > > > > __dequeue_entity+0x1c/0x32 > > > > Jan 11 23:46:42 twister kernel: [] > > > > set_next_entity+0x23/0x73 > > > > Jan 11 23:46:42 twister kernel: [] > > > > wait_for_common+0xc4/0x129 > > > > Jan 11 23:46:42 twister kernel: [] > > > > default_wake_function+0x0/0xe > > > > Jan 11
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 15, 2008 at 09:57:41AM +0800, Dave Young wrote: On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote: On Mon, 14 Jan 2008 15:05:19 +0800, Dave Young [EMAIL PROTECTED] wrote: On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote: On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [8028560d] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [802af3fd] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80471d72] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [803698f3] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [88146a27] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [88146a32] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [80369836] device_for_each_child+0x22/0x4d The problem here is that device_for_each_child() will elevate the klist_node's refcount while the callback function calls device_move(). device_move() calls klist_remove() on the same node, which will do a klist_del() and then wait for the refcount to drop to 0... My wrong, it is the problem, thanks. Jan 11 23:46:42 twister kernel: [88146a05] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [88146a1e] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [8023696d] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802370d2] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [80239b77] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802399a0] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [8020bdc8] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [80239959] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [8020bdbe] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [80223ccc] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [80223d05] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204]
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote: > On Mon, 14 Jan 2008 15:05:19 +0800, > "Dave Young" <[EMAIL PROTECTED]> wrote: > > > On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: > > > > > > > For bluetooth device_move, the only child device of hci_conn dev is > > > > the rfcomm tty_dev. How about the following patch, please verify : > > > > > > There is now no oops, instead the keyboard becomes almost completely > > > unresponsible when I switch off & on the phone. The mouse still works > > > (tested both with X and with the VGA console), but terminal input and VT > > > switching is dead. On the VGA console, even scrolling using Shift+PgUp > > > stops working. > > > > > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then > > > init complains about hung processes and that it can not umount the file > > > systems and cannot stop the RAID arrays. Once it still rebooted though, > > > the second time it got hung after trying to umount the filesystems, and > > > I had to use Alt+SysRq+b. > > > > > > If I can choose then I prefer the Oops... > > > > > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync > > > Jan 11 23:46:35 twister kernel: Emergency Sync complete > > > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State > > > Jan 11 23:46:42 twister kernel: taskPC stack > > > pid father > > > Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 > > > 5 2 > > > Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 > > > 8100ae7f2600 8100bf84fd00 > > > Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 > > > 80652ae0 00010100 > > > Jan 11 23:46:42 twister kernel: 7fff > > > 7fff 0002 > > > Jan 11 23:46:42 twister kernel: Call Trace: > > > Jan 11 23:46:42 twister kernel: [] > > > schedule_timeout+0x1e/0xad > > > Jan 11 23:46:42 twister kernel: [] dput+0x26/0x103 > > > Jan 11 23:46:42 twister kernel: [] > > > sysfs_move_dir+0x1ee/0x1fd > > > Jan 11 23:46:42 twister kernel: [] > > > wait_for_common+0xc4/0x129 > > > Jan 11 23:46:42 twister kernel: [] > > > default_wake_function+0x0/0xe > > > Jan 11 23:46:42 twister kernel: [] klist_del+0x15/0x2e > > > Jan 11 23:46:42 twister kernel: [] > > > device_move+0x80/0x111 > > > Jan 11 23:46:42 twister kernel: [] > > > :bluetooth:hci_conn_move_child+0x0/0xf > > > Jan 11 23:46:42 twister kernel: [] > > > :bluetooth:hci_conn_move_child+0xb/0xf > > > Jan 11 23:46:42 twister kernel: [] > > > device_for_each_child+0x22/0x4d > > The problem here is that device_for_each_child() will elevate the > klist_node's refcount while the callback function calls device_move(). > device_move() calls klist_remove() on the same node, which will do a > klist_del() and then wait for the refcount to drop to 0... My wrong, it is the problem, thanks. > > > > Jan 11 23:46:42 twister kernel: [] > > > :bluetooth:del_conn+0x0/0x22 > > > Jan 11 23:46:42 twister kernel: [] > > > :bluetooth:del_conn+0x19/0x22 > > > Jan 11 23:46:42 twister kernel: [] > > > run_workqueue+0x74/0xee > > > Jan 11 23:46:42 twister kernel: [] > > > worker_thread+0x0/0xe7 > > > Jan 11 23:46:42 twister kernel: [] > > > worker_thread+0xda/0xe7 > > > Jan 11 23:46:42 twister kernel: [] > > > autoremove_wake_function+0x0/0x2e > > > Jan 11 23:46:42 twister kernel: [] > > > worker_thread+0x0/0xe7 > > > Jan 11 23:46:42 twister kernel: [] kthread+0x47/0x73 > > > Jan 11 23:46:42 twister kernel: [] child_rip+0xa/0x12 > > > Jan 11 23:46:42 twister kernel: [] kthread+0x0/0x73 > > > Jan 11 23:46:42 twister kernel: [] child_rip+0x0/0x12 > > > Jan 11 23:46:42 twister kernel: > > > Jan 11 23:46:42 twister kernel: cat D 7fff 0 > > > 3978 3965 > > > Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 > > > 8100ae7f2900 81009e857ca8 > > > Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 > > > 80652ae0 000100c0 > > > Jan 11 23:46:42 twister kernel: 7fff > > > 7fff 0002 > > > Jan 11 23:46:42 twister kernel: Call Trace: > > > Jan 11 23:46:42 twister kernel: [] > > > schedule_timeout+0x1e/0xad > > > Jan 11 23:46:42 twister kernel: [] > > > __dequeue_entity+0x1c/0x32 > > > Jan 11 23:46:42 twister kernel: [] > > > set_next_entity+0x23/0x73 > > > Jan 11 23:46:42 twister kernel: [] > > > wait_for_common+0xc4/0x129 > > > Jan 11 23:46:42 twister kernel: [] > > > default_wake_function+0x0/0xe > > > Jan 11 23:46:42 twister kernel: [] > > > flush_cpu_workqueue+0x50/0x58 > > > Jan 11 23:46:42 twister kernel: [] > > > wq_barrier_func+0x0/0x9 > > > Jan 11 23:46:42 twister kernel: [] > > > flush_workqueue+0x9/0x12 > > > Jan 11 23:46:42 twister kernel: [] > > >
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, 14 Jan 2008 15:05:19 +0800, "Dave Young" <[EMAIL PROTECTED]> wrote: > On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: > > > > > For bluetooth device_move, the only child device of hci_conn dev is > > > the rfcomm tty_dev. How about the following patch, please verify : > > > > There is now no oops, instead the keyboard becomes almost completely > > unresponsible when I switch off & on the phone. The mouse still works > > (tested both with X and with the VGA console), but terminal input and VT > > switching is dead. On the VGA console, even scrolling using Shift+PgUp > > stops working. > > > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then > > init complains about hung processes and that it can not umount the file > > systems and cannot stop the RAID arrays. Once it still rebooted though, > > the second time it got hung after trying to umount the filesystems, and > > I had to use Alt+SysRq+b. > > > > If I can choose then I prefer the Oops... > > > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync > > Jan 11 23:46:35 twister kernel: Emergency Sync complete > > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State > > Jan 11 23:46:42 twister kernel: taskPC stack > > pid father > > Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 > > 5 2 > > Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 > > 8100ae7f2600 8100bf84fd00 > > Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 > > 80652ae0 00010100 > > Jan 11 23:46:42 twister kernel: 7fff > > 7fff 0002 > > Jan 11 23:46:42 twister kernel: Call Trace: > > Jan 11 23:46:42 twister kernel: [] > > schedule_timeout+0x1e/0xad > > Jan 11 23:46:42 twister kernel: [] dput+0x26/0x103 > > Jan 11 23:46:42 twister kernel: [] > > sysfs_move_dir+0x1ee/0x1fd > > Jan 11 23:46:42 twister kernel: [] > > wait_for_common+0xc4/0x129 > > Jan 11 23:46:42 twister kernel: [] > > default_wake_function+0x0/0xe > > Jan 11 23:46:42 twister kernel: [] klist_del+0x15/0x2e > > Jan 11 23:46:42 twister kernel: [] device_move+0x80/0x111 > > Jan 11 23:46:42 twister kernel: [] > > :bluetooth:hci_conn_move_child+0x0/0xf > > Jan 11 23:46:42 twister kernel: [] > > :bluetooth:hci_conn_move_child+0xb/0xf > > Jan 11 23:46:42 twister kernel: [] > > device_for_each_child+0x22/0x4d The problem here is that device_for_each_child() will elevate the klist_node's refcount while the callback function calls device_move(). device_move() calls klist_remove() on the same node, which will do a klist_del() and then wait for the refcount to drop to 0... > > Jan 11 23:46:42 twister kernel: [] > > :bluetooth:del_conn+0x0/0x22 > > Jan 11 23:46:42 twister kernel: [] > > :bluetooth:del_conn+0x19/0x22 > > Jan 11 23:46:42 twister kernel: [] > > run_workqueue+0x74/0xee > > Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 > > Jan 11 23:46:42 twister kernel: [] > > worker_thread+0xda/0xe7 > > Jan 11 23:46:42 twister kernel: [] > > autoremove_wake_function+0x0/0x2e > > Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 > > Jan 11 23:46:42 twister kernel: [] kthread+0x47/0x73 > > Jan 11 23:46:42 twister kernel: [] child_rip+0xa/0x12 > > Jan 11 23:46:42 twister kernel: [] kthread+0x0/0x73 > > Jan 11 23:46:42 twister kernel: [] child_rip+0x0/0x12 > > Jan 11 23:46:42 twister kernel: > > Jan 11 23:46:42 twister kernel: cat D 7fff 0 > > 3978 3965 > > Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 > > 8100ae7f2900 81009e857ca8 > > Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 > > 80652ae0 000100c0 > > Jan 11 23:46:42 twister kernel: 7fff > > 7fff 0002 > > Jan 11 23:46:42 twister kernel: Call Trace: > > Jan 11 23:46:42 twister kernel: [] > > schedule_timeout+0x1e/0xad > > Jan 11 23:46:42 twister kernel: [] > > __dequeue_entity+0x1c/0x32 > > Jan 11 23:46:42 twister kernel: [] > > set_next_entity+0x23/0x73 > > Jan 11 23:46:42 twister kernel: [] > > wait_for_common+0xc4/0x129 > > Jan 11 23:46:42 twister kernel: [] > > default_wake_function+0x0/0xe > > Jan 11 23:46:42 twister kernel: [] > > flush_cpu_workqueue+0x50/0x58 > > Jan 11 23:46:42 twister kernel: [] > > wq_barrier_func+0x0/0x9 > > Jan 11 23:46:42 twister kernel: [] > > flush_workqueue+0x9/0x12 > > Jan 11 23:46:42 twister kernel: [] > > release_dev+0x47c/0x5e2 > > Jan 11 23:46:42 twister kernel: [] > > do_page_fault+0x2ff/0x65a > > Jan 11 23:46:42 twister kernel: [] tty_release+0xc/0x10 > > Jan 11 23:46:42 twister kernel: [] __fput+0xb1/0x16f > > Jan 11 23:46:42 twister kernel: [] filp_close+0x5d/0x65 > > Jan 11 23:46:42 twister kernel: [] sys_close+0x73/0xa6 > > Jan 11
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, 14 Jan 2008 15:05:19 +0800, Dave Young [EMAIL PROTECTED] wrote: On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote: On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [8028560d] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [802af3fd] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80471d72] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [803698f3] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [88146a27] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [88146a32] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [80369836] device_for_each_child+0x22/0x4d The problem here is that device_for_each_child() will elevate the klist_node's refcount while the callback function calls device_move(). device_move() calls klist_remove() on the same node, which will do a klist_del() and then wait for the refcount to drop to 0... Jan 11 23:46:42 twister kernel: [88146a05] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [88146a1e] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [8023696d] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802370d2] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [80239b77] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802399a0] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [8020bdc8] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [80239959] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [8020bdbe] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [80223ccc] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [80223d05] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80236b02] flush_cpu_workqueue+0x50/0x58 Jan 11 23:46:42 twister kernel: [80236c32] wq_barrier_func+0x0/0x9 Jan 11 23:46:42 twister kernel: [80236ecf] flush_workqueue+0x9/0x12 Jan 11 23:46:42 twister kernel: [803449f9] release_dev+0x47c/0x5e2 Jan 11 23:46:42 twister kernel:
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote: On Mon, 14 Jan 2008 15:05:19 +0800, Dave Young [EMAIL PROTECTED] wrote: On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote: On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [8028560d] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [802af3fd] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80471d72] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [803698f3] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [88146a27] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [88146a32] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [80369836] device_for_each_child+0x22/0x4d The problem here is that device_for_each_child() will elevate the klist_node's refcount while the callback function calls device_move(). device_move() calls klist_remove() on the same node, which will do a klist_del() and then wait for the refcount to drop to 0... My wrong, it is the problem, thanks. Jan 11 23:46:42 twister kernel: [88146a05] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [88146a1e] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [8023696d] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802370d2] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [80239b77] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802399a0] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [8020bdc8] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [80239959] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [8020bdbe] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [80223ccc] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [80223d05] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80236b02] flush_cpu_workqueue+0x50/0x58 Jan 11 23:46:42 twister kernel: [80236c32]
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: > > > For bluetooth device_move, the only child device of hci_conn dev is > > the rfcomm tty_dev. How about the following patch, please verify : > > There is now no oops, instead the keyboard becomes almost completely > unresponsible when I switch off & on the phone. The mouse still works > (tested both with X and with the VGA console), but terminal input and VT > switching is dead. On the VGA console, even scrolling using Shift+PgUp > stops working. > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then > init complains about hung processes and that it can not umount the file > systems and cannot stop the RAID arrays. Once it still rebooted though, > the second time it got hung after trying to umount the filesystems, and > I had to use Alt+SysRq+b. > > If I can choose then I prefer the Oops... > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync > Jan 11 23:46:35 twister kernel: Emergency Sync complete > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State > Jan 11 23:46:42 twister kernel: taskPC stack pid > father > Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 > 2 > Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 > 8100ae7f2600 8100bf84fd00 > Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 > 80652ae0 00010100 > Jan 11 23:46:42 twister kernel: 7fff > 7fff 0002 > Jan 11 23:46:42 twister kernel: Call Trace: > Jan 11 23:46:42 twister kernel: [] > schedule_timeout+0x1e/0xad > Jan 11 23:46:42 twister kernel: [] dput+0x26/0x103 > Jan 11 23:46:42 twister kernel: [] > sysfs_move_dir+0x1ee/0x1fd > Jan 11 23:46:42 twister kernel: [] > wait_for_common+0xc4/0x129 > Jan 11 23:46:42 twister kernel: [] > default_wake_function+0x0/0xe > Jan 11 23:46:42 twister kernel: [] klist_del+0x15/0x2e > Jan 11 23:46:42 twister kernel: [] device_move+0x80/0x111 > Jan 11 23:46:42 twister kernel: [] > :bluetooth:hci_conn_move_child+0x0/0xf > Jan 11 23:46:42 twister kernel: [] > :bluetooth:hci_conn_move_child+0xb/0xf > Jan 11 23:46:42 twister kernel: [] > device_for_each_child+0x22/0x4d > Jan 11 23:46:42 twister kernel: [] > :bluetooth:del_conn+0x0/0x22 > Jan 11 23:46:42 twister kernel: [] > :bluetooth:del_conn+0x19/0x22 > Jan 11 23:46:42 twister kernel: [] run_workqueue+0x74/0xee > Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 > Jan 11 23:46:42 twister kernel: [] worker_thread+0xda/0xe7 > Jan 11 23:46:42 twister kernel: [] > autoremove_wake_function+0x0/0x2e > Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 > Jan 11 23:46:42 twister kernel: [] kthread+0x47/0x73 > Jan 11 23:46:42 twister kernel: [] child_rip+0xa/0x12 > Jan 11 23:46:42 twister kernel: [] kthread+0x0/0x73 > Jan 11 23:46:42 twister kernel: [] child_rip+0x0/0x12 > Jan 11 23:46:42 twister kernel: > Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 > 3965 > Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 > 8100ae7f2900 81009e857ca8 > Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 > 80652ae0 000100c0 > Jan 11 23:46:42 twister kernel: 7fff > 7fff 0002 > Jan 11 23:46:42 twister kernel: Call Trace: > Jan 11 23:46:42 twister kernel: [] > schedule_timeout+0x1e/0xad > Jan 11 23:46:42 twister kernel: [] > __dequeue_entity+0x1c/0x32 > Jan 11 23:46:42 twister kernel: [] > set_next_entity+0x23/0x73 > Jan 11 23:46:42 twister kernel: [] > wait_for_common+0xc4/0x129 > Jan 11 23:46:42 twister kernel: [] > default_wake_function+0x0/0xe > Jan 11 23:46:42 twister kernel: [] > flush_cpu_workqueue+0x50/0x58 > Jan 11 23:46:42 twister kernel: [] wq_barrier_func+0x0/0x9 > Jan 11 23:46:42 twister kernel: [] flush_workqueue+0x9/0x12 > Jan 11 23:46:42 twister kernel: [] release_dev+0x47c/0x5e2 > Jan 11 23:46:42 twister kernel: [] > do_page_fault+0x2ff/0x65a > Jan 11 23:46:42 twister kernel: [] tty_release+0xc/0x10 > Jan 11 23:46:42 twister kernel: [] __fput+0xb1/0x16f > Jan 11 23:46:42 twister kernel: [] filp_close+0x5d/0x65 > Jan 11 23:46:42 twister kernel: [] sys_close+0x73/0xa6 > Jan 11 23:46:42 twister kernel: [] tracesys+0xdc/0xe1 > Jan 11 23:46:42 twister kernel: > > If I'm not mistaken the "cat" above is the "cat /dev/zero > > /dev/rfcomm0" command. > Any idea about this? Add marcel and davem in cc-list. > > Gabor > > -- > - > MTA SZTAKI Computer and Automation Research Institute > Hungarian Academy of Sciences > - > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel"
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote: On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [8028560d] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [802af3fd] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80471d72] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [803698f3] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [88146a27] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [88146a32] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [80369836] device_for_each_child+0x22/0x4d Jan 11 23:46:42 twister kernel: [88146a05] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [88146a1e] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [8023696d] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802370d2] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [80239b77] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802399a0] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [8020bdc8] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [80239959] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [8020bdbe] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [80223ccc] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [80223d05] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80236b02] flush_cpu_workqueue+0x50/0x58 Jan 11 23:46:42 twister kernel: [80236c32] wq_barrier_func+0x0/0x9 Jan 11 23:46:42 twister kernel: [80236ecf] flush_workqueue+0x9/0x12 Jan 11 23:46:42 twister kernel: [803449f9] release_dev+0x47c/0x5e2 Jan 11 23:46:42 twister kernel: [8021b609] do_page_fault+0x2ff/0x65a Jan 11 23:46:42 twister kernel: [80344b6b] tty_release+0xc/0x10 Jan 11 23:46:42 twister kernel: [80277013] __fput+0xb1/0x16f Jan 11 23:46:42 twister kernel: [80274961] filp_close+0x5d/0x65 Jan 11 23:46:42 twister kernel: [80275ab3] sys_close+0x73/0xa6 Jan 11 23:46:42 twister kernel: [8020b5fc] tracesys+0xdc/0xe1 Jan 11 23:46:42 twister kernel: If I'm
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: > For bluetooth device_move, the only child device of hci_conn dev is > the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off & on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [] device_for_each_child+0x22/0x4d Jan 11 23:46:42 twister kernel: [] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [] flush_cpu_workqueue+0x50/0x58 Jan 11 23:46:42 twister kernel: [] wq_barrier_func+0x0/0x9 Jan 11 23:46:42 twister kernel: [] flush_workqueue+0x9/0x12 Jan 11 23:46:42 twister kernel: [] release_dev+0x47c/0x5e2 Jan 11 23:46:42 twister kernel: [] do_page_fault+0x2ff/0x65a Jan 11 23:46:42 twister kernel: [] tty_release+0xc/0x10 Jan 11 23:46:42 twister kernel: [] __fput+0xb1/0x16f Jan 11 23:46:42 twister kernel: [] filp_close+0x5d/0x65 Jan 11 23:46:42 twister kernel: [] sys_close+0x73/0xa6 Jan 11 23:46:42 twister kernel: [] tracesys+0xdc/0xe1 Jan 11 23:46:42 twister kernel: If I'm not mistaken the "cat" above is the "cat /dev/zero > /dev/rfcomm0" command. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote: For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : There is now no oops, instead the keyboard becomes almost completely unresponsible when I switch off on the phone. The mouse still works (tested both with X and with the VGA console), but terminal input and VT switching is dead. On the VGA console, even scrolling using Shift+PgUp stops working. Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then init complains about hung processes and that it can not umount the file systems and cannot stop the RAID arrays. Once it still rebooted though, the second time it got hung after trying to umount the filesystems, and I had to use Alt+SysRq+b. If I can choose then I prefer the Oops... Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync Jan 11 23:46:35 twister kernel: Emergency Sync complete Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State Jan 11 23:46:42 twister kernel: taskPC stack pid father Jan 11 23:46:42 twister kernel: events/0 D 80487190 0 5 2 Jan 11 23:46:42 twister kernel: 8100bf84fd40 0046 8100ae7f2600 8100bf84fd00 Jan 11 23:46:42 twister kernel: 8100bf84a830 8100bd872000 80652ae0 00010100 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [8028560d] dput+0x26/0x103 Jan 11 23:46:42 twister kernel: [802af3fd] sysfs_move_dir+0x1ee/0x1fd Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80471d72] klist_del+0x15/0x2e Jan 11 23:46:42 twister kernel: [803698f3] device_move+0x80/0x111 Jan 11 23:46:42 twister kernel: [88146a27] :bluetooth:hci_conn_move_child+0x0/0xf Jan 11 23:46:42 twister kernel: [88146a32] :bluetooth:hci_conn_move_child+0xb/0xf Jan 11 23:46:42 twister kernel: [80369836] device_for_each_child+0x22/0x4d Jan 11 23:46:42 twister kernel: [88146a05] :bluetooth:del_conn+0x0/0x22 Jan 11 23:46:42 twister kernel: [88146a1e] :bluetooth:del_conn+0x19/0x22 Jan 11 23:46:42 twister kernel: [8023696d] run_workqueue+0x74/0xee Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802370d2] worker_thread+0xda/0xe7 Jan 11 23:46:42 twister kernel: [80239b77] autoremove_wake_function+0x0/0x2e Jan 11 23:46:42 twister kernel: [80236ff8] worker_thread+0x0/0xe7 Jan 11 23:46:42 twister kernel: [802399a0] kthread+0x47/0x73 Jan 11 23:46:42 twister kernel: [8020bdc8] child_rip+0xa/0x12 Jan 11 23:46:42 twister kernel: [80239959] kthread+0x0/0x73 Jan 11 23:46:42 twister kernel: [8020bdbe] child_rip+0x0/0x12 Jan 11 23:46:42 twister kernel: Jan 11 23:46:42 twister kernel: cat D 7fff 0 3978 3965 Jan 11 23:46:42 twister kernel: 81009e857ce8 0086 8100ae7f2900 81009e857ca8 Jan 11 23:46:42 twister kernel: 8100bd872000 8100bf9e7060 80652ae0 000100c0 Jan 11 23:46:42 twister kernel: 7fff 7fff 0002 Jan 11 23:46:42 twister kernel: Call Trace: Jan 11 23:46:42 twister kernel: [80473543] schedule_timeout+0x1e/0xad Jan 11 23:46:42 twister kernel: [80223ccc] __dequeue_entity+0x1c/0x32 Jan 11 23:46:42 twister kernel: [80223d05] set_next_entity+0x23/0x73 Jan 11 23:46:42 twister kernel: [8047340b] wait_for_common+0xc4/0x129 Jan 11 23:46:42 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 11 23:46:42 twister kernel: [80236b02] flush_cpu_workqueue+0x50/0x58 Jan 11 23:46:42 twister kernel: [80236c32] wq_barrier_func+0x0/0x9 Jan 11 23:46:42 twister kernel: [80236ecf] flush_workqueue+0x9/0x12 Jan 11 23:46:42 twister kernel: [803449f9] release_dev+0x47c/0x5e2 Jan 11 23:46:42 twister kernel: [8021b609] do_page_fault+0x2ff/0x65a Jan 11 23:46:42 twister kernel: [80344b6b] tty_release+0xc/0x10 Jan 11 23:46:42 twister kernel: [80277013] __fput+0xb1/0x16f Jan 11 23:46:42 twister kernel: [80274961] filp_close+0x5d/0x65 Jan 11 23:46:42 twister kernel: [80275ab3] sys_close+0x73/0xa6 Jan 11 23:46:42 twister kernel: [8020b5fc] tracesys+0xdc/0xe1 Jan 11 23:46:42 twister kernel: If I'm not mistaken the cat above is the cat /dev/zero /dev/rfcomm0 command. Gabor -- - MTA SZTAKI
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hi, On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote: > Please confirm the attached patch fixes the oops. I'll separate it into > two patches and forward them to Greg. But bluetooth code also needs to > be updated such that it moves the refcommX device before killing the > connection node. The kernel survives switching the phone off & on but then oopses when pppd is started: Jan 10 10:59:09 twister kernel: Unable to handle kernel NULL pointer dereference at 0008 RIP: Jan 10 10:59:09 twister kernel: [] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: PGD 260bb067 PUD 260bc067 PMD 0 Jan 10 10:59:09 twister kernel: Oops: [1] Jan 10 10:59:09 twister kernel: CPU 0 Jan 10 10:59:09 twister kernel: Modules linked in: ppp_generic slhc binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 snd_ac97_codec saa7134 firewire_ohci firewire_core sg ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c crc_itu_t ir_common snd_pcm parport_pc parport sky2 forcedeth sr_mod snd_timer snd_page_alloc ehci_hcd ohci_hcd floppy cdrom Jan 10 10:59:09 twister kernel: Pid: 4490, comm: pppd Not tainted 2.6.24-rc6debug-dirty #8 Jan 10 10:59:09 twister kernel: RIP: 0010:[] [] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: RSP: 0018:8100260dbc88 EFLAGS: 00010207 Jan 10 10:59:09 twister kernel: RAX: RBX: 8100251577d0 RCX: Jan 10 10:59:09 twister kernel: RDX: 8100260e3410 RSI: 80643d70 RDI: 8100bb32d898 Jan 10 10:59:09 twister kernel: RBP: R08: 6390dfaa7e3bd395 R09: 8050cee6 Jan 10 10:59:09 twister kernel: R10: 8100260dbcb8 R11: 00300018 R12: 8100b6ef4b90 Jan 10 10:59:09 twister kernel: R13: fff4 R14: 81004d9d2600 R15: 8100b6eb227e Jan 10 10:59:09 twister kernel: FS: 2b5731903ac0() GS:805ba000() knlGS: Jan 10 10:59:09 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 10 10:59:09 twister kernel: CR2: 0008 CR3: 260b1000 CR4: 06e0 Jan 10 10:59:09 twister kernel: DR0: DR1: DR2: Jan 10 10:59:09 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 10 10:59:09 twister kernel: Process pppd (pid: 4490, threadinfo 8100260da000, task 81009550d060) Jan 10 10:59:09 twister kernel: Stack: 8100260e3690 8100b6ef4b90 81003a5942a0 802af272 Jan 10 10:59:09 twister kernel: 8100becca2d0 8100becca2d0 8100260df300 81003a5942a0 Jan 10 10:59:09 twister kernel: fff4 81004d9d2600 8100b6eb227e 803007a5 Jan 10 10:59:09 twister kernel: Call Trace: Jan 10 10:59:09 twister kernel: [] sysfs_move_dir+0x63/0x1fd Jan 10 10:59:09 twister kernel: [] kobject_move+0xba/0x110 Jan 10 10:59:09 twister kernel: [] device_move+0x59/0x111 Jan 10 10:59:09 twister kernel: [] :rfcomm:rfcomm_tty_open+0x1c1/0x1db Jan 10 10:59:09 twister kernel: [] default_wake_function+0x0/0xe Jan 10 10:59:09 twister kernel: [] tty_open+0x1c5/0x2af Jan 10 10:59:09 twister kernel: [] chrdev_open+0x113/0x132 Jan 10 10:59:09 twister kernel: [] chrdev_open+0x0/0x132 Jan 10 10:59:09 twister kernel: [] __dentry_open+0xda/0x1aa Jan 10 10:59:09 twister kernel: [] do_filp_open+0x2d/0x3d Jan 10 10:59:09 twister kernel: [] mutex_lock+0xd/0x1d Jan 10 10:59:09 twister kernel: [] get_unused_fd_flags+0x6d/0x100 Jan 10 10:59:09 twister kernel: [] do_sys_open+0x46/0xc3 Jan 10 10:59:09 twister kernel: [] tracesys+0xdc/0xe1 Jan 10 10:59:09 twister kernel: Jan 10 10:59:09 twister kernel: Jan 10 10:59:09 twister kernel: Code: 48 8b 45 08 48 39 d0 74 05 48 89 c5 eb f2 48 8b 7b 08 48 81 Jan 10 10:59:09 twister kernel: RIP [] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: RSP Jan 10 10:59:09 twister kernel: CR2: 0008 Jan 10 10:59:09 twister kernel: ---[ end trace b33fe66800773e0c ]--- Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hi, On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote: Please confirm the attached patch fixes the oops. I'll separate it into two patches and forward them to Greg. But bluetooth code also needs to be updated such that it moves the refcommX device before killing the connection node. The kernel survives switching the phone off on but then oopses when pppd is started: Jan 10 10:59:09 twister kernel: Unable to handle kernel NULL pointer dereference at 0008 RIP: Jan 10 10:59:09 twister kernel: [802af03f] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: PGD 260bb067 PUD 260bc067 PMD 0 Jan 10 10:59:09 twister kernel: Oops: [1] Jan 10 10:59:09 twister kernel: CPU 0 Jan 10 10:59:09 twister kernel: Modules linked in: ppp_generic slhc binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 snd_ac97_codec saa7134 firewire_ohci firewire_core sg ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c crc_itu_t ir_common snd_pcm parport_pc parport sky2 forcedeth sr_mod snd_timer snd_page_alloc ehci_hcd ohci_hcd floppy cdrom Jan 10 10:59:09 twister kernel: Pid: 4490, comm: pppd Not tainted 2.6.24-rc6debug-dirty #8 Jan 10 10:59:09 twister kernel: RIP: 0010:[802af03f] [802af03f] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: RSP: 0018:8100260dbc88 EFLAGS: 00010207 Jan 10 10:59:09 twister kernel: RAX: RBX: 8100251577d0 RCX: Jan 10 10:59:09 twister kernel: RDX: 8100260e3410 RSI: 80643d70 RDI: 8100bb32d898 Jan 10 10:59:09 twister kernel: RBP: R08: 6390dfaa7e3bd395 R09: 8050cee6 Jan 10 10:59:09 twister kernel: R10: 8100260dbcb8 R11: 00300018 R12: 8100b6ef4b90 Jan 10 10:59:09 twister kernel: R13: fff4 R14: 81004d9d2600 R15: 8100b6eb227e Jan 10 10:59:09 twister kernel: FS: 2b5731903ac0() GS:805ba000() knlGS: Jan 10 10:59:09 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 10 10:59:09 twister kernel: CR2: 0008 CR3: 260b1000 CR4: 06e0 Jan 10 10:59:09 twister kernel: DR0: DR1: DR2: Jan 10 10:59:09 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 10 10:59:09 twister kernel: Process pppd (pid: 4490, threadinfo 8100260da000, task 81009550d060) Jan 10 10:59:09 twister kernel: Stack: 8100260e3690 8100b6ef4b90 81003a5942a0 802af272 Jan 10 10:59:09 twister kernel: 8100becca2d0 8100becca2d0 8100260df300 81003a5942a0 Jan 10 10:59:09 twister kernel: fff4 81004d9d2600 8100b6eb227e 803007a5 Jan 10 10:59:09 twister kernel: Call Trace: Jan 10 10:59:09 twister kernel: [802af272] sysfs_move_dir+0x63/0x1fd Jan 10 10:59:09 twister kernel: [803007a5] kobject_move+0xba/0x110 Jan 10 10:59:09 twister kernel: [803698cc] device_move+0x59/0x111 Jan 10 10:59:09 twister kernel: [8827d31e] :rfcomm:rfcomm_tty_open+0x1c1/0x1db Jan 10 10:59:09 twister kernel: [80224204] default_wake_function+0x0/0xe Jan 10 10:59:09 twister kernel: [80345a2d] tty_open+0x1c5/0x2af Jan 10 10:59:09 twister kernel: [80278590] chrdev_open+0x113/0x132 Jan 10 10:59:09 twister kernel: [8027847d] chrdev_open+0x0/0x132 Jan 10 10:59:09 twister kernel: [80274b4a] __dentry_open+0xda/0x1aa Jan 10 10:59:09 twister kernel: [80274cd1] do_filp_open+0x2d/0x3d Jan 10 10:59:09 twister kernel: [80473812] mutex_lock+0xd/0x1d Jan 10 10:59:09 twister kernel: [802749d6] get_unused_fd_flags+0x6d/0x100 Jan 10 10:59:09 twister kernel: [80274d27] do_sys_open+0x46/0xc3 Jan 10 10:59:09 twister kernel: [8020b5fc] tracesys+0xdc/0xe1 Jan 10 10:59:09 twister kernel: Jan 10 10:59:09 twister kernel: Jan 10 10:59:09 twister kernel: Code: 48 8b 45 08 48 39 d0 74 05 48 89 c5 eb f2 48 8b 7b 08 48 81 Jan 10 10:59:09 twister kernel: RIP [802af03f] sysfs_get_dentry+0x27/0x8f Jan 10 10:59:09 twister kernel: RSP 8100260dbc88 Jan 10 10:59:09 twister kernel: CR2: 0008 Jan 10 10:59:09 twister kernel: ---[ end trace b33fe66800773e0c ]--- Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote: > Hello, > > My laptop and cell finally decided to talk to each other and I could > reproduce the bug here. The attached patch should remove the oops. > > The bug is two folded. I just skimmed through the bluetooth code and am > very likely to wrong at places. Please correct me where I'm wrong. > > 1. It's introduced by class device migration and the bug will go away > with CONFIG_SYSFS_DEPRECATED turned on. With CONFIG_SYSFS_DEPRECATED > turned off, what used to be class devices live under actual devices. > For rfcomm, this means the rfcommN tty device now lives under the > acl node (probably represents an active connection?) instead of the > class directory. > > It seems rfcommN devices are supposed to survive over disconnects so the > rfcommN device moves under the live connection while connection is alive > and retreats back to a default directory when the connection is lost. > This is all fine and dandy as long as the rfcommN device lives under > class directory as class directory never goes away. > > However, with recent sysfs updates, rfcommN now lives directly under the > acl node. If the connection goes away while rfcommN device is under > it, it gets deleted but rfcommN is still treated as alive. > > This isn't supported. sysfs doesn't allow parents to die first and the > dangling children to be salvaged using sysfs_move(). > > 2. Which in turn exposes three bugs in sysfs > - sysfs_lookup() returning NULL on negative lookup. sysfs > code requires that all negative dentries are shot down. > lookup should return -ENOENT instead of NULL. > - in move and rename, error handling is wrong. It ends up > passing ERR_PTR() values to dput(). > - there was an extra dput() in sysfs_move_dir(). > > The attached patch fixes all sysfs bugs and removes the oops. However, > rfcommX moving is still broken. The rfcommX device won't be visible > from sysfs tree after the initial move failure and all following moves > will fail. > > Please confirm the attached patch fixes the oops. I'll separate it into > two patches and forward them to Greg. But bluetooth code also needs to > be updated such that it moves the refcommX device before killing the > connection node. For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : --- net/bluetooth/hci_sysfs.c |7 +++ 1 file changed, 7 insertions(+) diff -upr linux/net/bluetooth/hci_sysfs.c linux.new/net/bluetooth/hci_sysfs.c --- linux/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:51.0 +0800 +++ linux.new/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:22.0 +0800 @@ -316,9 +316,16 @@ void hci_conn_add_sysfs(struct hci_conn schedule_work(>work); } +static int hci_conn_move_child(struct device *dev, void *data) +{ + device_move(dev, NULL); + return 0; +} + static void del_conn(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, work); + device_for_each_child(>dev, NULL, hci_conn_move_child); device_del(>dev); put_device(>dev); } > > Thanks. > > -- > tejun > diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c > diff --git a/fs/dcache.c b/fs/dcache.c > diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c > index 3371629..f281cc6 100644 > --- a/fs/sysfs/dir.c > +++ b/fs/sysfs/dir.c > @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, > struct dentry *dentry, > sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); > > /* no such entry */ > - if (!sd) > + if (!sd) { > + ret = ERR_PTR(-ENOENT); > goto out_unlock; > + } > > /* attach dentry and inode */ > inode = sysfs_get_inode(sd); > @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char > *new_name) > old_dentry = sysfs_get_dentry(sd); > if (IS_ERR(old_dentry)) { > error = PTR_ERR(old_dentry); > + old_dentry = NULL; > goto out; > } > > @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject > *new_parent_kobj) > old_dentry = sysfs_get_dentry(sd); > if (IS_ERR(old_dentry)) { > error = PTR_ERR(old_dentry); > + old_dentry = NULL; > goto out; > } > old_parent = old_dentry->d_parent; > @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject > *new_parent_kobj) > new_parent = sysfs_get_dentry(new_parent_sd); > if (IS_ERR(new_parent)) { > error = PTR_ERR(new_parent); > + new_parent = NULL; > goto out; > } > > @@ -878,7 +883,6 @@ again: > error = 0; > d_add(new_dentry, NULL); > d_move(old_dentry, new_dentry); > - dput(new_dentry); > > /* Remove from
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, 09 Jan 2008 18:16:02 +0900, Tejun Heo <[EMAIL PROTECTED]> wrote: > This isn't supported. sysfs doesn't allow parents to die first and the > dangling children to be salvaged using sysfs_move(). But (with the sysfs bugs fixed) it will return an error, won't it? It seems the bluetooth code is rather optimistic with never checking device_move()'s return code. > > 2. Which in turn exposes three bugs in sysfs > - sysfs_lookup() returning NULL on negative lookup. sysfs > code requires that all negative dentries are shot down. > lookup should return -ENOENT instead of NULL. > - in move and rename, error handling is wrong. It ends up > passing ERR_PTR() values to dput(). > - there was an extra dput() in sysfs_move_dir(). Ups, seem to have copied that from some old version of sysfs_rename_dir()... > > The attached patch fixes all sysfs bugs and removes the oops. Looks OK at first glance. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, My laptop and cell finally decided to talk to each other and I could reproduce the bug here. The attached patch should remove the oops. The bug is two folded. I just skimmed through the bluetooth code and am very likely to wrong at places. Please correct me where I'm wrong. 1. It's introduced by class device migration and the bug will go away with CONFIG_SYSFS_DEPRECATED turned on. With CONFIG_SYSFS_DEPRECATED turned off, what used to be class devices live under actual devices. For rfcomm, this means the rfcommN tty device now lives under the acl node (probably represents an active connection?) instead of the class directory. It seems rfcommN devices are supposed to survive over disconnects so the rfcommN device moves under the live connection while connection is alive and retreats back to a default directory when the connection is lost. This is all fine and dandy as long as the rfcommN device lives under class directory as class directory never goes away. However, with recent sysfs updates, rfcommN now lives directly under the acl node. If the connection goes away while rfcommN device is under it, it gets deleted but rfcommN is still treated as alive. This isn't supported. sysfs doesn't allow parents to die first and the dangling children to be salvaged using sysfs_move(). 2. Which in turn exposes three bugs in sysfs - sysfs_lookup() returning NULL on negative lookup. sysfs code requires that all negative dentries are shot down. lookup should return -ENOENT instead of NULL. - in move and rename, error handling is wrong. It ends up passing ERR_PTR() values to dput(). - there was an extra dput() in sysfs_move_dir(). The attached patch fixes all sysfs bugs and removes the oops. However, rfcommX moving is still broken. The rfcommX device won't be visible from sysfs tree after the initial move failure and all following moves will fail. Please confirm the attached patch fixes the oops. I'll separate it into two patches and forward them to Greg. But bluetooth code also needs to be updated such that it moves the refcommX device before killing the connection node. Thanks. -- tejun diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c diff --git a/fs/dcache.c b/fs/dcache.c diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..f281cc6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, sd = sysfs_find_dirent(parent_sd, dentry->d_name.name); /* no such entry */ - if (!sd) + if (!sd) { + ret = ERR_PTR(-ENOENT); goto out_unlock; + } /* attach dentry and inode */ inode = sysfs_get_inode(sd); @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } old_parent = old_dentry->d_parent; @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) new_parent = sysfs_get_dentry(new_parent_sd); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); + new_parent = NULL; goto out; } @@ -878,7 +883,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd); diff --git a/include/linux/dcache.h b/include/linux/dcache.h
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, My laptop and cell finally decided to talk to each other and I could reproduce the bug here. The attached patch should remove the oops. The bug is two folded. I just skimmed through the bluetooth code and am very likely to wrong at places. Please correct me where I'm wrong. 1. It's introduced by class device migration and the bug will go away with CONFIG_SYSFS_DEPRECATED turned on. With CONFIG_SYSFS_DEPRECATED turned off, what used to be class devices live under actual devices. For rfcomm, this means the rfcommN tty device now lives under the acl node (probably represents an active connection?) instead of the class directory. It seems rfcommN devices are supposed to survive over disconnects so the rfcommN device moves under the live connection while connection is alive and retreats back to a default directory when the connection is lost. This is all fine and dandy as long as the rfcommN device lives under class directory as class directory never goes away. However, with recent sysfs updates, rfcommN now lives directly under the acl node. If the connection goes away while rfcommN device is under it, it gets deleted but rfcommN is still treated as alive. This isn't supported. sysfs doesn't allow parents to die first and the dangling children to be salvaged using sysfs_move(). 2. Which in turn exposes three bugs in sysfs - sysfs_lookup() returning NULL on negative lookup. sysfs code requires that all negative dentries are shot down. lookup should return -ENOENT instead of NULL. - in move and rename, error handling is wrong. It ends up passing ERR_PTR() values to dput(). - there was an extra dput() in sysfs_move_dir(). The attached patch fixes all sysfs bugs and removes the oops. However, rfcommX moving is still broken. The rfcommX device won't be visible from sysfs tree after the initial move failure and all following moves will fail. Please confirm the attached patch fixes the oops. I'll separate it into two patches and forward them to Greg. But bluetooth code also needs to be updated such that it moves the refcommX device before killing the connection node. Thanks. -- tejun diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c diff --git a/fs/dcache.c b/fs/dcache.c diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..f281cc6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, sd = sysfs_find_dirent(parent_sd, dentry-d_name.name); /* no such entry */ - if (!sd) + if (!sd) { + ret = ERR_PTR(-ENOENT); goto out_unlock; + } /* attach dentry and inode */ inode = sysfs_get_inode(sd); @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } old_parent = old_dentry-d_parent; @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) new_parent = sysfs_get_dentry(new_parent_sd); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); + new_parent = NULL; goto out; } @@ -878,7 +883,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd); diff --git a/include/linux/dcache.h b/include/linux/dcache.h
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, 09 Jan 2008 18:16:02 +0900, Tejun Heo [EMAIL PROTECTED] wrote: This isn't supported. sysfs doesn't allow parents to die first and the dangling children to be salvaged using sysfs_move(). But (with the sysfs bugs fixed) it will return an error, won't it? It seems the bluetooth code is rather optimistic with never checking device_move()'s return code. 2. Which in turn exposes three bugs in sysfs - sysfs_lookup() returning NULL on negative lookup. sysfs code requires that all negative dentries are shot down. lookup should return -ENOENT instead of NULL. - in move and rename, error handling is wrong. It ends up passing ERR_PTR() values to dput(). - there was an extra dput() in sysfs_move_dir(). Ups, seem to have copied that from some old version of sysfs_rename_dir()... The attached patch fixes all sysfs bugs and removes the oops. Looks OK at first glance. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote: Hello, My laptop and cell finally decided to talk to each other and I could reproduce the bug here. The attached patch should remove the oops. The bug is two folded. I just skimmed through the bluetooth code and am very likely to wrong at places. Please correct me where I'm wrong. 1. It's introduced by class device migration and the bug will go away with CONFIG_SYSFS_DEPRECATED turned on. With CONFIG_SYSFS_DEPRECATED turned off, what used to be class devices live under actual devices. For rfcomm, this means the rfcommN tty device now lives under the acl node (probably represents an active connection?) instead of the class directory. It seems rfcommN devices are supposed to survive over disconnects so the rfcommN device moves under the live connection while connection is alive and retreats back to a default directory when the connection is lost. This is all fine and dandy as long as the rfcommN device lives under class directory as class directory never goes away. However, with recent sysfs updates, rfcommN now lives directly under the acl node. If the connection goes away while rfcommN device is under it, it gets deleted but rfcommN is still treated as alive. This isn't supported. sysfs doesn't allow parents to die first and the dangling children to be salvaged using sysfs_move(). 2. Which in turn exposes three bugs in sysfs - sysfs_lookup() returning NULL on negative lookup. sysfs code requires that all negative dentries are shot down. lookup should return -ENOENT instead of NULL. - in move and rename, error handling is wrong. It ends up passing ERR_PTR() values to dput(). - there was an extra dput() in sysfs_move_dir(). The attached patch fixes all sysfs bugs and removes the oops. However, rfcommX moving is still broken. The rfcommX device won't be visible from sysfs tree after the initial move failure and all following moves will fail. Please confirm the attached patch fixes the oops. I'll separate it into two patches and forward them to Greg. But bluetooth code also needs to be updated such that it moves the refcommX device before killing the connection node. For bluetooth device_move, the only child device of hci_conn dev is the rfcomm tty_dev. How about the following patch, please verify : --- net/bluetooth/hci_sysfs.c |7 +++ 1 file changed, 7 insertions(+) diff -upr linux/net/bluetooth/hci_sysfs.c linux.new/net/bluetooth/hci_sysfs.c --- linux/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:51.0 +0800 +++ linux.new/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:22.0 +0800 @@ -316,9 +316,16 @@ void hci_conn_add_sysfs(struct hci_conn schedule_work(conn-work); } +static int hci_conn_move_child(struct device *dev, void *data) +{ + device_move(dev, NULL); + return 0; +} + static void del_conn(struct work_struct *work) { struct hci_conn *conn = container_of(work, struct hci_conn, work); + device_for_each_child(conn-dev, NULL, hci_conn_move_child); device_del(conn-dev); put_device(conn-dev); } Thanks. -- tejun diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c diff --git a/fs/dcache.c b/fs/dcache.c diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..f281cc6 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry, sd = sysfs_find_dirent(parent_sd, dentry-d_name.name); /* no such entry */ - if (!sd) + if (!sd) { + ret = ERR_PTR(-ENOENT); goto out_unlock; + } /* attach dentry and inode */ inode = sysfs_get_inode(sd); @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) old_dentry = sysfs_get_dentry(sd); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); + old_dentry = NULL; goto out; } old_parent = old_dentry-d_parent; @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) new_parent = sysfs_get_dentry(new_parent_sd); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); + new_parent = NULL; goto out; } @@ -878,7 +883,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd);
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 08, 2008 at 06:42:43PM +0900, Tejun Heo wrote: > Thanks. Please apply the attached patch and report the oops. Jan 8 14:23:29 twister kernel: XXX: moving /devices/virtual/tty/rfcomm0 under /devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty Jan 8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / virtual Jan 8 14:23:29 twister kernel: XXX: looking up virtual (8100bf417220) / tty Jan 8 14:23:29 twister kernel: XXX: looking up tty (8100bb31c440) / rfcomm0 Jan 8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / pci:00 Jan 8 14:23:29 twister kernel: XXX: looking up pci:00 (8100bf658880) / :00:02.0 Jan 8 14:23:29 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) / usb1 Jan 8 14:23:29 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3 Jan 8 14:23:29 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0 Jan 8 14:23:29 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / hci0 Jan 8 14:23:29 twister kernel: XXX: looking up hci0 (8100bb318aa0) / acl001BAFE1624D Jan 8 14:23:29 twister kernel: XXX: looking up acl001BAFE1624D (8100b555a000) / tty Jan 8 14:23:52 twister kernel: XXX: moving /devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty/rfcomm0 under /devices/virtual/tty Jan 8 14:23:52 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:52 twister kernel: XXX: looking up devices (8100bf658cc0) / pci:00 Jan 8 14:23:52 twister kernel: XXX: looking up pci:00 (8100bf658880) / :00:02.0 Jan 8 14:23:52 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) / usb1 Jan 8 14:23:52 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3 Jan 8 14:23:52 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0 Jan 8 14:23:52 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / hci0 Jan 8 14:23:52 twister kernel: XXX: looking up hci0 (8100bb318aa0) / acl001BAFE1624D Jan 8 14:23:52 twister kernel: XXX: looking up acl001BAFE1624D () / Jan 8 14:23:52 twister kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 8 14:23:52 twister kernel: [] mutex_lock+0x10/0x1d Jan 8 14:23:52 twister kernel: PGD b19b6067 PUD b1969067 PMD 0 Jan 8 14:23:52 twister kernel: Oops: 0002 [1] Jan 8 14:23:52 twister kernel: CPU 0 Jan 8 14:23:52 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 sg snd_ac97_codec ac97_bus saa7134 firewire_ohci firewire_core crc_itu_t videobuf_dma_sg videobuf_core ir_kbd_i2c snd_pcm ir_common sky2 forcedeth ehci_hcd snd_timer snd_page_alloc ohci_hcd sr_mod parport_pc parport cdrom floppy Jan 8 14:23:52 twister kernel: Pid: 3228, comm: cat Not tainted 2.6.24-rc6debug-dirty #7 Jan 8 14:23:52 twister kernel: RIP: 0010:[] [] mutex_lock+0x10/0x1d Jan 8 14:23:52 twister kernel: RSP: 0018:8100b19a5d08 EFLAGS: 00010246 Jan 8 14:23:52 twister kernel: RAX: RBX: 00b8 RCX: 00010352 Jan 8 14:23:52 twister kernel: RDX: 8100b19a5fd8 RSI: 00010352 RDI: 00b8 Jan 8 14:23:52 twister kernel: RBP: 80593db0 R08: R09: 0097 Jan 8 14:23:52 twister kernel: R10: 000e R11: 803103d4 R12: 8100ba312b90 Jan 8 14:23:52 twister kernel: R13: 8100bfa78d00 R14: 8100b1980b00 R15: 8100b6c6b840 Jan 8 14:23:52 twister kernel: FS: 2b3bd213d6e0() GS:805ba000() knlGS: Jan 8 14:23:52 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 8 14:23:52 twister kernel: CR2: 00b8 CR3: b19ae000 CR4: 06e0 Jan 8 14:23:52 twister kernel: DR0: DR1: DR2: Jan 8 14:23:52 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 8 14:23:52 twister kernel: Process cat (pid: 3228, threadinfo 8100b19a4000, task 8100b18d9060) Jan 8 14:23:52 twister kernel: Stack: 8100bfa78d00 8028560d 8100b554ca28 802ae9d5 Jan 8 14:23:52 twister kernel: 8100bf9f4fa0 8100ba312b90 8100b5326c60 802af2de Jan 8 14:23:52 twister kernel: 8100b6ca4cd0 8100b6ca4cd0 8100bfa78d00 8100b5326ba0 Jan 8 14:23:52 twister kernel: Call Trace: Jan 8 14:23:52 twister kernel: [] dput+0x26/0x103 Jan 8 14:23:52 twister kernel: []
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Gabor Gombas wrote: > On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote: > >> Does the attached patch fix the problem? > > No, it still oopses. Thanks. Please apply the attached patch and report the oops. -- tejun diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c diff --git a/fs/dcache.c b/fs/dcache.c diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..f5ec27e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -96,7 +96,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) * RETURNS: * Pointer to found dentry on success, ERR_PTR() value on error. */ -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +static struct dentry *__sysfs_get_dentry(struct sysfs_dirent *sd, int dbg) { struct dentry *dentry = dget(sysfs_sb->s_root); @@ -111,6 +111,8 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) /* look it up */ parent = dentry; + printk("XXX: looking up %s (%p) / %s\n", + parent->d_name.name, parent->d_inode, cur->s_name); mutex_lock(>d_inode->i_mutex); dentry = lookup_one_noperm(cur->s_name, parent); mutex_unlock(>d_inode->i_mutex); @@ -122,6 +124,11 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) return dentry; } +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +{ + return __sysfs_get_dentry(sd, 0); +} + /** * sysfs_get_active - get an active reference to sysfs_dirent * @sd: sysfs_dirent to get an active reference to @@ -838,6 +845,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) mutex_lock(_rename_mutex); BUG_ON(!sd->s_parent); + { + char *s = kobject_get_path(kobj, GFP_KERNEL); + char *p = kobject_get_path(new_parent_kobj, GFP_KERNEL); + + printk("XXX: moving %s under %s\n", s, p); + kfree(s); + kfree(p); + } new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : _root; error = 0; @@ -845,14 +860,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) goto out; /* nothing to move */ /* get dentries */ - old_dentry = sysfs_get_dentry(sd); + old_dentry = __sysfs_get_dentry(sd, 1); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); goto out; } old_parent = old_dentry->d_parent; - new_parent = sysfs_get_dentry(new_parent_sd); + new_parent = __sysfs_get_dentry(new_parent_sd, 1); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); goto out; @@ -878,7 +893,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd);
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Gabor Gombas wrote: On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote: Does the attached patch fix the problem? No, it still oopses. Thanks. Please apply the attached patch and report the oops. -- tejun diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c diff --git a/fs/dcache.c b/fs/dcache.c diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..f5ec27e 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -96,7 +96,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd) * RETURNS: * Pointer to found dentry on success, ERR_PTR() value on error. */ -struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +static struct dentry *__sysfs_get_dentry(struct sysfs_dirent *sd, int dbg) { struct dentry *dentry = dget(sysfs_sb-s_root); @@ -111,6 +111,8 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) /* look it up */ parent = dentry; + printk(XXX: looking up %s (%p) / %s\n, + parent-d_name.name, parent-d_inode, cur-s_name); mutex_lock(parent-d_inode-i_mutex); dentry = lookup_one_noperm(cur-s_name, parent); mutex_unlock(parent-d_inode-i_mutex); @@ -122,6 +124,11 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) return dentry; } +struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd) +{ + return __sysfs_get_dentry(sd, 0); +} + /** * sysfs_get_active - get an active reference to sysfs_dirent * @sd: sysfs_dirent to get an active reference to @@ -838,6 +845,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) mutex_lock(sysfs_rename_mutex); BUG_ON(!sd-s_parent); + { + char *s = kobject_get_path(kobj, GFP_KERNEL); + char *p = kobject_get_path(new_parent_kobj, GFP_KERNEL); + + printk(XXX: moving %s under %s\n, s, p); + kfree(s); + kfree(p); + } new_parent_sd = new_parent_kobj-sd ? new_parent_kobj-sd : sysfs_root; error = 0; @@ -845,14 +860,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj) goto out; /* nothing to move */ /* get dentries */ - old_dentry = sysfs_get_dentry(sd); + old_dentry = __sysfs_get_dentry(sd, 1); if (IS_ERR(old_dentry)) { error = PTR_ERR(old_dentry); goto out; } old_parent = old_dentry-d_parent; - new_parent = sysfs_get_dentry(new_parent_sd); + new_parent = __sysfs_get_dentry(new_parent_sd, 1); if (IS_ERR(new_parent)) { error = PTR_ERR(new_parent); goto out; @@ -878,7 +893,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd);
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 08, 2008 at 06:42:43PM +0900, Tejun Heo wrote: Thanks. Please apply the attached patch and report the oops. Jan 8 14:23:29 twister kernel: XXX: moving /devices/virtual/tty/rfcomm0 under /devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty Jan 8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / virtual Jan 8 14:23:29 twister kernel: XXX: looking up virtual (8100bf417220) / tty Jan 8 14:23:29 twister kernel: XXX: looking up tty (8100bb31c440) / rfcomm0 Jan 8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / pci:00 Jan 8 14:23:29 twister kernel: XXX: looking up pci:00 (8100bf658880) / :00:02.0 Jan 8 14:23:29 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) / usb1 Jan 8 14:23:29 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3 Jan 8 14:23:29 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0 Jan 8 14:23:29 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / hci0 Jan 8 14:23:29 twister kernel: XXX: looking up hci0 (8100bb318aa0) / acl001BAFE1624D Jan 8 14:23:29 twister kernel: XXX: looking up acl001BAFE1624D (8100b555a000) / tty Jan 8 14:23:52 twister kernel: XXX: moving /devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty/rfcomm0 under /devices/virtual/tty Jan 8 14:23:52 twister kernel: XXX: looking up / (8100bf40) / devices Jan 8 14:23:52 twister kernel: XXX: looking up devices (8100bf658cc0) / pci:00 Jan 8 14:23:52 twister kernel: XXX: looking up pci:00 (8100bf658880) / :00:02.0 Jan 8 14:23:52 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) / usb1 Jan 8 14:23:52 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3 Jan 8 14:23:52 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0 Jan 8 14:23:52 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / hci0 Jan 8 14:23:52 twister kernel: XXX: looking up hci0 (8100bb318aa0) / acl001BAFE1624D Jan 8 14:23:52 twister kernel: XXX: looking up acl001BAFE1624D () / Jan 8 14:23:52 twister kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 8 14:23:52 twister kernel: [80473875] mutex_lock+0x10/0x1d Jan 8 14:23:52 twister kernel: PGD b19b6067 PUD b1969067 PMD 0 Jan 8 14:23:52 twister kernel: Oops: 0002 [1] Jan 8 14:23:52 twister kernel: CPU 0 Jan 8 14:23:52 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 sg snd_ac97_codec ac97_bus saa7134 firewire_ohci firewire_core crc_itu_t videobuf_dma_sg videobuf_core ir_kbd_i2c snd_pcm ir_common sky2 forcedeth ehci_hcd snd_timer snd_page_alloc ohci_hcd sr_mod parport_pc parport cdrom floppy Jan 8 14:23:52 twister kernel: Pid: 3228, comm: cat Not tainted 2.6.24-rc6debug-dirty #7 Jan 8 14:23:52 twister kernel: RIP: 0010:[80473875] [80473875] mutex_lock+0x10/0x1d Jan 8 14:23:52 twister kernel: RSP: 0018:8100b19a5d08 EFLAGS: 00010246 Jan 8 14:23:52 twister kernel: RAX: RBX: 00b8 RCX: 00010352 Jan 8 14:23:52 twister kernel: RDX: 8100b19a5fd8 RSI: 00010352 RDI: 00b8 Jan 8 14:23:52 twister kernel: RBP: 80593db0 R08: R09: 0097 Jan 8 14:23:52 twister kernel: R10: 000e R11: 803103d4 R12: 8100ba312b90 Jan 8 14:23:52 twister kernel: R13: 8100bfa78d00 R14: 8100b1980b00 R15: 8100b6c6b840 Jan 8 14:23:52 twister kernel: FS: 2b3bd213d6e0() GS:805ba000() knlGS: Jan 8 14:23:52 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 8 14:23:52 twister kernel: CR2: 00b8 CR3: b19ae000 CR4: 06e0 Jan 8 14:23:52 twister kernel: DR0: DR1: DR2: Jan 8 14:23:52 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 8 14:23:52 twister kernel: Process cat (pid: 3228, threadinfo 8100b19a4000, task 8100b18d9060) Jan 8 14:23:52 twister kernel: Stack: 8100bfa78d00 8028560d 8100b554ca28 802ae9d5 Jan 8 14:23:52 twister kernel: 8100bf9f4fa0 8100ba312b90 8100b5326c60 802af2de Jan 8 14:23:52 twister kernel: 8100b6ca4cd0 8100b6ca4cd0 8100bfa78d00 8100b5326ba0 Jan 8 14:23:52 twister kernel: Call Trace: Jan 8 14:23:52 twister kernel: [8028560d]
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote: > Does the attached patch fix the problem? No, it still oopses. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Gabor Gombas wrote: > Hi, > > On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote: > >> Could you stick >> if (!parent->d_inode) >> printk(KERN_WARNING "sysfs locking blows: %s", >> parent->d_name.name); >> right before >> mutex_lock(>d_inode->i_mutex); >> dentry = lookup_one_noperm(cur->s_name, parent); >> mutex_unlock(>d_inode->i_mutex); >> in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, >> trigger? > > Here it is: > > Jan 7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D<1>Unable > to handle kernel NULL pointer dereference at 00b8 RIP: Does the attached patch fix the problem? -- tejun diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..4e7f3bf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -878,7 +878,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd);
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hi, On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote: > Could you stick > if (!parent->d_inode) > printk(KERN_WARNING "sysfs locking blows: %s", > parent->d_name.name); > right before > mutex_lock(>d_inode->i_mutex); > dentry = lookup_one_noperm(cur->s_name, parent); > mutex_unlock(>d_inode->i_mutex); > in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, > trigger? Here it is: Jan 7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D<1>Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 7 14:35:43 twister kernel: [] mutex_lock+0x10/0x1d Jan 7 14:35:43 twister kernel: PGD b6031067 PUD b609c067 PMD 0 Jan 7 14:35:43 twister kernel: Oops: 0002 [1] Jan 7 14:35:43 twister kernel: CPU 0 Jan 7 14:35:43 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 parport_pc parport floppy snd_intel8x0 snd_ac97_codec saa7134 ac97_bus firewire_ohci firewire_core videobuf_dma_sg videobuf_core ir_kbd_i2c sky2 ir_common crc_itu_t snd_pcm forcedeth snd_timer snd_page_alloc ehci_hcd ohci_hcd sg sr_mod cdrom Jan 7 14:35:43 twister kernel: Pid: 3218, comm: cat Not tainted 2.6.24-rc6debug-dirty #5 Jan 7 14:35:43 twister kernel: RIP: 0010:[] [] mutex_lock+0x10/0x1d Jan 7 14:35:43 twister kernel: RSP: 0018:8100b5733d08 EFLAGS: 00010246 Jan 7 14:35:43 twister kernel: RAX: RBX: 00b8 RCX: 0001 Jan 7 14:35:43 twister kernel: RDX: 8100b5733fd8 RSI: 8100bfab6000 RDI: 00b8 Jan 7 14:35:43 twister kernel: RBP: 80593db0 R08: 8057b118 R09: 806093d0 Jan 7 14:35:43 twister kernel: R10: 8100b6074aa0 R11: 81000101b100 R12: 8100b6074aa0 Jan 7 14:35:43 twister kernel: R13: fff4 R14: 8100b571fb80 R15: 8100b573e000 Jan 7 14:35:43 twister kernel: FS: 2aead9fc56e0() GS:805ba000() knlGS: Jan 7 14:35:43 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 7 14:35:43 twister kernel: CR2: 00b8 CR3: b60b2000 CR4: 06e0 Jan 7 14:35:43 twister kernel: DR0: DR1: DR2: Jan 7 14:35:43 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 7 14:35:43 twister kernel: Process cat (pid: 3218, threadinfo 8100b5732000, task 8100bfa70830) Jan 7 14:35:43 twister kernel: Stack: e207e310 8028560d 8100b202a7d0 802af076 Jan 7 14:35:43 twister kernel: 8100bfa52f00 8100b6074aa0 8100b618c720 802af28b Jan 7 14:35:43 twister kernel: 8100b61308d0 8100b61308d0 8100bfa1a800 8100b618c720 Jan 7 14:35:43 twister kernel: Call Trace: Jan 7 14:35:43 twister kernel: [] dput+0x26/0x103 Jan 7 14:35:43 twister kernel: [] sysfs_get_dentry+0x5e/0xa8 Jan 7 14:35:43 twister kernel: [] sysfs_move_dir+0x63/0x204 Jan 7 14:35:43 twister kernel: [] kobject_move+0xba/0x110 Jan 7 14:35:43 twister kernel: [] device_move+0x59/0x111 Jan 7 14:35:43 twister kernel: [] :rfcomm:rfcomm_tty_close+0x2f/0x74 Jan 7 14:35:43 twister kernel: [] release_dev+0x212/0x5e2 Jan 7 14:35:43 twister kernel: [] do_page_fault+0x2ff/0x65a Jan 7 14:35:43 twister kernel: [] tty_release+0xc/0x10 Jan 7 14:35:43 twister kernel: [] __fput+0xb1/0x16f Jan 7 14:35:43 twister kernel: [] filp_close+0x5d/0x65 Jan 7 14:35:43 twister kernel: [] sys_close+0x73/0xa6 Jan 7 14:35:43 twister kernel: [] tracesys+0xdc/0xe1 Jan 7 14:35:43 twister kernel: Jan 7 14:35:43 twister kernel: Jan 7 14:35:43 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 54 48 8d 47 08 48 Jan 7 14:35:43 twister kernel: RIP [] mutex_lock+0x10/0x1d Jan 7 14:35:43 twister kernel: RSP Jan 7 14:35:43 twister kernel: CR2: 00b8 Jan 7 14:35:43 twister kernel: ---[ end trace 0e3a1176e9294792 ]--- Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hi, On Fri, Jan 04, 2008 at 09:05:42AM +0800, Dave Young wrote: > Could you try the patch (on 2.6.24-rc6) following and check the debug > messages? > > diff -upr linux/net/bluetooth/rfcomm/tty.c > linux.new/net/bluetooth/rfcomm/tty.c > --- linux/net/bluetooth/rfcomm/tty.c 2008-01-04 08:58:48.0 +0800 > +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-04 09:01:01.0 > +0800 > @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm > { > BT_DBG("dev %p", dev); > > + dump_stack(); > set_bit(RFCOMM_TTY_RELEASED, >flags); > rfcomm_dev_put(dev); > } This did not trigger. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Al Viro <[EMAIL PROTECTED]> writes: > On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote: >> Tejun Heo wrote: >> > Eric W. Biederman wrote: >> >>> That said, the mechanism is a bit too fragile. sysfs currently ensures >> >>> that dentry/inode point to the associated sysfs_dirent. This is mainly >> >>> remanent of conversion from previous VFS based implementation. I think >> >>> the right thing to do here is to make sysfs behave like other proper >> >>> distributed filesystems using d_revalidate. >> >> Huh? We still need something like sysfs_get_dentry to find the dentries >> >> for the rename or move operation. So we can call d_move. >> > >> > Ah... right. Thanks. :-) >> >> On the second thought, can't those too be dealt with d_revalidate? > > FVO "dealt with" as pleasant and efficient as using coarse whetstone > to deal with caries. Or to say it another way. The linux VFS requires dentries to be preserved and used as long as we can. dropping them early causes some weird nasties to show up, in particular it totally messes up mounting other filesystems on top. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote: > Tejun Heo wrote: > > Eric W. Biederman wrote: > >>> That said, the mechanism is a bit too fragile. sysfs currently ensures > >>> that dentry/inode point to the associated sysfs_dirent. This is mainly > >>> remanent of conversion from previous VFS based implementation. I think > >>> the right thing to do here is to make sysfs behave like other proper > >>> distributed filesystems using d_revalidate. > >> Huh? We still need something like sysfs_get_dentry to find the dentries > >> for the rename or move operation. So we can call d_move. > > > > Ah... right. Thanks. :-) > > On the second thought, can't those too be dealt with d_revalidate? FVO "dealt with" as pleasant and efficient as using coarse whetstone to deal with caries. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Tejun Heo wrote: > Eric W. Biederman wrote: >>> That said, the mechanism is a bit too fragile. sysfs currently ensures >>> that dentry/inode point to the associated sysfs_dirent. This is mainly >>> remanent of conversion from previous VFS based implementation. I think >>> the right thing to do here is to make sysfs behave like other proper >>> distributed filesystems using d_revalidate. >> Huh? We still need something like sysfs_get_dentry to find the dentries >> for the rename or move operation. So we can call d_move. > > Ah... right. Thanks. :-) On the second thought, can't those too be dealt with d_revalidate? -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Eric W. Biederman wrote: >> That said, the mechanism is a bit too fragile. sysfs currently ensures >> that dentry/inode point to the associated sysfs_dirent. This is mainly >> remanent of conversion from previous VFS based implementation. I think >> the right thing to do here is to make sysfs behave like other proper >> distributed filesystems using d_revalidate. > > Huh? We still need something like sysfs_get_dentry to find the dentries > for the rename or move operation. So we can call d_move. Ah... right. Thanks. :-) -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Tejun Heo <[EMAIL PROTECTED]> writes: > Hello, > > Tejun Heo wrote: >> Al Viro wrote: >>> On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: > Assuming that this is what we get, everything looks explainable - we > have sysfs_rename_dir() calling sysfs_get_dentry() while the parent > gets evicted. We don't have any exclusion, so while we are playing > silly buggers with lookups in sysfs_get_dentry() we have parent become > negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. >>> Turn? Just what stops you from getting a negative (and unhashed) from >>> lookup_one_noperm() and on the next iteration being buggered on >>> mutex_lock()? >> >> Right, I haven't thought about that. When sysfs_get_dentry() is called, >> @sd is always valid so unless there was existing negative dentry, lookup >> is guaranteed to return positive dentry, but by populating dcache with >> negative dentry before a node is created, things can go wrong. I don't >> think that's what's going on here tho. If that was the case, the >> while() loop looking up the next sd to lookup (@cur) should have blown >> up as negative dentry will have NULL d_fsdata which doesn't match any sd. >> >> I guess what's needed here is d_revalidate() as other distributed >> filesystems do. I'll test whether this can be actually triggered and >> prepare a fix. Thanks a lot for pointing out the problem. > > This can't happen because lookup of non-existent entry doesn't create a > negative dentry. The new dentry is never hashed and killed after lookup > failure, the above scenario can't happen. > > That said, the mechanism is a bit too fragile. sysfs currently ensures > that dentry/inode point to the associated sysfs_dirent. This is mainly > remanent of conversion from previous VFS based implementation. I think > the right thing to do here is to make sysfs behave like other proper > distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Further we should be talking about sysfs_move_dir not sysfs_rename_dir as only the networking code calls sysfs_rename_dir. Not that it is significantly different. I think I saw a change in lock ordering recently to help distributed filesystems, and maybe that will simplify some of this. Anyway I figure if I am to understand this I better go look and see if I can find the start of this thread. I don't have a clue where we are blowing up. Eric -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Dave Young wrote: > On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote: >> On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: >> >>> So the patch referenced above does not help. But I've found a very easy >>> way to trigger the bug: >>> >>> - do a "cat /dev/zero > /dev/rfcomm0" >>> - switch the phone off >>> - switch the phone on, and the kernel oopses >> FYI I also remember having oopses with 2.6.22. but I do not >> have those logs anymore. Also I now booted 2.6.20.7 and it did not oops. > > Could you try the patch (on 2.6.24-rc6) following and check the debug > messages? > > diff -upr linux/net/bluetooth/rfcomm/tty.c > linux.new/net/bluetooth/rfcomm/tty.c > --- linux/net/bluetooth/rfcomm/tty.c 2008-01-04 08:58:48.0 +0800 > +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-04 09:01:01.0 > +0800 > @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm > { > BT_DBG("dev %p", dev); > > + dump_stack(); > set_bit(RFCOMM_TTY_RELEASED, >flags); > rfcomm_dev_put(dev); > } Tried to reproduce the problem here but I can't persuade my laptop to talk to my cell. :-( Gabor, can you please report what Dave Young asked? Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Dave Young wrote: On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote: On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: So the patch referenced above does not help. But I've found a very easy way to trigger the bug: - do a cat /dev/zero /dev/rfcomm0 - switch the phone off - switch the phone on, and the kernel oopses FYI I also remember having oopses with 2.6.22.something but I do not have those logs anymore. Also I now booted 2.6.20.7 and it did not oops. Could you try the patch (on 2.6.24-rc6) following and check the debug messages? diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c --- linux/net/bluetooth/rfcomm/tty.c 2008-01-04 08:58:48.0 +0800 +++ linux.new/net/bluetooth/rfcomm/tty.c 2008-01-04 09:01:01.0 +0800 @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm { BT_DBG(dev %p, dev); + dump_stack(); set_bit(RFCOMM_TTY_RELEASED, dev-flags); rfcomm_dev_put(dev); } Tried to reproduce the problem here but I can't persuade my laptop to talk to my cell. :-( Gabor, can you please report what Dave Young asked? Thanks. -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Tejun Heo [EMAIL PROTECTED] writes: Hello, Tejun Heo wrote: Al Viro wrote: On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. Turn? Just what stops you from getting a negative (and unhashed) from lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? Right, I haven't thought about that. When sysfs_get_dentry() is called, @sd is always valid so unless there was existing negative dentry, lookup is guaranteed to return positive dentry, but by populating dcache with negative dentry before a node is created, things can go wrong. I don't think that's what's going on here tho. If that was the case, the while() loop looking up the next sd to lookup (@cur) should have blown up as negative dentry will have NULL d_fsdata which doesn't match any sd. I guess what's needed here is d_revalidate() as other distributed filesystems do. I'll test whether this can be actually triggered and prepare a fix. Thanks a lot for pointing out the problem. This can't happen because lookup of non-existent entry doesn't create a negative dentry. The new dentry is never hashed and killed after lookup failure, the above scenario can't happen. That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Further we should be talking about sysfs_move_dir not sysfs_rename_dir as only the networking code calls sysfs_rename_dir. Not that it is significantly different. I think I saw a change in lock ordering recently to help distributed filesystems, and maybe that will simplify some of this. Anyway I figure if I am to understand this I better go look and see if I can find the start of this thread. I don't have a clue where we are blowing up. Eric -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Eric W. Biederman wrote: That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Ah... right. Thanks. :-) -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Tejun Heo wrote: Eric W. Biederman wrote: That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Ah... right. Thanks. :-) On the second thought, can't those too be dealt with d_revalidate? -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote: Tejun Heo wrote: Eric W. Biederman wrote: That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Ah... right. Thanks. :-) On the second thought, can't those too be dealt with d_revalidate? FVO dealt with as pleasant and efficient as using coarse whetstone to deal with caries. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Al Viro [EMAIL PROTECTED] writes: On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote: Tejun Heo wrote: Eric W. Biederman wrote: That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Huh? We still need something like sysfs_get_dentry to find the dentries for the rename or move operation. So we can call d_move. Ah... right. Thanks. :-) On the second thought, can't those too be dealt with d_revalidate? FVO dealt with as pleasant and efficient as using coarse whetstone to deal with caries. Or to say it another way. The linux VFS requires dentries to be preserved and used as long as we can. dropping them early causes some weird nasties to show up, in particular it totally messes up mounting other filesystems on top. Eric -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Gabor Gombas wrote: Hi, On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote: Could you stick if (!parent-d_inode) printk(KERN_WARNING sysfs locking blows: %s, parent-d_name.name); right before mutex_lock(parent-d_inode-i_mutex); dentry = lookup_one_noperm(cur-s_name, parent); mutex_unlock(parent-d_inode-i_mutex); in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, trigger? Here it is: Jan 7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D1Unable to handle kernel NULL pointer dereference at 00b8 RIP: Does the attached patch fix the problem? -- tejun diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 3371629..4e7f3bf 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -878,7 +878,6 @@ again: error = 0; d_add(new_dentry, NULL); d_move(old_dentry, new_dentry); - dput(new_dentry); /* Remove from old parent's list and insert into new parent's list. */ sysfs_unlink_sibling(sd);
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote: Does the attached patch fix the problem? No, it still oopses. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Tejun Heo wrote: > Al Viro wrote: >> On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... >>> That part of code is walking down the sysfs tree from the s_root of >>> sysfs hierarchy and on each step parent is held using dget() while being >>> referenced, so I don't think they can turn negative there. >> Turn? Just what stops you from getting a negative (and unhashed) from >> lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? > > Right, I haven't thought about that. When sysfs_get_dentry() is called, > @sd is always valid so unless there was existing negative dentry, lookup > is guaranteed to return positive dentry, but by populating dcache with > negative dentry before a node is created, things can go wrong. I don't > think that's what's going on here tho. If that was the case, the > while() loop looking up the next sd to lookup (@cur) should have blown > up as negative dentry will have NULL d_fsdata which doesn't match any sd. > > I guess what's needed here is d_revalidate() as other distributed > filesystems do. I'll test whether this can be actually triggered and > prepare a fix. Thanks a lot for pointing out the problem. This can't happen because lookup of non-existent entry doesn't create a negative dentry. The new dentry is never hashed and killed after lookup failure, the above scenario can't happen. That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Tejun Heo wrote: Al Viro wrote: On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. Turn? Just what stops you from getting a negative (and unhashed) from lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? Right, I haven't thought about that. When sysfs_get_dentry() is called, @sd is always valid so unless there was existing negative dentry, lookup is guaranteed to return positive dentry, but by populating dcache with negative dentry before a node is created, things can go wrong. I don't think that's what's going on here tho. If that was the case, the while() loop looking up the next sd to lookup (@cur) should have blown up as negative dentry will have NULL d_fsdata which doesn't match any sd. I guess what's needed here is d_revalidate() as other distributed filesystems do. I'll test whether this can be actually triggered and prepare a fix. Thanks a lot for pointing out the problem. This can't happen because lookup of non-existent entry doesn't create a negative dentry. The new dentry is never hashed and killed after lookup failure, the above scenario can't happen. That said, the mechanism is a bit too fragile. sysfs currently ensures that dentry/inode point to the associated sysfs_dirent. This is mainly remanent of conversion from previous VFS based implementation. I think the right thing to do here is to make sysfs behave like other proper distributed filesystems using d_revalidate. Thanks. -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Al Viro wrote: > On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote: >> That means sysfs_remove_dir() is called on parent while other operations >> are in progress on children, right? sysfs has never allowed such things >> && AFAIK no one does that. It's somewhat implied in the interface (such >> as recursive removing) but I fully agree it's problematic. Things like >> these are why I think we need to unify/simplify locking as I wrote >> previously. > > All it takes is kobject_rename() or kobject_move() called asynchronously > wrt removal... I don't see an explicit ban for that. There really hasn't been any stone-set rules for how sysfs can be used and what operations are allowed simultaneously although sysfs implementation always had a lot of assumptions about which ops can and can't be done simultaneously. The general assumption is that the caller is responsible for its and its children's lifetime management which is usually followed as sysfs nodes are removed when a device goes away or driver detaches at which point no other ops should be in progress anyway. I think this stems partially from tight coupling with kobject / driver model. kobject and driver model have certain rules about how they can be used (again not explicit enough) and sysfs implementation kind of piggy backed on those rules and added its own assumptions on top of it. After a while, it's hard to track what's assumed where and implementing or changing one feature somewhere breaks some assumption elsewhere. This is the primary reason why I think sysfs should be an independent component which the driver model uses not something intertwined into driver model while having mostly separate implementation. > FWIW, what happens here *is* fishy, but I don't see an outright ban on > that in documentation - rfcomm_tty_open() does > device_move(dev->tty_dev, rfcomm_get_device(dev)); > when we get openers, rfcomm_tty_close() does > device_move(dev->tty_dev, NULL); > when the number of openers hits zero. Can happen repeatedly. > > Note that device_move() with new parent being NULL is explicitly allowed > and handled, so... (cc'ing Kay Sievers) IIRC, that's device class compatibility thing. Overlapping move's are okay tho as they're protected from each other by sysfs_rename_mutex. I'll take a look at the rfcomm code and see what's going on tomorrow. Gotta hit the road now. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote: > That means sysfs_remove_dir() is called on parent while other operations > are in progress on children, right? sysfs has never allowed such things > && AFAIK no one does that. It's somewhat implied in the interface (such > as recursive removing) but I fully agree it's problematic. Things like > these are why I think we need to unify/simplify locking as I wrote > previously. All it takes is kobject_rename() or kobject_move() called asynchronously wrt removal... I don't see an explicit ban for that. FWIW, what happens here *is* fishy, but I don't see an outright ban on that in documentation - rfcomm_tty_open() does device_move(dev->tty_dev, rfcomm_get_device(dev)); when we get openers, rfcomm_tty_close() does device_move(dev->tty_dev, NULL); when the number of openers hits zero. Can happen repeatedly. Note that device_move() with new parent being NULL is explicitly allowed and handled, so... -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Al Viro. Al Viro wrote: > On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote: > >> Right, I haven't thought about that. When sysfs_get_dentry() is called, >> @sd is always valid so unless there was existing negative dentry, lookup >> is guaranteed to return positive dentry, but by populating dcache with >> negative dentry before a node is created, things can go wrong. I don't >> think that's what's going on here tho. If that was the case, the >> while() loop looking up the next sd to lookup (@cur) should have blown >> up as negative dentry will have NULL d_fsdata which doesn't match any sd. > > No. What happens if sd gets unlinked while we are on the way to > sysfs_get_dentry() and so does its parent? That means sysfs_remove_dir() is called on parent while other operations are in progress on children, right? sysfs has never allowed such things && AFAIK no one does that. It's somewhat implied in the interface (such as recursive removing) but I fully agree it's problematic. Things like these are why I think we need to unify/simplify locking as I wrote previously. > The parent is off the > sibling list, we get negative dentry from lookup. It's not hashed, > so won't stick around in dcache (which is apparently what you are > thinking about). However, _THIS_ lookup has returned you a dentry > with NULL ->d_inode and you are well and truly buggered. So, the assumption is that while @sd is held and being operated on its ancestry can never change except for rename and move and those are protected with sysfs_rename_mutex. I agree that the locking is awkward but sysfs's internal implementation has completely changed over last six or so months and the conversion still isn't complete. It isn't pretty at all but is at least understandable now, IMHO. :-) The last pending change is separating out kobject from sysfs and simplifying locking. I had some disagreement with Greg about the future of kobject and then there was tsunami of ATA bugs because most major distros converted to libata from IDE in the second half of the last year and I'm still buried under it. I'll get back to sysfs once these ATA bugs subside a bit. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote: > Right, I haven't thought about that. When sysfs_get_dentry() is called, > @sd is always valid so unless there was existing negative dentry, lookup > is guaranteed to return positive dentry, but by populating dcache with > negative dentry before a node is created, things can go wrong. I don't > think that's what's going on here tho. If that was the case, the > while() loop looking up the next sd to lookup (@cur) should have blown > up as negative dentry will have NULL d_fsdata which doesn't match any sd. No. What happens if sd gets unlinked while we are on the way to sysfs_get_dentry() and so does its parent? The parent is off the sibling list, we get negative dentry from lookup. It's not hashed, so won't stick around in dcache (which is apparently what you are thinking about). However, _THIS_ lookup has returned you a dentry with NULL ->d_inode and you are well and truly buggered. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Al Viro wrote: > On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: >>> Assuming that this is what we get, everything looks explainable - we >>> have sysfs_rename_dir() calling sysfs_get_dentry() while the parent >>> gets evicted. We don't have any exclusion, so while we are playing >>> silly buggers with lookups in sysfs_get_dentry() we have parent become >>> negative; the rest is obvious... >> That part of code is walking down the sysfs tree from the s_root of >> sysfs hierarchy and on each step parent is held using dget() while being >> referenced, so I don't think they can turn negative there. > > Turn? Just what stops you from getting a negative (and unhashed) from > lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? Right, I haven't thought about that. When sysfs_get_dentry() is called, @sd is always valid so unless there was existing negative dentry, lookup is guaranteed to return positive dentry, but by populating dcache with negative dentry before a node is created, things can go wrong. I don't think that's what's going on here tho. If that was the case, the while() loop looking up the next sd to lookup (@cur) should have blown up as negative dentry will have NULL d_fsdata which doesn't match any sd. I guess what's needed here is d_revalidate() as other distributed filesystems do. I'll test whether this can be actually triggered and prepare a fix. Thanks a lot for pointing out the problem. >>> AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() >>> and the way it plays with fs/namei.c are ucking fugly. >> Can you elaborate a bit? The locking in sysfs is unconventional but >> that's mostly from necessity. It has dual interface - vfs and driver >> model && vfs data structures (dentry and inode) are too big to always >> keep around, so it basically becomes a small distributed file system >> where the backing data can change asynchronously. > > ... with all fun that creates. As it is, you have those async changers > of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm() > and yet it needs sysfs_mutex inside sysfs_lookup(). So you can't have > sysfs_get_dentry() under it. So you don't have exclusion with arseloads > of sysfs tree changes in there. Joy... There are two locks. sysfs_rename_mutex and sysfs_mutex. sysfs_rename_mutex is above VFS locks while sysfs_mutex is below VFS locks. sysfs_rename_mutex() protects against move/rename which can change the ancestry of a held sysfs_dirent while sysfs_mutex protects the sd hierarchy itself. Locking can be wrong if sysfs_rename_mutex locking is missing from the places where ancestry of a held sd can change but I can't find one ATM. If I'm missing your point again, feel free to scream at me. :-) As it's unnecessarily unintuitive, there's a pending change to rename sysfs_rename_mutex and use it to protect the whole tree structure to make locking simpler while using sysfs_mutex to guard VFS access such that the locking hierarchy plainly becomes sysfs_rename_mutex - VFS locks - sysfs_mutex where all internal sysfs structure is protected by the outer mutex and the inner one just protects VFS accesses. > Frankly, with the current state of sysfs the last vestiges of arguments > used to push it into the tree back then are dead and buried. I'm not > blaming you, BTW - the shitpile *did* grow past the point where its > memory footprint became far too large and something needed to be done. > Unfortunately, it happened too late for that something being "get rid > of the entire mess" and now we are saddled with it for good. Yeah, it's too late to get rid of sysfs and regardless implementation ugliness, which BTW I think has improved a lot during last six or so months, it's now pretty useful and important to drivers, so I guess the only option is trying hard to make it better. Oh, BTW, the ugly lookup_one_noperm() can be removed if LOOKUP_NOPERM flag is added. The only reason sysfs_lookup() uses the specialized lookup is to avoid permission check. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: > > Assuming that this is what we get, everything looks explainable - we > > have sysfs_rename_dir() calling sysfs_get_dentry() while the parent > > gets evicted. We don't have any exclusion, so while we are playing > > silly buggers with lookups in sysfs_get_dentry() we have parent become > > negative; the rest is obvious... > > That part of code is walking down the sysfs tree from the s_root of > sysfs hierarchy and on each step parent is held using dget() while being > referenced, so I don't think they can turn negative there. Turn? Just what stops you from getting a negative (and unhashed) from lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? > > AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() > > and the way it plays with fs/namei.c are ucking fugly. > > Can you elaborate a bit? The locking in sysfs is unconventional but > that's mostly from necessity. It has dual interface - vfs and driver > model && vfs data structures (dentry and inode) are too big to always > keep around, so it basically becomes a small distributed file system > where the backing data can change asynchronously. ... with all fun that creates. As it is, you have those async changers of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm() and yet it needs sysfs_mutex inside sysfs_lookup(). So you can't have sysfs_get_dentry() under it. So you don't have exclusion with arseloads of sysfs tree changes in there. Joy... Frankly, with the current state of sysfs the last vestiges of arguments used to push it into the tree back then are dead and buried. I'm not blaming you, BTW - the shitpile *did* grow past the point where its memory footprint became far too large and something needed to be done. Unfortunately, it happened too late for that something being "get rid of the entire mess" and now we are saddled with it for good. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello. Al Viro wrote: > sysfs_get_dentry(), > mutex_lock(>d_inode->i_mutex); > hitting parent->d_inode either NULL or very close to it, depending on your > .config; most likely NULL, if offset of i_mutex is 0xb8 in your build. > That's plausible - 0xb8 is what you'd get on UP build without spinlock > debugging, lockdep, etc. > > Assuming that this is what we get, everything looks explainable - we > have sysfs_rename_dir() calling sysfs_get_dentry() while the parent > gets evicted. We don't have any exclusion, so while we are playing > silly buggers with lookups in sysfs_get_dentry() we have parent become > negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. > AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() > and the way it plays with fs/namei.c are ucking fugly. Can you elaborate a bit? The locking in sysfs is unconventional but that's mostly from necessity. It has dual interface - vfs and driver model && vfs data structures (dentry and inode) are too big to always keep around, so it basically becomes a small distributed file system where the backing data can change asynchronously. > Could you stick > if (!parent->d_inode) > printk(KERN_WARNING "sysfs locking blows: %s", > parent->d_name.name); > right before > mutex_lock(>d_inode->i_mutex); > dentry = lookup_one_noperm(cur->s_name, parent); > mutex_unlock(>d_inode->i_mutex); > in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, > trigger? Yes, please. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello. Al Viro wrote: sysfs_get_dentry(), mutex_lock(parent-d_inode-i_mutex); hitting parent-d_inode either NULL or very close to it, depending on your .config; most likely NULL, if offset of i_mutex is 0xb8 in your build. That's plausible - 0xb8 is what you'd get on UP build without spinlock debugging, lockdep, etc. Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() and the way it plays with fs/namei.c are ucking fugly. Can you elaborate a bit? The locking in sysfs is unconventional but that's mostly from necessity. It has dual interface - vfs and driver model vfs data structures (dentry and inode) are too big to always keep around, so it basically becomes a small distributed file system where the backing data can change asynchronously. Could you stick if (!parent-d_inode) printk(KERN_WARNING sysfs locking blows: %s, parent-d_name.name); right before mutex_lock(parent-d_inode-i_mutex); dentry = lookup_one_noperm(cur-s_name, parent); mutex_unlock(parent-d_inode-i_mutex); in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, trigger? Yes, please. Thanks. -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote: Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... That part of code is walking down the sysfs tree from the s_root of sysfs hierarchy and on each step parent is held using dget() while being referenced, so I don't think they can turn negative there. Turn? Just what stops you from getting a negative (and unhashed) from lookup_one_noperm() and on the next iteration being buggered on mutex_lock()? AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() and the way it plays with fs/namei.c are ucking fugly. Can you elaborate a bit? The locking in sysfs is unconventional but that's mostly from necessity. It has dual interface - vfs and driver model vfs data structures (dentry and inode) are too big to always keep around, so it basically becomes a small distributed file system where the backing data can change asynchronously. ... with all fun that creates. As it is, you have those async changers of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm() and yet it needs sysfs_mutex inside sysfs_lookup(). So you can't have sysfs_get_dentry() under it. So you don't have exclusion with arseloads of sysfs tree changes in there. Joy... Frankly, with the current state of sysfs the last vestiges of arguments used to push it into the tree back then are dead and buried. I'm not blaming you, BTW - the shitpile *did* grow past the point where its memory footprint became far too large and something needed to be done. Unfortunately, it happened too late for that something being get rid of the entire mess and now we are saddled with it for good. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote: Right, I haven't thought about that. When sysfs_get_dentry() is called, @sd is always valid so unless there was existing negative dentry, lookup is guaranteed to return positive dentry, but by populating dcache with negative dentry before a node is created, things can go wrong. I don't think that's what's going on here tho. If that was the case, the while() loop looking up the next sd to lookup (@cur) should have blown up as negative dentry will have NULL d_fsdata which doesn't match any sd. No. What happens if sd gets unlinked while we are on the way to sysfs_get_dentry() and so does its parent? The parent is off the sibling list, we get negative dentry from lookup. It's not hashed, so won't stick around in dcache (which is apparently what you are thinking about). However, _THIS_ lookup has returned you a dentry with NULL -d_inode and you are well and truly buggered. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Al Viro. Al Viro wrote: On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote: Right, I haven't thought about that. When sysfs_get_dentry() is called, @sd is always valid so unless there was existing negative dentry, lookup is guaranteed to return positive dentry, but by populating dcache with negative dentry before a node is created, things can go wrong. I don't think that's what's going on here tho. If that was the case, the while() loop looking up the next sd to lookup (@cur) should have blown up as negative dentry will have NULL d_fsdata which doesn't match any sd. No. What happens if sd gets unlinked while we are on the way to sysfs_get_dentry() and so does its parent? That means sysfs_remove_dir() is called on parent while other operations are in progress on children, right? sysfs has never allowed such things AFAIK no one does that. It's somewhat implied in the interface (such as recursive removing) but I fully agree it's problematic. Things like these are why I think we need to unify/simplify locking as I wrote previously. The parent is off the sibling list, we get negative dentry from lookup. It's not hashed, so won't stick around in dcache (which is apparently what you are thinking about). However, _THIS_ lookup has returned you a dentry with NULL -d_inode and you are well and truly buggered. So, the assumption is that while @sd is held and being operated on its ancestry can never change except for rename and move and those are protected with sysfs_rename_mutex. I agree that the locking is awkward but sysfs's internal implementation has completely changed over last six or so months and the conversion still isn't complete. It isn't pretty at all but is at least understandable now, IMHO. :-) The last pending change is separating out kobject from sysfs and simplifying locking. I had some disagreement with Greg about the future of kobject and then there was tsunami of ATA bugs because most major distros converted to libata from IDE in the second half of the last year and I'm still buried under it. I'll get back to sysfs once these ATA bugs subside a bit. Thanks. -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
Hello, Al Viro wrote: On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote: That means sysfs_remove_dir() is called on parent while other operations are in progress on children, right? sysfs has never allowed such things AFAIK no one does that. It's somewhat implied in the interface (such as recursive removing) but I fully agree it's problematic. Things like these are why I think we need to unify/simplify locking as I wrote previously. All it takes is kobject_rename() or kobject_move() called asynchronously wrt removal... I don't see an explicit ban for that. There really hasn't been any stone-set rules for how sysfs can be used and what operations are allowed simultaneously although sysfs implementation always had a lot of assumptions about which ops can and can't be done simultaneously. The general assumption is that the caller is responsible for its and its children's lifetime management which is usually followed as sysfs nodes are removed when a device goes away or driver detaches at which point no other ops should be in progress anyway. I think this stems partially from tight coupling with kobject / driver model. kobject and driver model have certain rules about how they can be used (again not explicit enough) and sysfs implementation kind of piggy backed on those rules and added its own assumptions on top of it. After a while, it's hard to track what's assumed where and implementing or changing one feature somewhere breaks some assumption elsewhere. This is the primary reason why I think sysfs should be an independent component which the driver model uses not something intertwined into driver model while having mostly separate implementation. FWIW, what happens here *is* fishy, but I don't see an outright ban on that in documentation - rfcomm_tty_open() does device_move(dev-tty_dev, rfcomm_get_device(dev)); when we get openers, rfcomm_tty_close() does device_move(dev-tty_dev, NULL); when the number of openers hits zero. Can happen repeatedly. Note that device_move() with new parent being NULL is explicitly allowed and handled, so... (cc'ing Kay Sievers) IIRC, that's device class compatibility thing. Overlapping move's are okay tho as they're protected from each other by sysfs_rename_mutex. I'll take a look at the rfcomm code and see what's going on tomorrow. Gotta hit the road now. Thanks. -- tejun -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote: That means sysfs_remove_dir() is called on parent while other operations are in progress on children, right? sysfs has never allowed such things AFAIK no one does that. It's somewhat implied in the interface (such as recursive removing) but I fully agree it's problematic. Things like these are why I think we need to unify/simplify locking as I wrote previously. All it takes is kobject_rename() or kobject_move() called asynchronously wrt removal... I don't see an explicit ban for that. FWIW, what happens here *is* fishy, but I don't see an outright ban on that in documentation - rfcomm_tty_open() does device_move(dev-tty_dev, rfcomm_get_device(dev)); when we get openers, rfcomm_tty_close() does device_move(dev-tty_dev, NULL); when the number of openers hits zero. Can happen repeatedly. Note that device_move() with new parent being NULL is explicitly allowed and handled, so... -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: > Heh, it seems talking about a bug makes it trigger: > > Jan 2 16:05:45 twister kernel: Unable to handle kernel NULL pointer > dereference at 00b8 RIP: > Jan 2 16:05:45 twister kernel: [] mutex_lock+0x10/0x1d > So the patch referenced above does not help. But I've found a very easy > way to trigger the bug: > > - do a "cat /dev/zero > /dev/rfcomm0" > - switch the phone off > - switch the phone on, and the kernel oopses sysfs_get_dentry(), mutex_lock(>d_inode->i_mutex); hitting parent->d_inode either NULL or very close to it, depending on your .config; most likely NULL, if offset of i_mutex is 0xb8 in your build. That's plausible - 0xb8 is what you'd get on UP build without spinlock debugging, lockdep, etc. Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() and the way it plays with fs/namei.c are ucking fugly. Could you stick if (!parent->d_inode) printk(KERN_WARNING "sysfs locking blows: %s", parent->d_name.name); right before mutex_lock(>d_inode->i_mutex); dentry = lookup_one_noperm(cur->s_name, parent); mutex_unlock(>d_inode->i_mutex); in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, trigger? -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: Heh, it seems talking about a bug makes it trigger: Jan 2 16:05:45 twister kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 2 16:05:45 twister kernel: [804720a5] mutex_lock+0x10/0x1d So the patch referenced above does not help. But I've found a very easy way to trigger the bug: - do a cat /dev/zero /dev/rfcomm0 - switch the phone off - switch the phone on, and the kernel oopses sysfs_get_dentry(), mutex_lock(parent-d_inode-i_mutex); hitting parent-d_inode either NULL or very close to it, depending on your .config; most likely NULL, if offset of i_mutex is 0xb8 in your build. That's plausible - 0xb8 is what you'd get on UP build without spinlock debugging, lockdep, etc. Assuming that this is what we get, everything looks explainable - we have sysfs_rename_dir() calling sysfs_get_dentry() while the parent gets evicted. We don't have any exclusion, so while we are playing silly buggers with lookups in sysfs_get_dentry() we have parent become negative; the rest is obvious... AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry() and the way it plays with fs/namei.c are ucking fugly. Could you stick if (!parent-d_inode) printk(KERN_WARNING sysfs locking blows: %s, parent-d_name.name); right before mutex_lock(parent-d_inode-i_mutex); dentry = lookup_one_noperm(cur-s_name, parent); mutex_unlock(parent-d_inode-i_mutex); in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed, trigger? -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote: > On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: > > > So the patch referenced above does not help. But I've found a very easy > > way to trigger the bug: > > > > - do a "cat /dev/zero > /dev/rfcomm0" > > - switch the phone off > > - switch the phone on, and the kernel oopses > > FYI I also remember having oopses with 2.6.22. but I do not > have those logs anymore. Also I now booted 2.6.20.7 and it did not oops. Could you try the patch (on 2.6.24-rc6) following and check the debug messages? diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c --- linux/net/bluetooth/rfcomm/tty.c2008-01-04 08:58:48.0 +0800 +++ linux.new/net/bluetooth/rfcomm/tty.c2008-01-04 09:01:01.0 +0800 @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm { BT_DBG("dev %p", dev); + dump_stack(); set_bit(RFCOMM_TTY_RELEASED, >flags); rfcomm_dev_put(dev); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: > So the patch referenced above does not help. But I've found a very easy > way to trigger the bug: > > - do a "cat /dev/zero > /dev/rfcomm0" > - switch the phone off > - switch the phone on, and the kernel oopses FYI I also remember having oopses with 2.6.22. but I do not have those logs anymore. Also I now booted 2.6.20.7 and it did not oops. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote: So the patch referenced above does not help. But I've found a very easy way to trigger the bug: - do a cat /dev/zero /dev/rfcomm0 - switch the phone off - switch the phone on, and the kernel oopses FYI I also remember having oopses with 2.6.22.something but I do not have those logs anymore. Also I now booted 2.6.20.7 and it did not oops. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote: > Please try the -mm tree kernel, might have been fixed by : > http://lkml.org/lkml/2007/11/18/141 Heh, it seems talking about a bug makes it trigger: Jan 2 16:05:45 twister kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 2 16:05:45 twister kernel: [] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: PGD bcf6e067 PUD bcee3067 PMD 0 Jan 2 16:05:45 twister kernel: Oops: 0002 [1] Jan 2 16:05:45 twister kernel: CPU 0 Jan 2 16:05:45 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt dm_snapshot dm_mirror cpufreq_ondemand saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 sg snd_intel8x0 saa7134 snd_ac97_codec ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c sr_mod firewire_ohci firewire_core snd_pcm crc_itu_t ir_common ehci_hcd ohci_hcd cdrom snd_timer snd_page_alloc parport_pc parport sky2 forcedeth floppy Jan 2 16:05:45 twister kernel: Pid: 5056, comm: cat Not tainted 2.6.24-rc6-dirty #3 Jan 2 16:05:45 twister kernel: RIP: 0010:[] [] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: RSP: 0018:8100bce3fd08 EFLAGS: 00010246 Jan 2 16:05:45 twister kernel: RAX: RBX: 00b8 RCX: Jan 2 16:05:45 twister kernel: RDX: 8100bce3ffd8 RSI: 80641d70 RDI: 00b8 Jan 2 16:05:45 twister kernel: RBP: 80591db0 R08: R09: 000899a2 Jan 2 16:05:45 twister kernel: R10: R11: 00300018 R12: 8100bcb8ef50 Jan 2 16:05:45 twister kernel: R13: fff4 R14: 8100bcfc8e00 R15: 8100a370b300 Jan 2 16:05:45 twister kernel: FS: 2b225d0e56e0() GS:805b8000() knlGS: Jan 2 16:05:45 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 2 16:05:45 twister kernel: CR2: 00b8 CR3: 95ad1000 CR4: 06e0 Jan 2 16:05:45 twister kernel: DR0: DR1: DR2: Jan 2 16:05:45 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 2 16:05:45 twister kernel: Process cat (pid: 5056, threadinfo 8100bce3e000, task 8100ba931060) Jan 2 16:05:45 twister kernel: Stack: e207e2d8 8028 8100a3387000 802aefa5 Jan 2 16:05:45 twister kernel: 8100bfa8af50 8100bcb8ef50 8100baab9300 802af1ba Jan 2 16:05:45 twister kernel: 8100a342a8d0 8100a342a8d0 8100bfa92dc0 8100baab9300 Jan 2 16:05:45 twister kernel: Call Trace: Jan 2 16:05:45 twister kernel: [] dput+0x26/0x103 Jan 2 16:05:45 twister kernel: [] sysfs_get_dentry+0x45/0x8f Jan 2 16:05:45 twister kernel: [] sysfs_move_dir+0x63/0x204 Jan 2 16:05:45 twister kernel: [] kobject_move+0xba/0x110 Jan 2 16:05:45 twister kernel: [] device_move+0x59/0x111 Jan 2 16:05:45 twister kernel: [] :rfcomm:rfcomm_tty_close+0x2f/0x74 Jan 2 16:05:45 twister kernel: [] release_dev+0x212/0x5e2 Jan 2 16:05:45 twister kernel: [] do_page_fault+0x2ff/0x65a Jan 2 16:05:45 twister kernel: [] tty_release+0xc/0x10 Jan 2 16:05:45 twister kernel: [] __fput+0xb1/0x16f Jan 2 16:05:45 twister kernel: [] filp_close+0x5d/0x65 Jan 2 16:05:45 twister kernel: [] sys_close+0x73/0xa6 Jan 2 16:05:45 twister kernel: [] tracesys+0xdc/0xe1 Jan 2 16:05:45 twister kernel: Jan 2 16:05:45 twister kernel: Jan 2 16:05:45 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 54 48 8d 47 08 48 Jan 2 16:05:45 twister kernel: RIP [] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: RSP Jan 2 16:05:45 twister kernel: CR2: 00b8 Jan 2 16:05:45 twister kernel: ---[ end trace da76522f0284e9b6 ]--- So the patch referenced above does not help. But I've found a very easy way to trigger the bug: - do a "cat /dev/zero > /dev/rfcomm0" - switch the phone off - switch the phone on, and the kernel oopses Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote: > Please try the -mm tree kernel, might have been fixed by : > http://lkml.org/lkml/2007/11/18/141 I've applied the patch on top of 2.6.24-rc6 (I don't want to run -mm kernels on this machine). We'll see what happens. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote: Please try the -mm tree kernel, might have been fixed by : http://lkml.org/lkml/2007/11/18/141 I've applied the patch on top of 2.6.24-rc6 (I don't want to run -mm kernels on this machine). We'll see what happens. Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote: Please try the -mm tree kernel, might have been fixed by : http://lkml.org/lkml/2007/11/18/141 Heh, it seems talking about a bug makes it trigger: Jan 2 16:05:45 twister kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Jan 2 16:05:45 twister kernel: [804720a5] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: PGD bcf6e067 PUD bcee3067 PMD 0 Jan 2 16:05:45 twister kernel: Oops: 0002 [1] Jan 2 16:05:45 twister kernel: CPU 0 Jan 2 16:05:45 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt dm_snapshot dm_mirror cpufreq_ondemand saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 sg snd_intel8x0 saa7134 snd_ac97_codec ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c sr_mod firewire_ohci firewire_core snd_pcm crc_itu_t ir_common ehci_hcd ohci_hcd cdrom snd_timer snd_page_alloc parport_pc parport sky2 forcedeth floppy Jan 2 16:05:45 twister kernel: Pid: 5056, comm: cat Not tainted 2.6.24-rc6-dirty #3 Jan 2 16:05:45 twister kernel: RIP: 0010:[804720a5] [804720a5] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: RSP: 0018:8100bce3fd08 EFLAGS: 00010246 Jan 2 16:05:45 twister kernel: RAX: RBX: 00b8 RCX: Jan 2 16:05:45 twister kernel: RDX: 8100bce3ffd8 RSI: 80641d70 RDI: 00b8 Jan 2 16:05:45 twister kernel: RBP: 80591db0 R08: R09: 000899a2 Jan 2 16:05:45 twister kernel: R10: R11: 00300018 R12: 8100bcb8ef50 Jan 2 16:05:45 twister kernel: R13: fff4 R14: 8100bcfc8e00 R15: 8100a370b300 Jan 2 16:05:45 twister kernel: FS: 2b225d0e56e0() GS:805b8000() knlGS: Jan 2 16:05:45 twister kernel: CS: 0010 DS: ES: CR0: 8005003b Jan 2 16:05:45 twister kernel: CR2: 00b8 CR3: 95ad1000 CR4: 06e0 Jan 2 16:05:45 twister kernel: DR0: DR1: DR2: Jan 2 16:05:45 twister kernel: DR3: DR6: 0ff0 DR7: 0400 Jan 2 16:05:45 twister kernel: Process cat (pid: 5056, threadinfo 8100bce3e000, task 8100ba931060) Jan 2 16:05:45 twister kernel: Stack: e207e2d8 8028 8100a3387000 802aefa5 Jan 2 16:05:45 twister kernel: 8100bfa8af50 8100bcb8ef50 8100baab9300 802af1ba Jan 2 16:05:45 twister kernel: 8100a342a8d0 8100a342a8d0 8100bfa92dc0 8100baab9300 Jan 2 16:05:45 twister kernel: Call Trace: Jan 2 16:05:45 twister kernel: [8028] dput+0x26/0x103 Jan 2 16:05:45 twister kernel: [802aefa5] sysfs_get_dentry+0x45/0x8f Jan 2 16:05:45 twister kernel: [802af1ba] sysfs_move_dir+0x63/0x204 Jan 2 16:05:45 twister kernel: [803006e5] kobject_move+0xba/0x110 Jan 2 16:05:45 twister kernel: [80368a00] device_move+0x59/0x111 Jan 2 16:05:45 twister kernel: [88292425] :rfcomm:rfcomm_tty_close+0x2f/0x74 Jan 2 16:05:45 twister kernel: [803446bf] release_dev+0x212/0x5e2 Jan 2 16:05:45 twister kernel: [8021b609] do_page_fault+0x2ff/0x65a Jan 2 16:05:45 twister kernel: [80344a9b] tty_release+0xc/0x10 Jan 2 16:05:45 twister kernel: [80276f67] __fput+0xb1/0x16f Jan 2 16:05:45 twister kernel: [802748b5] filp_close+0x5d/0x65 Jan 2 16:05:45 twister kernel: [80275a07] sys_close+0x73/0xa6 Jan 2 16:05:45 twister kernel: [8020b5fc] tracesys+0xdc/0xe1 Jan 2 16:05:45 twister kernel: Jan 2 16:05:45 twister kernel: Jan 2 16:05:45 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 54 48 8d 47 08 48 Jan 2 16:05:45 twister kernel: RIP [804720a5] mutex_lock+0x10/0x1d Jan 2 16:05:45 twister kernel: RSP 8100bce3fd08 Jan 2 16:05:45 twister kernel: CR2: 00b8 Jan 2 16:05:45 twister kernel: ---[ end trace da76522f0284e9b6 ]--- So the patch referenced above does not help. But I've found a very easy way to trigger the bug: - do a cat /dev/zero /dev/rfcomm0 - switch the phone off - switch the phone on, and the kernel oopses Gabor -- - MTA SZTAKI Computer and Automation Research Institute Hungarian Academy of Sciences - -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Dec 29, 2007 1:32 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote: > Hi, > > I'm using my phone as a GPRS modem over Bluetooth. Sometimes Bluetooth > on the phone seems to stall and the phone has to be switched off & on to > get it back to a sane state. During this I sometimes get the following > Oops (this is kernel 2.6.24-rc6 on x86_64): > > Dec 28 17:52:11 host kernel: Unable to handle kernel NULL pointer dereference > at 00b8 RIP: > Dec 28 17:52:11 host kernel: [] mutex_lock+0x10/0x1d > Dec 28 17:52:11 host kernel: PGD 920b0067 PUD a7aba067 PMD 0 > Dec 28 17:52:11 host kernel: Oops: 0002 [1] > Dec 28 17:52:11 host kernel: CPU 0 > Dec 28 17:52:11 host kernel: Modules linked in: cpufreq_ondemand ppp_deflate > zlib_deflate zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc > binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp > ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc > fuse dm_crypt dm_snapshot dm_mirror saa7134_alsa radeon hwmon_vid eeprom > hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx > tea5761 sg snd_intel8x0 saa7134 firewire_ohci snd_ac97_codec firewire_core > videobuf_dma_sg videobuf_core ir_kbd_i2c ac97_bus crc_itu_t ir_common snd_pcm > ehci_hcd ohci_hcd parport_pc parport sky2 sr_mod forcedeth snd_timer > snd_page_alloc cdrom floppy > Dec 28 17:52:11 host kernel: Pid: 4381, comm: pppd Not tainted 2.6.24-rc6 #1 > Dec 28 17:52:11 host kernel: RIP: 0010:[] > [] mutex_lock+0x10/0x1d > Dec 28 17:52:11 host kernel: RSP: 0018:81009402fd08 EFLAGS: 00010246 > Dec 28 17:52:11 host kernel: RAX: RBX: 00b8 RCX: > > Dec 28 17:52:11 host kernel: RDX: 81009402ffd8 RSI: 80639d70 RDI: > 00b8 > Dec 28 17:52:11 host kernel: RBP: 8058adb0 R08: 6390dfaa7e3bd395 R09: > 8050597e > Dec 28 17:52:11 host kernel: R10: 4000 R11: 00300018 R12: > 8100b4d75e60 > Dec 28 17:52:11 host kernel: R13: fff4 R14: 8100abe9e180 R15: > 8100a7a10900 > Dec 28 17:52:11 host kernel: FS: 2ab1bddc3ac0() > GS:805b() knlGS: > Dec 28 17:52:11 host kernel: CS: 0010 DS: ES: CR0: 8005003b > Dec 28 17:52:11 host kernel: CR2: 00b8 CR3: 9400e000 CR4: > 06e0 > Dec 28 17:52:11 host kernel: DR0: DR1: DR2: > > Dec 28 17:52:11 host kernel: DR3: DR6: 0ff0 DR7: > 0400 > Dec 28 17:52:11 host kernel: Process pppd (pid: 4381, threadinfo > 81009402e000, task 81009403d060) > Dec 28 17:52:11 host kernel: Stack: 81009403d060 8028 > 810093734d48 802aefa5 > Dec 28 17:52:11 host kernel: 8100bf9e1c80 8100b4d75e60 > 810060720600 802af1ba > Dec 28 17:52:11 host kernel: 8100b6e5ced0 8100b6e5ced0 > 8100bfa11c00 810060720600 > Dec 28 17:52:11 host kernel: Call Trace: > Dec 28 17:52:11 host kernel: [] dput+0x26/0x103 > Dec 28 17:52:11 host kernel: [] sysfs_get_dentry+0x45/0x8f > Dec 28 17:52:11 host kernel: [] sysfs_move_dir+0x63/0x204 > Dec 28 17:52:11 host kernel: [] kobject_move+0xba/0x110 > Dec 28 17:52:11 host kernel: [] device_move+0x59/0x111 > Dec 28 17:52:11 host kernel: [] > :rfcomm:rfcomm_tty_close+0x2f/0x74 > Dec 28 17:52:11 host kernel: [] release_dev+0x212/0x5e2 > Dec 28 17:52:11 host kernel: [] mntput_no_expire+0x1f/0x6f > Dec 28 17:52:11 host kernel: [] sys_sendto+0x128/0x151 > Dec 28 17:52:11 host kernel: [] tty_release+0xc/0x10 > Dec 28 17:52:11 host kernel: [] __fput+0xb1/0x16f > Dec 28 17:52:11 host kernel: [] filp_close+0x5d/0x65 > Dec 28 17:52:11 host kernel: [] sys_close+0x73/0xa6 > Dec 28 17:52:11 host kernel: [] tracesys+0xdc/0xe1 > Dec 28 17:52:11 host kernel: > Dec 28 17:52:11 host kernel: > Dec 28 17:52:11 host kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 > 54 48 8d 47 08 48 > Dec 28 17:52:11 host kernel: RIP [] mutex_lock+0x10/0x1d > Dec 28 17:52:11 host kernel: RSP > Dec 28 17:52:11 host kernel: CR2: 00b8 > Dec 28 17:52:11 host kernel: ---[ end trace 12717319afdb56f5 ]--- > > The bug is also present in 2.6.23.9: > > Dec 28 15:30:29 twister kernel: Unable to handle kernel paging request at > fffe RIP: > Dec 28 15:30:29 twister kernel: [] dput+0xd/0x103 > Dec 28 15:30:29 twister kernel: PGD 203067 PUD 204067 PMD 0 > Dec 28 15:30:29 twister kernel: Oops: [1] > Dec 28 15:30:29 twister kernel: CPU 0 > Dec 28 15:30:29 twister kernel: Modules linked in: ppp_deflate zlib_deflate > zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc binfmt_misc rfcomm > l2cap nfsd exportfs auth_rpcgss ipt_REJECT xt_tcpudp ipt_LOG xt_limit > iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt > dm_snapshot dm_mirror saa7134_alsa radeon it87 hwmon_vid eeprom hci_usb >
Re: [Bluez-devel] Oops involving RFCOMM and sysfs
On Dec 29, 2007 1:32 AM, Gabor Gombas [EMAIL PROTECTED] wrote: Hi, I'm using my phone as a GPRS modem over Bluetooth. Sometimes Bluetooth on the phone seems to stall and the phone has to be switched off on to get it back to a sane state. During this I sometimes get the following Oops (this is kernel 2.6.24-rc6 on x86_64): Dec 28 17:52:11 host kernel: Unable to handle kernel NULL pointer dereference at 00b8 RIP: Dec 28 17:52:11 host kernel: [8046e0d5] mutex_lock+0x10/0x1d Dec 28 17:52:11 host kernel: PGD 920b0067 PUD a7aba067 PMD 0 Dec 28 17:52:11 host kernel: Oops: 0002 [1] Dec 28 17:52:11 host kernel: CPU 0 Dec 28 17:52:11 host kernel: Modules linked in: cpufreq_ondemand ppp_deflate zlib_deflate zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt dm_snapshot dm_mirror saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 sg snd_intel8x0 saa7134 firewire_ohci snd_ac97_codec firewire_core videobuf_dma_sg videobuf_core ir_kbd_i2c ac97_bus crc_itu_t ir_common snd_pcm ehci_hcd ohci_hcd parport_pc parport sky2 sr_mod forcedeth snd_timer snd_page_alloc cdrom floppy Dec 28 17:52:11 host kernel: Pid: 4381, comm: pppd Not tainted 2.6.24-rc6 #1 Dec 28 17:52:11 host kernel: RIP: 0010:[8046e0d5] [8046e0d5] mutex_lock+0x10/0x1d Dec 28 17:52:11 host kernel: RSP: 0018:81009402fd08 EFLAGS: 00010246 Dec 28 17:52:11 host kernel: RAX: RBX: 00b8 RCX: Dec 28 17:52:11 host kernel: RDX: 81009402ffd8 RSI: 80639d70 RDI: 00b8 Dec 28 17:52:11 host kernel: RBP: 8058adb0 R08: 6390dfaa7e3bd395 R09: 8050597e Dec 28 17:52:11 host kernel: R10: 4000 R11: 00300018 R12: 8100b4d75e60 Dec 28 17:52:11 host kernel: R13: fff4 R14: 8100abe9e180 R15: 8100a7a10900 Dec 28 17:52:11 host kernel: FS: 2ab1bddc3ac0() GS:805b() knlGS: Dec 28 17:52:11 host kernel: CS: 0010 DS: ES: CR0: 8005003b Dec 28 17:52:11 host kernel: CR2: 00b8 CR3: 9400e000 CR4: 06e0 Dec 28 17:52:11 host kernel: DR0: DR1: DR2: Dec 28 17:52:11 host kernel: DR3: DR6: 0ff0 DR7: 0400 Dec 28 17:52:11 host kernel: Process pppd (pid: 4381, threadinfo 81009402e000, task 81009403d060) Dec 28 17:52:11 host kernel: Stack: 81009403d060 8028 810093734d48 802aefa5 Dec 28 17:52:11 host kernel: 8100bf9e1c80 8100b4d75e60 810060720600 802af1ba Dec 28 17:52:11 host kernel: 8100b6e5ced0 8100b6e5ced0 8100bfa11c00 810060720600 Dec 28 17:52:11 host kernel: Call Trace: Dec 28 17:52:11 host kernel: [8028] dput+0x26/0x103 Dec 28 17:52:11 host kernel: [802aefa5] sysfs_get_dentry+0x45/0x8f Dec 28 17:52:11 host kernel: [802af1ba] sysfs_move_dir+0x63/0x204 Dec 28 17:52:11 host kernel: [803006e5] kobject_move+0xba/0x110 Dec 28 17:52:11 host kernel: [80368640] device_move+0x59/0x111 Dec 28 17:52:11 host kernel: [8828841c] :rfcomm:rfcomm_tty_close+0x2f/0x74 Dec 28 17:52:11 host kernel: [803442ff] release_dev+0x212/0x5e2 Dec 28 17:52:11 host kernel: [80288801] mntput_no_expire+0x1f/0x6f Dec 28 17:52:11 host kernel: [803e845d] sys_sendto+0x128/0x151 Dec 28 17:52:11 host kernel: [803446db] tty_release+0xc/0x10 Dec 28 17:52:11 host kernel: [80276f67] __fput+0xb1/0x16f Dec 28 17:52:11 host kernel: [802748b5] filp_close+0x5d/0x65 Dec 28 17:52:11 host kernel: [80275a07] sys_close+0x73/0xa6 Dec 28 17:52:11 host kernel: [8020b5fc] tracesys+0xdc/0xe1 Dec 28 17:52:11 host kernel: Dec 28 17:52:11 host kernel: Dec 28 17:52:11 host kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 54 48 8d 47 08 48 Dec 28 17:52:11 host kernel: RIP [8046e0d5] mutex_lock+0x10/0x1d Dec 28 17:52:11 host kernel: RSP 81009402fd08 Dec 28 17:52:11 host kernel: CR2: 00b8 Dec 28 17:52:11 host kernel: ---[ end trace 12717319afdb56f5 ]--- The bug is also present in 2.6.23.9: Dec 28 15:30:29 twister kernel: Unable to handle kernel paging request at fffe RIP: Dec 28 15:30:29 twister kernel: [8027ac13] dput+0xd/0x103 Dec 28 15:30:29 twister kernel: PGD 203067 PUD 204067 PMD 0 Dec 28 15:30:29 twister kernel: Oops: [1] Dec 28 15:30:29 twister kernel: CPU 0 Dec 28 15:30:29 twister kernel: Modules linked in: ppp_deflate zlib_deflate zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc binfmt_misc rfcomm