Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-20 Thread Dave Young
On Jan 18, 2008 7:26 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote:
> On Fri, 18 Jan 2008 18:34:55 +0800,
> "Dave Young" <[EMAIL PROTECTED]> wrote:
>
>
> > On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote:
> > > On Fri, 18 Jan 2008 10:19:33 +0100,
> > > Cornelia Huck <[EMAIL PROTECTED]> wrote:
> > >
> > > > >
> > > > > 1314 if (IS_ERR(new_parent_kobj)) {
> > > > > 1315 error = PTR_ERR(new_parent_kobj);
> > > > > 1316 put_device(new_parent);
> > > > > 1317 goto out;
> > > > > 1318 }
> > > > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
> > > > > 1320  new_parent ? new_parent->bus_id : "");
> > > > > 1321 error = kobject_move(>kobj, new_parent_kobj);
> > > > > 1322 if (error) {
> > > > > 1323 put_device(new_parent);
> > > > >
> > > > > imagine new_parent is NULL, then the new_parent_kobj should be put
> > > >
> > > > No, we would need a put_device_parent() (crappy name) which puts the
> > > > reference iff get_device_parent() grabbed it.
> > >
> > > And looking at Greg's patchset, it has cleanup_device_parent(), which
> > > does just that. But it is only called in device_del(), not when
> > > device_move() has errors.
> > >
> > > (get_device_parent() also always returns a pointer to a kobject or
> > > NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
> > > device_move() as well.)
> > >
> >
> > Hmm, thanks.
> > I will be offline during weekend,  but I will still check the
> > device_move and other code if I have time.
>
> Just hacked together the following against Greg's tree:
>
> COMPLETELY UNTESTED DO NOT APPLY
>
> ---
>  drivers/base/core.c |   33 -
>  1 files changed, 16 insertions(+), 17 deletions(-)
>
> --- linux-2.6.orig/drivers/base/core.c
> +++ linux-2.6/drivers/base/core.c
> @@ -553,6 +553,8 @@ static struct kobject *get_device_parent
>  }
>
>  static inline void cleanup_device_parent(struct device *dev) {}
> +static inline void cleanup_glue_dir(struct device *dev,
> +   struct kobject *kobj) {}
>  #else
>  static struct kobject *virtual_device_parent(struct device *dev)
>  {
> @@ -617,27 +619,27 @@ static struct kobject *get_device_parent
> return NULL;
>  }
>
> -static void cleanup_device_parent(struct device *dev)
> +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj)
>  {
> -   struct kobject *glue_dir = dev->kobj.parent;
> -
> /* see if we live in a "glue" directory */
> -   if (!dev->class || glue_dir->kset != >class->class_dirs)
> +   if (!dev->class || kobj->kset != >class->class_dirs)
> return;
>
> -   kobject_put(glue_dir);
> +   kobject_put(kobj);
> +}
> +
> +static void cleanup_device_parent(struct device *dev)
> +{
> +   cleanup_glue_dir(dev, dev->kobj.parent);
>  }
>  #endif
>
> -static int setup_parent(struct device *dev, struct device *parent)
> +static void setup_parent(struct device *dev, struct device *parent)
>  {
> struct kobject *kobj;
> kobj = get_device_parent(dev, parent);
> -   if (IS_ERR(kobj))
> -   return PTR_ERR(kobj);
> if (kobj)
> dev->kobj.parent = kobj;
> -   return 0;
>  }
>
>  static int device_add_class_symlinks(struct device *dev)
> @@ -782,9 +784,7 @@ int device_add(struct device *dev)
> pr_debug("device: '%s': %s\n", dev->bus_id, __FUNCTION__);
>
> parent = get_device(dev->parent);
> -   error = setup_parent(dev, parent);
> -   if (error)
> -   goto Error;
> +   setup_parent(dev, parent);
>
> /* first, register with generic layer. */
> error = kobject_add(>kobj, dev->kobj.parent, "%s", dev->bus_id);
> @@ -862,6 +862,7 @@ int device_add(struct device *dev)
> kobject_uevent(>kobj, KOBJ_REMOVE);
> kobject_del(>kobj);
>   Error:
> +   cleanup_device_parent(dev);
> if (parent)
> put_device(parent);
> goto Done;
> @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru
>
> new_parent = get_device(new_parent);
> new_parent_kobj = get_device_parent (dev, new_parent);
> -   if (IS_ERR(new_parent_kobj)) {
> -   error = PTR_ERR(new_parent_kobj);
> -   put_device(new_parent);
> -   goto out;
> -   }
> +
> pr_debug("device: '%s': %s: moving to '%s'\n", dev->bus_id,
>  __FUNCTION__, new_parent ? new_parent->bus_id : "");
> error = kobject_move(>kobj, new_parent_kobj);
> if (error) {
> +   cleanup_glue_dir(dev, new_parent_kobj);
> put_device(new_parent);
> goto out;
> }
> @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru
> klist_add_tail(>knode_parent,
>

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-20 Thread Dave Young
On Jan 18, 2008 7:26 PM, Cornelia Huck [EMAIL PROTECTED] wrote:
 On Fri, 18 Jan 2008 18:34:55 +0800,
 Dave Young [EMAIL PROTECTED] wrote:


  On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote:
   On Fri, 18 Jan 2008 10:19:33 +0100,
   Cornelia Huck [EMAIL PROTECTED] wrote:
  

 1314 if (IS_ERR(new_parent_kobj)) {
 1315 error = PTR_ERR(new_parent_kobj);
 1316 put_device(new_parent);
 1317 goto out;
 1318 }
 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
 1320  new_parent ? new_parent-bus_id : NULL);
 1321 error = kobject_move(dev-kobj, new_parent_kobj);
 1322 if (error) {
 1323 put_device(new_parent);

 imagine new_parent is NULL, then the new_parent_kobj should be put
   
No, we would need a put_device_parent() (crappy name) which puts the
reference iff get_device_parent() grabbed it.
  
   And looking at Greg's patchset, it has cleanup_device_parent(), which
   does just that. But it is only called in device_del(), not when
   device_move() has errors.
  
   (get_device_parent() also always returns a pointer to a kobject or
   NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
   device_move() as well.)
  
 
  Hmm, thanks.
  I will be offline during weekend,  but I will still check the
  device_move and other code if I have time.

 Just hacked together the following against Greg's tree:

 COMPLETELY UNTESTED DO NOT APPLY

 ---
  drivers/base/core.c |   33 -
  1 files changed, 16 insertions(+), 17 deletions(-)

 --- linux-2.6.orig/drivers/base/core.c
 +++ linux-2.6/drivers/base/core.c
 @@ -553,6 +553,8 @@ static struct kobject *get_device_parent
  }

  static inline void cleanup_device_parent(struct device *dev) {}
 +static inline void cleanup_glue_dir(struct device *dev,
 +   struct kobject *kobj) {}
  #else
  static struct kobject *virtual_device_parent(struct device *dev)
  {
 @@ -617,27 +619,27 @@ static struct kobject *get_device_parent
 return NULL;
  }

 -static void cleanup_device_parent(struct device *dev)
 +static void cleanup_glue_dir(struct device *dev, struct kobject *kobj)
  {
 -   struct kobject *glue_dir = dev-kobj.parent;
 -
 /* see if we live in a glue directory */
 -   if (!dev-class || glue_dir-kset != dev-class-class_dirs)
 +   if (!dev-class || kobj-kset != dev-class-class_dirs)
 return;

 -   kobject_put(glue_dir);
 +   kobject_put(kobj);
 +}
 +
 +static void cleanup_device_parent(struct device *dev)
 +{
 +   cleanup_glue_dir(dev, dev-kobj.parent);
  }
  #endif

 -static int setup_parent(struct device *dev, struct device *parent)
 +static void setup_parent(struct device *dev, struct device *parent)
  {
 struct kobject *kobj;
 kobj = get_device_parent(dev, parent);
 -   if (IS_ERR(kobj))
 -   return PTR_ERR(kobj);
 if (kobj)
 dev-kobj.parent = kobj;
 -   return 0;
  }

  static int device_add_class_symlinks(struct device *dev)
 @@ -782,9 +784,7 @@ int device_add(struct device *dev)
 pr_debug(device: '%s': %s\n, dev-bus_id, __FUNCTION__);

 parent = get_device(dev-parent);
 -   error = setup_parent(dev, parent);
 -   if (error)
 -   goto Error;
 +   setup_parent(dev, parent);

 /* first, register with generic layer. */
 error = kobject_add(dev-kobj, dev-kobj.parent, %s, dev-bus_id);
 @@ -862,6 +862,7 @@ int device_add(struct device *dev)
 kobject_uevent(dev-kobj, KOBJ_REMOVE);
 kobject_del(dev-kobj);
   Error:
 +   cleanup_device_parent(dev);
 if (parent)
 put_device(parent);
 goto Done;
 @@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru

 new_parent = get_device(new_parent);
 new_parent_kobj = get_device_parent (dev, new_parent);
 -   if (IS_ERR(new_parent_kobj)) {
 -   error = PTR_ERR(new_parent_kobj);
 -   put_device(new_parent);
 -   goto out;
 -   }
 +
 pr_debug(device: '%s': %s: moving to '%s'\n, dev-bus_id,
  __FUNCTION__, new_parent ? new_parent-bus_id : NULL);
 error = kobject_move(dev-kobj, new_parent_kobj);
 if (error) {
 +   cleanup_glue_dir(dev, new_parent_kobj);
 put_device(new_parent);
 goto out;
 }
 @@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru
 klist_add_tail(dev-knode_parent,
old_parent-klist_children);
 }
 +   cleanup_glue_dir(dev, new_parent_kobj);
 put_device(new_parent);
 goto out;
 }


Looks good.
--
To unsubscribe from this list: send 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 18:34:55 +0800,
"Dave Young" <[EMAIL PROTECTED]> wrote:

> On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote:
> > On Fri, 18 Jan 2008 10:19:33 +0100,
> > Cornelia Huck <[EMAIL PROTECTED]> wrote:
> >
> > > >
> > > > 1314 if (IS_ERR(new_parent_kobj)) {
> > > > 1315 error = PTR_ERR(new_parent_kobj);
> > > > 1316 put_device(new_parent);
> > > > 1317 goto out;
> > > > 1318 }
> > > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
> > > > 1320  new_parent ? new_parent->bus_id : "");
> > > > 1321 error = kobject_move(>kobj, new_parent_kobj);
> > > > 1322 if (error) {
> > > > 1323 put_device(new_parent);
> > > >
> > > > imagine new_parent is NULL, then the new_parent_kobj should be put
> > >
> > > No, we would need a put_device_parent() (crappy name) which puts the
> > > reference iff get_device_parent() grabbed it.
> >
> > And looking at Greg's patchset, it has cleanup_device_parent(), which
> > does just that. But it is only called in device_del(), not when
> > device_move() has errors.
> >
> > (get_device_parent() also always returns a pointer to a kobject or
> > NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
> > device_move() as well.)
> >
> 
> Hmm, thanks.
> I will be offline during weekend,  but I will still check the
> device_move and other code if I have time.

Just hacked together the following against Greg's tree:

COMPLETELY UNTESTED DO NOT APPLY

---
 drivers/base/core.c |   33 -
 1 files changed, 16 insertions(+), 17 deletions(-)

--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -553,6 +553,8 @@ static struct kobject *get_device_parent
 }
 
 static inline void cleanup_device_parent(struct device *dev) {}
+static inline void cleanup_glue_dir(struct device *dev,
+   struct kobject *kobj) {}
 #else
 static struct kobject *virtual_device_parent(struct device *dev)
 {
@@ -617,27 +619,27 @@ static struct kobject *get_device_parent
return NULL;
 }
 
-static void cleanup_device_parent(struct device *dev)
+static void cleanup_glue_dir(struct device *dev, struct kobject *kobj)
 {
-   struct kobject *glue_dir = dev->kobj.parent;
-
/* see if we live in a "glue" directory */
-   if (!dev->class || glue_dir->kset != >class->class_dirs)
+   if (!dev->class || kobj->kset != >class->class_dirs)
return;
 
-   kobject_put(glue_dir);
+   kobject_put(kobj);
+}
+
+static void cleanup_device_parent(struct device *dev)
+{
+   cleanup_glue_dir(dev, dev->kobj.parent);
 }
 #endif
 
-static int setup_parent(struct device *dev, struct device *parent)
+static void setup_parent(struct device *dev, struct device *parent)
 {
struct kobject *kobj;
kobj = get_device_parent(dev, parent);
-   if (IS_ERR(kobj))
-   return PTR_ERR(kobj);
if (kobj)
dev->kobj.parent = kobj;
-   return 0;
 }
 
 static int device_add_class_symlinks(struct device *dev)
@@ -782,9 +784,7 @@ int device_add(struct device *dev)
pr_debug("device: '%s': %s\n", dev->bus_id, __FUNCTION__);
 
parent = get_device(dev->parent);
-   error = setup_parent(dev, parent);
-   if (error)
-   goto Error;
+   setup_parent(dev, parent);
 
/* first, register with generic layer. */
error = kobject_add(>kobj, dev->kobj.parent, "%s", dev->bus_id);
@@ -862,6 +862,7 @@ int device_add(struct device *dev)
kobject_uevent(>kobj, KOBJ_REMOVE);
kobject_del(>kobj);
  Error:
+   cleanup_device_parent(dev);
if (parent)
put_device(parent);
goto Done;
@@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru
 
new_parent = get_device(new_parent);
new_parent_kobj = get_device_parent (dev, new_parent);
-   if (IS_ERR(new_parent_kobj)) {
-   error = PTR_ERR(new_parent_kobj);
-   put_device(new_parent);
-   goto out;
-   }
+
pr_debug("device: '%s': %s: moving to '%s'\n", dev->bus_id,
 __FUNCTION__, new_parent ? new_parent->bus_id : "");
error = kobject_move(>kobj, new_parent_kobj);
if (error) {
+   cleanup_glue_dir(dev, new_parent_kobj);
put_device(new_parent);
goto out;
}
@@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru
klist_add_tail(>knode_parent,
   _parent->klist_children);
}
+   cleanup_glue_dir(dev, new_parent_kobj);
put_device(new_parent);
goto out;
}
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Dave Young
On Jan 18, 2008 6:23 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote:
> On Fri, 18 Jan 2008 10:19:33 +0100,
> Cornelia Huck <[EMAIL PROTECTED]> wrote:
>
> > >
> > > 1314 if (IS_ERR(new_parent_kobj)) {
> > > 1315 error = PTR_ERR(new_parent_kobj);
> > > 1316 put_device(new_parent);
> > > 1317 goto out;
> > > 1318 }
> > > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
> > > 1320  new_parent ? new_parent->bus_id : "");
> > > 1321 error = kobject_move(>kobj, new_parent_kobj);
> > > 1322 if (error) {
> > > 1323 put_device(new_parent);
> > >
> > > imagine new_parent is NULL, then the new_parent_kobj should be put
> >
> > No, we would need a put_device_parent() (crappy name) which puts the
> > reference iff get_device_parent() grabbed it.
>
> And looking at Greg's patchset, it has cleanup_device_parent(), which
> does just that. But it is only called in device_del(), not when
> device_move() has errors.
>
> (get_device_parent() also always returns a pointer to a kobject or
> NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
> device_move() as well.)
>

Hmm, thanks.
I will be offline during weekend,  but I will still check the
device_move and other code if I have time.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 10:19:33 +0100,
Cornelia Huck <[EMAIL PROTECTED]> wrote:

> > 
> > 1314 if (IS_ERR(new_parent_kobj)) {
> > 1315 error = PTR_ERR(new_parent_kobj);
> > 1316 put_device(new_parent);
> > 1317 goto out;
> > 1318 }
> > 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
> > 1320  new_parent ? new_parent->bus_id : "");
> > 1321 error = kobject_move(>kobj, new_parent_kobj);
> > 1322 if (error) {
> > 1323 put_device(new_parent);
> > 
> > imagine new_parent is NULL, then the new_parent_kobj should be put
> 
> No, we would need a put_device_parent() (crappy name) which puts the
> reference iff get_device_parent() grabbed it.

And looking at Greg's patchset, it has cleanup_device_parent(), which
does just that. But it is only called in device_del(), not when
device_move() has errors.

(get_device_parent() also always returns a pointer to a kobject or
NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
device_move() as well.)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 11:37:21 +0800,
"Dave Young" <[EMAIL PROTECTED]> wrote:


> Lets see the device_move function, seems there's some problems in it:
> 
> 1302 int device_move(struct device *dev, struct device *new_parent)
> 1303 {
> 1304 int error;
> 1305 struct device *old_parent;
> 1306 struct kobject *new_parent_kobj;
> 1307
> 1308 dev = get_device(dev);
> 1309 if (!dev)
> 1310 return -EINVAL;
> 1311
> 1312 new_parent = get_device(new_parent);
> 1313 new_parent_kobj = get_device_parent (dev, new_parent);
> 
> Here could get kobject reference

Eww. get_device_parent() may inflate the refcount in one case
for !CONFIG_SYSFS_DEPRECATED, but often won't. (And the function is
named confusingly, since it hints that we always get a reference, which
we don't.)

> 
> 1314 if (IS_ERR(new_parent_kobj)) {
> 1315 error = PTR_ERR(new_parent_kobj);
> 1316 put_device(new_parent);
> 1317 goto out;
> 1318 }
> 1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
> 1320  new_parent ? new_parent->bus_id : "");
> 1321 error = kobject_move(>kobj, new_parent_kobj);
> 1322 if (error) {
> 1323 put_device(new_parent);
> 
> imagine new_parent is NULL, then the new_parent_kobj should be put

No, we would need a put_device_parent() (crappy name) which puts the
reference iff get_device_parent() grabbed it.

> 
> 1324 goto out;
> 1325 }
> 1326 old_parent = dev->parent;
> 1327 dev->parent = new_parent;
> 1328 if (old_parent)
> 1329 klist_remove(>knode_parent);
> 1330 if (new_parent)
> 1331 klist_add_tail(>knode_parent,
> _parent->klist_children);
> 1332 if (!dev->class)
> 1333 goto out_put;
> 
> Why not put new_parent | new_parent_kobj?

Because that is the good case :)

> 
> 1334 error = device_move_class_links(dev, old_parent, new_parent);
> 1335 if (error) {
> 1336 /* We ignore errors on cleanup since we're hosed
> anyway... */
> 1337 device_move_class_links(dev, new_parent, old_parent);
> 1338 if (!kobject_move(>kobj, _parent->kobj)) {
> 1339 if (new_parent)
> 1340 klist_remove(>knode_parent);
> 1341 if (old_parent)
> 1342 klist_add_tail(>knode_parent,
> 1343
> _parent->klist_children);
> 1344 }
> 1345 put_device(new_parent);
> 
> Same doubt as above

We'd need put_device_parent() or whatever here as well, I guess.

> 
> 1346 goto out;
> 1347 }
> 1348 out_put:
> 1349 put_device(old_parent);
> 1350 out:
> 1351 put_device(dev);
> 1352 return error;
> 1353 }
> 
> Hope I'm wrong, but if it's indeed bugs, I will send a patch about this.

There are more problems, I'm afraid :( setup_parent() calls
get_device_parent() as well, so device_add() has the same problems on
error cleanup...

I'll take a look at it if I find some time, but I'm afraid I'll not
be able to do so before next week.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 11:37:21 +0800,
Dave Young [EMAIL PROTECTED] wrote:


 Lets see the device_move function, seems there's some problems in it:
 
 1302 int device_move(struct device *dev, struct device *new_parent)
 1303 {
 1304 int error;
 1305 struct device *old_parent;
 1306 struct kobject *new_parent_kobj;
 1307
 1308 dev = get_device(dev);
 1309 if (!dev)
 1310 return -EINVAL;
 1311
 1312 new_parent = get_device(new_parent);
 1313 new_parent_kobj = get_device_parent (dev, new_parent);
 
 Here could get kobject reference

Eww. get_device_parent() may inflate the refcount in one case
for !CONFIG_SYSFS_DEPRECATED, but often won't. (And the function is
named confusingly, since it hints that we always get a reference, which
we don't.)

 
 1314 if (IS_ERR(new_parent_kobj)) {
 1315 error = PTR_ERR(new_parent_kobj);
 1316 put_device(new_parent);
 1317 goto out;
 1318 }
 1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
 1320  new_parent ? new_parent-bus_id : NULL);
 1321 error = kobject_move(dev-kobj, new_parent_kobj);
 1322 if (error) {
 1323 put_device(new_parent);
 
 imagine new_parent is NULL, then the new_parent_kobj should be put

No, we would need a put_device_parent() (crappy name) which puts the
reference iff get_device_parent() grabbed it.

 
 1324 goto out;
 1325 }
 1326 old_parent = dev-parent;
 1327 dev-parent = new_parent;
 1328 if (old_parent)
 1329 klist_remove(dev-knode_parent);
 1330 if (new_parent)
 1331 klist_add_tail(dev-knode_parent,
 new_parent-klist_children);
 1332 if (!dev-class)
 1333 goto out_put;
 
 Why not put new_parent | new_parent_kobj?

Because that is the good case :)

 
 1334 error = device_move_class_links(dev, old_parent, new_parent);
 1335 if (error) {
 1336 /* We ignore errors on cleanup since we're hosed
 anyway... */
 1337 device_move_class_links(dev, new_parent, old_parent);
 1338 if (!kobject_move(dev-kobj, old_parent-kobj)) {
 1339 if (new_parent)
 1340 klist_remove(dev-knode_parent);
 1341 if (old_parent)
 1342 klist_add_tail(dev-knode_parent,
 1343
 old_parent-klist_children);
 1344 }
 1345 put_device(new_parent);
 
 Same doubt as above

We'd need put_device_parent() or whatever here as well, I guess.

 
 1346 goto out;
 1347 }
 1348 out_put:
 1349 put_device(old_parent);
 1350 out:
 1351 put_device(dev);
 1352 return error;
 1353 }
 
 Hope I'm wrong, but if it's indeed bugs, I will send a patch about this.

There are more problems, I'm afraid :( setup_parent() calls
get_device_parent() as well, so device_add() has the same problems on
error cleanup...

I'll take a look at it if I find some time, but I'm afraid I'll not
be able to do so before next week.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Dave Young
On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote:
 On Fri, 18 Jan 2008 10:19:33 +0100,
 Cornelia Huck [EMAIL PROTECTED] wrote:

  
   1314 if (IS_ERR(new_parent_kobj)) {
   1315 error = PTR_ERR(new_parent_kobj);
   1316 put_device(new_parent);
   1317 goto out;
   1318 }
   1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
   1320  new_parent ? new_parent-bus_id : NULL);
   1321 error = kobject_move(dev-kobj, new_parent_kobj);
   1322 if (error) {
   1323 put_device(new_parent);
  
   imagine new_parent is NULL, then the new_parent_kobj should be put
 
  No, we would need a put_device_parent() (crappy name) which puts the
  reference iff get_device_parent() grabbed it.

 And looking at Greg's patchset, it has cleanup_device_parent(), which
 does just that. But it is only called in device_del(), not when
 device_move() has errors.

 (get_device_parent() also always returns a pointer to a kobject or
 NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
 device_move() as well.)


Hmm, thanks.
I will be offline during weekend,  but I will still check the
device_move and other code if I have time.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 10:19:33 +0100,
Cornelia Huck [EMAIL PROTECTED] wrote:

  
  1314 if (IS_ERR(new_parent_kobj)) {
  1315 error = PTR_ERR(new_parent_kobj);
  1316 put_device(new_parent);
  1317 goto out;
  1318 }
  1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
  1320  new_parent ? new_parent-bus_id : NULL);
  1321 error = kobject_move(dev-kobj, new_parent_kobj);
  1322 if (error) {
  1323 put_device(new_parent);
  
  imagine new_parent is NULL, then the new_parent_kobj should be put
 
 No, we would need a put_device_parent() (crappy name) which puts the
 reference iff get_device_parent() grabbed it.

And looking at Greg's patchset, it has cleanup_device_parent(), which
does just that. But it is only called in device_del(), not when
device_move() has errors.

(get_device_parent() also always returns a pointer to a kobject or
NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
device_move() as well.)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-18 Thread Cornelia Huck
On Fri, 18 Jan 2008 18:34:55 +0800,
Dave Young [EMAIL PROTECTED] wrote:

 On Jan 18, 2008 6:23 PM, Cornelia Huck [EMAIL PROTECTED] wrote:
  On Fri, 18 Jan 2008 10:19:33 +0100,
  Cornelia Huck [EMAIL PROTECTED] wrote:
 
   
1314 if (IS_ERR(new_parent_kobj)) {
1315 error = PTR_ERR(new_parent_kobj);
1316 put_device(new_parent);
1317 goto out;
1318 }
1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
1320  new_parent ? new_parent-bus_id : NULL);
1321 error = kobject_move(dev-kobj, new_parent_kobj);
1322 if (error) {
1323 put_device(new_parent);
   
imagine new_parent is NULL, then the new_parent_kobj should be put
  
   No, we would need a put_device_parent() (crappy name) which puts the
   reference iff get_device_parent() grabbed it.
 
  And looking at Greg's patchset, it has cleanup_device_parent(), which
  does just that. But it is only called in device_del(), not when
  device_move() has errors.
 
  (get_device_parent() also always returns a pointer to a kobject or
  NULL, so we can get rid of those IS_ERR() checks in setup_parent() and
  device_move() as well.)
 
 
 Hmm, thanks.
 I will be offline during weekend,  but I will still check the
 device_move and other code if I have time.

Just hacked together the following against Greg's tree:

COMPLETELY UNTESTED DO NOT APPLY

---
 drivers/base/core.c |   33 -
 1 files changed, 16 insertions(+), 17 deletions(-)

--- linux-2.6.orig/drivers/base/core.c
+++ linux-2.6/drivers/base/core.c
@@ -553,6 +553,8 @@ static struct kobject *get_device_parent
 }
 
 static inline void cleanup_device_parent(struct device *dev) {}
+static inline void cleanup_glue_dir(struct device *dev,
+   struct kobject *kobj) {}
 #else
 static struct kobject *virtual_device_parent(struct device *dev)
 {
@@ -617,27 +619,27 @@ static struct kobject *get_device_parent
return NULL;
 }
 
-static void cleanup_device_parent(struct device *dev)
+static void cleanup_glue_dir(struct device *dev, struct kobject *kobj)
 {
-   struct kobject *glue_dir = dev-kobj.parent;
-
/* see if we live in a glue directory */
-   if (!dev-class || glue_dir-kset != dev-class-class_dirs)
+   if (!dev-class || kobj-kset != dev-class-class_dirs)
return;
 
-   kobject_put(glue_dir);
+   kobject_put(kobj);
+}
+
+static void cleanup_device_parent(struct device *dev)
+{
+   cleanup_glue_dir(dev, dev-kobj.parent);
 }
 #endif
 
-static int setup_parent(struct device *dev, struct device *parent)
+static void setup_parent(struct device *dev, struct device *parent)
 {
struct kobject *kobj;
kobj = get_device_parent(dev, parent);
-   if (IS_ERR(kobj))
-   return PTR_ERR(kobj);
if (kobj)
dev-kobj.parent = kobj;
-   return 0;
 }
 
 static int device_add_class_symlinks(struct device *dev)
@@ -782,9 +784,7 @@ int device_add(struct device *dev)
pr_debug(device: '%s': %s\n, dev-bus_id, __FUNCTION__);
 
parent = get_device(dev-parent);
-   error = setup_parent(dev, parent);
-   if (error)
-   goto Error;
+   setup_parent(dev, parent);
 
/* first, register with generic layer. */
error = kobject_add(dev-kobj, dev-kobj.parent, %s, dev-bus_id);
@@ -862,6 +862,7 @@ int device_add(struct device *dev)
kobject_uevent(dev-kobj, KOBJ_REMOVE);
kobject_del(dev-kobj);
  Error:
+   cleanup_device_parent(dev);
if (parent)
put_device(parent);
goto Done;
@@ -1303,15 +1304,12 @@ int device_move(struct device *dev, stru
 
new_parent = get_device(new_parent);
new_parent_kobj = get_device_parent (dev, new_parent);
-   if (IS_ERR(new_parent_kobj)) {
-   error = PTR_ERR(new_parent_kobj);
-   put_device(new_parent);
-   goto out;
-   }
+
pr_debug(device: '%s': %s: moving to '%s'\n, dev-bus_id,
 __FUNCTION__, new_parent ? new_parent-bus_id : NULL);
error = kobject_move(dev-kobj, new_parent_kobj);
if (error) {
+   cleanup_glue_dir(dev, new_parent_kobj);
put_device(new_parent);
goto out;
}
@@ -1334,6 +1332,7 @@ int device_move(struct device *dev, stru
klist_add_tail(dev-knode_parent,
   old_parent-klist_children);
}
+   cleanup_glue_dir(dev, new_parent_kobj);
put_device(new_parent);
goto out;
}
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-17 Thread Dave Young
On Jan 17, 2008 7:42 PM, Cornelia Huck <[EMAIL PROTECTED]> wrote:
> On Thu, 17 Jan 2008 16:15:04 +0800,
> Dave Young <[EMAIL PROTECTED]> wrote:
>
> > On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote:
> > > On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > > > Hi,
> > > >
> > > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
> > > >
> > > > > The rfcomm tty device will possibly retain even when conn is down,
> > > > > and sysfs doesn't support zombie device moving, so this patch
> > > > > move the tty device before conn device is destroyed.
> > > > >
> > > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]>
> > > >
> > > > This seems to work, both the oops and the hang are gone. I get these
> > > > messages in syslog when the Bluetooth link hangs and I want to kill pppd
> > > > with "poff":
> > > >
> > > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 
> > > > to become free. Usage count = 1
> > > > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 
> > > > to become free. Usage count = 1
>
> Might be helpful to try with CONFIG_DEBUG_DRIVER=y and
> CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped.
>
> > > >
> > > > But a "killall -9 pppd" seems to help and then the re-connect (after the
> > > > phone got power-cycled) works.
> > >
> > > Weird, I guess "device_move(dev, NULL) two times" cause the problem.
> > >
> > > Anyway, device_move should check the old_parent and new_parent , if
> > > they equal to each other then just return.
>
> sysfs_move_dir() does this (to avoid a loop later in the function).
> Don't know if that's a good thing to check at a higher level as well,
> because the calling code should really know where their devices are.
>
> Anyway, if this "move in place" exposes a refcounting bug, we must fix
> it.

Lets see the device_move function, seems there's some problems in it:

1302 int device_move(struct device *dev, struct device *new_parent)
1303 {
1304 int error;
1305 struct device *old_parent;
1306 struct kobject *new_parent_kobj;
1307
1308 dev = get_device(dev);
1309 if (!dev)
1310 return -EINVAL;
1311
1312 new_parent = get_device(new_parent);
1313 new_parent_kobj = get_device_parent (dev, new_parent);

Here could get kobject reference

1314 if (IS_ERR(new_parent_kobj)) {
1315 error = PTR_ERR(new_parent_kobj);
1316 put_device(new_parent);
1317 goto out;
1318 }
1319 pr_debug("DEVICE: moving '%s' to '%s'\n", dev->bus_id,
1320  new_parent ? new_parent->bus_id : "");
1321 error = kobject_move(>kobj, new_parent_kobj);
1322 if (error) {
1323 put_device(new_parent);

imagine new_parent is NULL, then the new_parent_kobj should be put

1324 goto out;
1325 }
1326 old_parent = dev->parent;
1327 dev->parent = new_parent;
1328 if (old_parent)
1329 klist_remove(>knode_parent);
1330 if (new_parent)
1331 klist_add_tail(>knode_parent,
_parent->klist_children);
1332 if (!dev->class)
1333 goto out_put;

Why not put new_parent | new_parent_kobj?

1334 error = device_move_class_links(dev, old_parent, new_parent);
1335 if (error) {
1336 /* We ignore errors on cleanup since we're hosed
anyway... */
1337 device_move_class_links(dev, new_parent, old_parent);
1338 if (!kobject_move(>kobj, _parent->kobj)) {
1339 if (new_parent)
1340 klist_remove(>knode_parent);
1341 if (old_parent)
1342 klist_add_tail(>knode_parent,
1343
_parent->klist_children);
1344 }
1345 put_device(new_parent);

Same doubt as above

1346 goto out;
1347 }
1348 out_put:
1349 put_device(old_parent);
1350 out:
1351 put_device(dev);
1352 return error;
1353 }

Hope I'm wrong, but if it's indeed bugs, I will send a patch about this.

>
> > >
> > > Am I right?
> >
> > Could you apply this patch as well to test? Thanks.
> >
> > diff -upr linux/net/bluetooth/rfcomm/tty.c 
> > linux.new/net/bluetooth/rfcomm/tty.c
> > --- linux/net/bluetooth/rfcomm/tty.c  2008-01-17 16:09:34.0 +0800
> > +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-17 16:10:22.0 
> > +0800
> > @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_
> >   BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, 
> > dev->opened);
> >
> >   if (--dev->opened == 0) {
> > - device_move(dev->tty_dev, NULL);
> > + if (dev->tty_dev->parent)
> > + device_move(dev->tty_dev, NULL);
> >
> >   /* Close DLC and dettach TTY */
> >   

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-17 Thread Cornelia Huck
On Thu, 17 Jan 2008 16:15:04 +0800,
Dave Young <[EMAIL PROTECTED]> wrote:

> On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote:
> > On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > > Hi,
> > >
> > > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
> > >
> > > > The rfcomm tty device will possibly retain even when conn is down,
> > > > and sysfs doesn't support zombie device moving, so this patch
> > > > move the tty device before conn device is destroyed.
> > > >
> > > > Signed-off-by: Dave Young <[EMAIL PROTECTED]>
> > >
> > > This seems to work, both the oops and the hang are gone. I get these
> > > messages in syslog when the Bluetooth link hangs and I want to kill pppd
> > > with "poff":
> > >
> > > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
> > > become free. Usage count = 1
> > > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
> > > become free. Usage count = 1

Might be helpful to try with CONFIG_DEBUG_DRIVER=y and
CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped.

> > >
> > > But a "killall -9 pppd" seems to help and then the re-connect (after the
> > > phone got power-cycled) works.
> > 
> > Weird, I guess "device_move(dev, NULL) two times" cause the problem.
> > 
> > Anyway, device_move should check the old_parent and new_parent , if
> > they equal to each other then just return.

sysfs_move_dir() does this (to avoid a loop later in the function).
Don't know if that's a good thing to check at a higher level as well,
because the calling code should really know where their devices are.

Anyway, if this "move in place" exposes a refcounting bug, we must fix
it.

> > 
> > Am I right?
> 
> Could you apply this patch as well to test? Thanks.
> 
> diff -upr linux/net/bluetooth/rfcomm/tty.c 
> linux.new/net/bluetooth/rfcomm/tty.c
> --- linux/net/bluetooth/rfcomm/tty.c  2008-01-17 16:09:34.0 +0800
> +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-17 16:10:22.0 
> +0800
> @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_
>   BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, 
> dev->opened);
> 
>   if (--dev->opened == 0) {
> - device_move(dev->tty_dev, NULL);
> + if (dev->tty_dev->parent)
> + device_move(dev->tty_dev, NULL);
> 
>   /* Close DLC and dettach TTY */
>   rfcomm_dlc_close(dev->dlc, 0);
> 

Avoiding to move devices when there is nothing to be moved nevertheless
sounds like a good idea :)
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-17 Thread Dave Young
On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote:
> On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > Hi,
> >
> > On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
> >
> > > The rfcomm tty device will possibly retain even when conn is down,
> > > and sysfs doesn't support zombie device moving, so this patch
> > > move the tty device before conn device is destroyed.
> > >
> > > Signed-off-by: Dave Young <[EMAIL PROTECTED]>
> >
> > This seems to work, both the oops and the hang are gone. I get these
> > messages in syslog when the Bluetooth link hangs and I want to kill pppd
> > with "poff":
> >
> > Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
> > become free. Usage count = 1
> > Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
> > become free. Usage count = 1
> >
> > But a "killall -9 pppd" seems to help and then the re-connect (after the
> > phone got power-cycled) works.
> 
> Weird, I guess "device_move(dev, NULL) two times" cause the problem.
> 
> Anyway, device_move should check the old_parent and new_parent , if
> they equal to each other then just return.
> 
> Am I right?

Could you apply this patch as well to test? Thanks.

diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c
--- linux/net/bluetooth/rfcomm/tty.c2008-01-17 16:09:34.0 +0800
+++ linux.new/net/bluetooth/rfcomm/tty.c2008-01-17 16:10:22.0 
+0800
@@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_
BT_DBG("tty %p dev %p dlc %p opened %d", tty, dev, dev->dlc, 
dev->opened);
 
if (--dev->opened == 0) {
-   device_move(dev->tty_dev, NULL);
+   if (dev->tty_dev->parent)
+   device_move(dev->tty_dev, NULL);
 
/* Close DLC and dettach TTY */
rfcomm_dlc_close(dev->dlc, 0);

> 
> >
> >
> > Gabor
> >
> > --
> >  -
> >  MTA SZTAKI Computer and Automation Research Institute
> > Hungarian Academy of Sciences
> >  -
> >
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-17 Thread Cornelia Huck
On Thu, 17 Jan 2008 16:15:04 +0800,
Dave Young [EMAIL PROTECTED] wrote:

 On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote:
  On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
   Hi,
  
   On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
  
The rfcomm tty device will possibly retain even when conn is down,
and sysfs doesn't support zombie device moving, so this patch
move the tty device before conn device is destroyed.
   
Signed-off-by: Dave Young [EMAIL PROTECTED]
  
   This seems to work, both the oops and the hang are gone. I get these
   messages in syslog when the Bluetooth link hangs and I want to kill pppd
   with poff:
  
   Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
   become free. Usage count = 1
   Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
   become free. Usage count = 1

Might be helpful to try with CONFIG_DEBUG_DRIVER=y and
CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped.

  
   But a killall -9 pppd seems to help and then the re-connect (after the
   phone got power-cycled) works.
  
  Weird, I guess device_move(dev, NULL) two times cause the problem.
  
  Anyway, device_move should check the old_parent and new_parent , if
  they equal to each other then just return.

sysfs_move_dir() does this (to avoid a loop later in the function).
Don't know if that's a good thing to check at a higher level as well,
because the calling code should really know where their devices are.

Anyway, if this move in place exposes a refcounting bug, we must fix
it.

  
  Am I right?
 
 Could you apply this patch as well to test? Thanks.
 
 diff -upr linux/net/bluetooth/rfcomm/tty.c 
 linux.new/net/bluetooth/rfcomm/tty.c
 --- linux/net/bluetooth/rfcomm/tty.c  2008-01-17 16:09:34.0 +0800
 +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-17 16:10:22.0 
 +0800
 @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_
   BT_DBG(tty %p dev %p dlc %p opened %d, tty, dev, dev-dlc, 
 dev-opened);
 
   if (--dev-opened == 0) {
 - device_move(dev-tty_dev, NULL);
 + if (dev-tty_dev-parent)
 + device_move(dev-tty_dev, NULL);
 
   /* Close DLC and dettach TTY */
   rfcomm_dlc_close(dev-dlc, 0);
 

Avoiding to move devices when there is nothing to be moved nevertheless
sounds like a good idea :)
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-17 Thread Dave Young
On Jan 17, 2008 7:42 PM, Cornelia Huck [EMAIL PROTECTED] wrote:
 On Thu, 17 Jan 2008 16:15:04 +0800,
 Dave Young [EMAIL PROTECTED] wrote:

  On Thu, Jan 17, 2008 at 03:24:50PM +0800, Dave Young wrote:
   On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
Hi,
   
On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
   
 The rfcomm tty device will possibly retain even when conn is down,
 and sysfs doesn't support zombie device moving, so this patch
 move the tty device before conn device is destroyed.

 Signed-off-by: Dave Young [EMAIL PROTECTED]
   
This seems to work, both the oops and the hang are gone. I get these
messages in syslog when the Bluetooth link hangs and I want to kill pppd
with poff:
   
Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 
to become free. Usage count = 1
Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 
to become free. Usage count = 1

 Might be helpful to try with CONFIG_DEBUG_DRIVER=y and
 CONFIG_KOBJECT_DEBUG=y to spot where a reference is not dropped.

   
But a killall -9 pppd seems to help and then the re-connect (after the
phone got power-cycled) works.
  
   Weird, I guess device_move(dev, NULL) two times cause the problem.
  
   Anyway, device_move should check the old_parent and new_parent , if
   they equal to each other then just return.

 sysfs_move_dir() does this (to avoid a loop later in the function).
 Don't know if that's a good thing to check at a higher level as well,
 because the calling code should really know where their devices are.

 Anyway, if this move in place exposes a refcounting bug, we must fix
 it.

Lets see the device_move function, seems there's some problems in it:

1302 int device_move(struct device *dev, struct device *new_parent)
1303 {
1304 int error;
1305 struct device *old_parent;
1306 struct kobject *new_parent_kobj;
1307
1308 dev = get_device(dev);
1309 if (!dev)
1310 return -EINVAL;
1311
1312 new_parent = get_device(new_parent);
1313 new_parent_kobj = get_device_parent (dev, new_parent);

Here could get kobject reference

1314 if (IS_ERR(new_parent_kobj)) {
1315 error = PTR_ERR(new_parent_kobj);
1316 put_device(new_parent);
1317 goto out;
1318 }
1319 pr_debug(DEVICE: moving '%s' to '%s'\n, dev-bus_id,
1320  new_parent ? new_parent-bus_id : NULL);
1321 error = kobject_move(dev-kobj, new_parent_kobj);
1322 if (error) {
1323 put_device(new_parent);

imagine new_parent is NULL, then the new_parent_kobj should be put

1324 goto out;
1325 }
1326 old_parent = dev-parent;
1327 dev-parent = new_parent;
1328 if (old_parent)
1329 klist_remove(dev-knode_parent);
1330 if (new_parent)
1331 klist_add_tail(dev-knode_parent,
new_parent-klist_children);
1332 if (!dev-class)
1333 goto out_put;

Why not put new_parent | new_parent_kobj?

1334 error = device_move_class_links(dev, old_parent, new_parent);
1335 if (error) {
1336 /* We ignore errors on cleanup since we're hosed
anyway... */
1337 device_move_class_links(dev, new_parent, old_parent);
1338 if (!kobject_move(dev-kobj, old_parent-kobj)) {
1339 if (new_parent)
1340 klist_remove(dev-knode_parent);
1341 if (old_parent)
1342 klist_add_tail(dev-knode_parent,
1343
old_parent-klist_children);
1344 }
1345 put_device(new_parent);

Same doubt as above

1346 goto out;
1347 }
1348 out_put:
1349 put_device(old_parent);
1350 out:
1351 put_device(dev);
1352 return error;
1353 }

Hope I'm wrong, but if it's indeed bugs, I will send a patch about this.


  
   Am I right?
 
  Could you apply this patch as well to test? Thanks.
 
  diff -upr linux/net/bluetooth/rfcomm/tty.c 
  linux.new/net/bluetooth/rfcomm/tty.c
  --- linux/net/bluetooth/rfcomm/tty.c  2008-01-17 16:09:34.0 +0800
  +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-17 16:10:22.0 
  +0800
  @@ -692,7 +692,8 @@ static void rfcomm_tty_close(struct tty_
BT_DBG(tty %p dev %p dlc %p opened %d, tty, dev, dev-dlc, 
  dev-opened);
 
if (--dev-opened == 0) {
  - device_move(dev-tty_dev, NULL);
  + if (dev-tty_dev-parent)
  + device_move(dev-tty_dev, NULL);
 
/* Close DLC and dettach TTY */
rfcomm_dlc_close(dev-dlc, 0);
 

 Avoiding to move devices when there is nothing to be moved nevertheless
 sounds like a good idea :)

--
To unsubscribe from this list: send the line 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-16 Thread Dave Young
On Jan 17, 2008 7:06 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> Hi,
>
> On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:
>
> > The rfcomm tty device will possibly retain even when conn is down,
> > and sysfs doesn't support zombie device moving, so this patch
> > move the tty device before conn device is destroyed.
> >
> > Signed-off-by: Dave Young <[EMAIL PROTECTED]>
>
> This seems to work, both the oops and the hang are gone. I get these
> messages in syslog when the Bluetooth link hangs and I want to kill pppd
> with "poff":
>
> Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
> become free. Usage count = 1
> Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
> become free. Usage count = 1
>
> But a "killall -9 pppd" seems to help and then the re-connect (after the
> phone got power-cycled) works.

Weird, I guess "device_move(dev, NULL) two times" cause the problem.

Anyway, device_move should check the old_parent and new_parent , if
they equal to each other then just return.

Am I right?

>
>
> Gabor
>
> --
>  -
>  MTA SZTAKI Computer and Automation Research Institute
> Hungarian Academy of Sciences
>  -
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-16 Thread Gabor Gombas
Hi,

On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:

> The rfcomm tty device will possibly retain even when conn is down,
> and sysfs doesn't support zombie device moving, so this patch
> move the tty device before conn device is destroyed.
> 
> Signed-off-by: Dave Young <[EMAIL PROTECTED]> 

This seems to work, both the oops and the hang are gone. I get these
messages in syslog when the Bluetooth link hangs and I want to kill pppd
with "poff":

Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
become free. Usage count = 1
Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
become free. Usage count = 1

But a "killall -9 pppd" seems to help and then the re-connect (after the
phone got power-cycled) works.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-16 Thread Dave Young
On Jan 17, 2008 7:06 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
 Hi,

 On Wed, Jan 16, 2008 at 09:02:05AM +0800, Dave Young wrote:

  The rfcomm tty device will possibly retain even when conn is down,
  and sysfs doesn't support zombie device moving, so this patch
  move the tty device before conn device is destroyed.
 
  Signed-off-by: Dave Young [EMAIL PROTECTED]

 This seems to work, both the oops and the hang are gone. I get these
 messages in syslog when the Bluetooth link hangs and I want to kill pppd
 with poff:

 Jan 16 23:55:59 twister kernel: unregister_netdevice: waiting for ppp0 to 
 become free. Usage count = 1
 Jan 16 23:56:09 twister kernel: unregister_netdevice: waiting for ppp0 to 
 become free. Usage count = 1

 But a killall -9 pppd seems to help and then the re-connect (after the
 phone got power-cycled) works.

Weird, I guess device_move(dev, NULL) two times cause the problem.

Anyway, device_move should check the old_parent and new_parent , if
they equal to each other then just return.

Am I right?



 Gabor

 --
  -
  MTA SZTAKI Computer and Automation Research Institute
 Hungarian Academy of Sciences
  -

--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-15 Thread Dave Young
On Tue, Jan 15, 2008 at 09:57:41AM +0800, Dave Young wrote:
> On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote:
> > On Mon, 14 Jan 2008 15:05:19 +0800,
> > "Dave Young" <[EMAIL PROTECTED]> wrote:
> > 
> > > On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > > > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
> > > >
> > > > > For bluetooth device_move, the only child device of hci_conn dev is
> > > > > the rfcomm tty_dev. How about the following patch, please verify :
> > > >
> > > > There is now no oops, instead the keyboard becomes almost completely
> > > > unresponsible when I switch off & on the phone. The mouse still works
> > > > (tested both with X and with the VGA console), but terminal input and VT
> > > > switching is dead. On the VGA console, even scrolling using Shift+PgUp
> > > > stops working.
> > > >
> > > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
> > > > init complains about hung processes and that it can not umount the file
> > > > systems and cannot stop the RAID arrays. Once it still rebooted though,
> > > > the second time it got hung after trying to umount the filesystems, and
> > > > I had to use Alt+SysRq+b.
> > > >
> > > > If I can choose then I prefer the Oops...
> > > >
> > > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
> > > > Jan 11 23:46:35 twister kernel: Emergency Sync complete
> > > > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
> > > > Jan 11 23:46:42 twister kernel:   taskPC stack  
> > > >  pid father
> > > > Jan 11 23:46:42 twister kernel: events/0  D 80487190 0  
> > > >5  2
> > > > Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
> > > > 8100ae7f2600 8100bf84fd00
> > > > Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
> > > > 80652ae0 00010100
> > > > Jan 11 23:46:42 twister kernel:   7fff 
> > > > 7fff 0002
> > > > Jan 11 23:46:42 twister kernel: Call Trace:
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > schedule_timeout+0x1e/0xad
> > > > Jan 11 23:46:42 twister kernel:  [] dput+0x26/0x103
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > sysfs_move_dir+0x1ee/0x1fd
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > wait_for_common+0xc4/0x129
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > default_wake_function+0x0/0xe
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > klist_del+0x15/0x2e
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > device_move+0x80/0x111
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > :bluetooth:hci_conn_move_child+0x0/0xf
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > :bluetooth:hci_conn_move_child+0xb/0xf
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > device_for_each_child+0x22/0x4d
> > 
> > The problem here is that device_for_each_child() will elevate the
> > klist_node's refcount while the callback function calls device_move().
> > device_move() calls klist_remove() on the same node, which will do a
> > klist_del() and then wait for the refcount to drop to 0...
> 
> My wrong, it is the problem, thanks.
> 
> > 
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > :bluetooth:del_conn+0x0/0x22
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > :bluetooth:del_conn+0x19/0x22
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > run_workqueue+0x74/0xee
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > worker_thread+0x0/0xe7
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > worker_thread+0xda/0xe7
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > autoremove_wake_function+0x0/0x2e
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > worker_thread+0x0/0xe7
> > > > Jan 11 23:46:42 twister kernel:  [] kthread+0x47/0x73
> > > > Jan 11 23:46:42 twister kernel:  [] child_rip+0xa/0x12
> > > > Jan 11 23:46:42 twister kernel:  [] kthread+0x0/0x73
> > > > Jan 11 23:46:42 twister kernel:  [] child_rip+0x0/0x12
> > > > Jan 11 23:46:42 twister kernel:
> > > > Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
> > > > 3978   3965
> > > > Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
> > > > 8100ae7f2900 81009e857ca8
> > > > Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
> > > > 80652ae0 000100c0
> > > > Jan 11 23:46:42 twister kernel:   7fff 
> > > > 7fff 0002
> > > > Jan 11 23:46:42 twister kernel: Call Trace:
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > schedule_timeout+0x1e/0xad
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > __dequeue_entity+0x1c/0x32
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > set_next_entity+0x23/0x73
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > wait_for_common+0xc4/0x129
> > > > Jan 11 23:46:42 twister kernel:  [] 
> > > > default_wake_function+0x0/0xe
> > > > Jan 11 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-15 Thread Dave Young
On Tue, Jan 15, 2008 at 09:57:41AM +0800, Dave Young wrote:
 On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote:
  On Mon, 14 Jan 2008 15:05:19 +0800,
  Dave Young [EMAIL PROTECTED] wrote:
  
   On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
   
 For bluetooth device_move, the only child device of hci_conn dev is
 the rfcomm tty_dev. How about the following patch, please verify :
   
There is now no oops, instead the keyboard becomes almost completely
unresponsible when I switch off  on the phone. The mouse still works
(tested both with X and with the VGA console), but terminal input and VT
switching is dead. On the VGA console, even scrolling using Shift+PgUp
stops working.
   
Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
init complains about hung processes and that it can not umount the file
systems and cannot stop the RAID arrays. Once it still rebooted though,
the second time it got hung after trying to umount the filesystems, and
I had to use Alt+SysRq+b.
   
If I can choose then I prefer the Oops...
   
Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
Jan 11 23:46:35 twister kernel: Emergency Sync complete
Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
Jan 11 23:46:42 twister kernel:   taskPC stack  
 pid father
Jan 11 23:46:42 twister kernel: events/0  D 80487190 0  
   5  2
Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
8100ae7f2600 8100bf84fd00
Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
80652ae0 00010100
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [80473543] 
schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [8028560d] dput+0x26/0x103
Jan 11 23:46:42 twister kernel:  [802af3fd] 
sysfs_move_dir+0x1ee/0x1fd
Jan 11 23:46:42 twister kernel:  [8047340b] 
wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [80224204] 
default_wake_function+0x0/0xe
Jan 11 23:46:42 twister kernel:  [80471d72] 
klist_del+0x15/0x2e
Jan 11 23:46:42 twister kernel:  [803698f3] 
device_move+0x80/0x111
Jan 11 23:46:42 twister kernel:  [88146a27] 
:bluetooth:hci_conn_move_child+0x0/0xf
Jan 11 23:46:42 twister kernel:  [88146a32] 
:bluetooth:hci_conn_move_child+0xb/0xf
Jan 11 23:46:42 twister kernel:  [80369836] 
device_for_each_child+0x22/0x4d
  
  The problem here is that device_for_each_child() will elevate the
  klist_node's refcount while the callback function calls device_move().
  device_move() calls klist_remove() on the same node, which will do a
  klist_del() and then wait for the refcount to drop to 0...
 
 My wrong, it is the problem, thanks.
 
  
Jan 11 23:46:42 twister kernel:  [88146a05] 
:bluetooth:del_conn+0x0/0x22
Jan 11 23:46:42 twister kernel:  [88146a1e] 
:bluetooth:del_conn+0x19/0x22
Jan 11 23:46:42 twister kernel:  [8023696d] 
run_workqueue+0x74/0xee
Jan 11 23:46:42 twister kernel:  [80236ff8] 
worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [802370d2] 
worker_thread+0xda/0xe7
Jan 11 23:46:42 twister kernel:  [80239b77] 
autoremove_wake_function+0x0/0x2e
Jan 11 23:46:42 twister kernel:  [80236ff8] 
worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [802399a0] kthread+0x47/0x73
Jan 11 23:46:42 twister kernel:  [8020bdc8] child_rip+0xa/0x12
Jan 11 23:46:42 twister kernel:  [80239959] kthread+0x0/0x73
Jan 11 23:46:42 twister kernel:  [8020bdbe] child_rip+0x0/0x12
Jan 11 23:46:42 twister kernel:
Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
3978   3965
Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
8100ae7f2900 81009e857ca8
Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
80652ae0 000100c0
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [80473543] 
schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [80223ccc] 
__dequeue_entity+0x1c/0x32
Jan 11 23:46:42 twister kernel:  [80223d05] 
set_next_entity+0x23/0x73
Jan 11 23:46:42 twister kernel:  [8047340b] 
wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [80224204] 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-14 Thread Dave Young
On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote:
> On Mon, 14 Jan 2008 15:05:19 +0800,
> "Dave Young" <[EMAIL PROTECTED]> wrote:
> 
> > On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
> > >
> > > > For bluetooth device_move, the only child device of hci_conn dev is
> > > > the rfcomm tty_dev. How about the following patch, please verify :
> > >
> > > There is now no oops, instead the keyboard becomes almost completely
> > > unresponsible when I switch off & on the phone. The mouse still works
> > > (tested both with X and with the VGA console), but terminal input and VT
> > > switching is dead. On the VGA console, even scrolling using Shift+PgUp
> > > stops working.
> > >
> > > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
> > > init complains about hung processes and that it can not umount the file
> > > systems and cannot stop the RAID arrays. Once it still rebooted though,
> > > the second time it got hung after trying to umount the filesystems, and
> > > I had to use Alt+SysRq+b.
> > >
> > > If I can choose then I prefer the Oops...
> > >
> > > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
> > > Jan 11 23:46:35 twister kernel: Emergency Sync complete
> > > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
> > > Jan 11 23:46:42 twister kernel:   taskPC stack   
> > > pid father
> > > Jan 11 23:46:42 twister kernel: events/0  D 80487190 0
> > >  5  2
> > > Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
> > > 8100ae7f2600 8100bf84fd00
> > > Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
> > > 80652ae0 00010100
> > > Jan 11 23:46:42 twister kernel:   7fff 
> > > 7fff 0002
> > > Jan 11 23:46:42 twister kernel: Call Trace:
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > schedule_timeout+0x1e/0xad
> > > Jan 11 23:46:42 twister kernel:  [] dput+0x26/0x103
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > sysfs_move_dir+0x1ee/0x1fd
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > wait_for_common+0xc4/0x129
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > default_wake_function+0x0/0xe
> > > Jan 11 23:46:42 twister kernel:  [] klist_del+0x15/0x2e
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > device_move+0x80/0x111
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > :bluetooth:hci_conn_move_child+0x0/0xf
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > :bluetooth:hci_conn_move_child+0xb/0xf
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > device_for_each_child+0x22/0x4d
> 
> The problem here is that device_for_each_child() will elevate the
> klist_node's refcount while the callback function calls device_move().
> device_move() calls klist_remove() on the same node, which will do a
> klist_del() and then wait for the refcount to drop to 0...

My wrong, it is the problem, thanks.

> 
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > :bluetooth:del_conn+0x0/0x22
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > :bluetooth:del_conn+0x19/0x22
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > run_workqueue+0x74/0xee
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > worker_thread+0x0/0xe7
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > worker_thread+0xda/0xe7
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > autoremove_wake_function+0x0/0x2e
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > worker_thread+0x0/0xe7
> > > Jan 11 23:46:42 twister kernel:  [] kthread+0x47/0x73
> > > Jan 11 23:46:42 twister kernel:  [] child_rip+0xa/0x12
> > > Jan 11 23:46:42 twister kernel:  [] kthread+0x0/0x73
> > > Jan 11 23:46:42 twister kernel:  [] child_rip+0x0/0x12
> > > Jan 11 23:46:42 twister kernel:
> > > Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
> > > 3978   3965
> > > Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
> > > 8100ae7f2900 81009e857ca8
> > > Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
> > > 80652ae0 000100c0
> > > Jan 11 23:46:42 twister kernel:   7fff 
> > > 7fff 0002
> > > Jan 11 23:46:42 twister kernel: Call Trace:
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > schedule_timeout+0x1e/0xad
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > __dequeue_entity+0x1c/0x32
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > set_next_entity+0x23/0x73
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > wait_for_common+0xc4/0x129
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > default_wake_function+0x0/0xe
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > flush_cpu_workqueue+0x50/0x58
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > wq_barrier_func+0x0/0x9
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > flush_workqueue+0x9/0x12
> > > Jan 11 23:46:42 twister kernel:  [] 
> > > 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-14 Thread Cornelia Huck
On Mon, 14 Jan 2008 15:05:19 +0800,
"Dave Young" <[EMAIL PROTECTED]> wrote:

> On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> > On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
> >
> > > For bluetooth device_move, the only child device of hci_conn dev is
> > > the rfcomm tty_dev. How about the following patch, please verify :
> >
> > There is now no oops, instead the keyboard becomes almost completely
> > unresponsible when I switch off & on the phone. The mouse still works
> > (tested both with X and with the VGA console), but terminal input and VT
> > switching is dead. On the VGA console, even scrolling using Shift+PgUp
> > stops working.
> >
> > Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
> > init complains about hung processes and that it can not umount the file
> > systems and cannot stop the RAID arrays. Once it still rebooted though,
> > the second time it got hung after trying to umount the filesystems, and
> > I had to use Alt+SysRq+b.
> >
> > If I can choose then I prefer the Oops...
> >
> > Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
> > Jan 11 23:46:35 twister kernel: Emergency Sync complete
> > Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
> > Jan 11 23:46:42 twister kernel:   taskPC stack   
> > pid father
> > Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 
> > 5  2
> > Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
> > 8100ae7f2600 8100bf84fd00
> > Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
> > 80652ae0 00010100
> > Jan 11 23:46:42 twister kernel:   7fff 
> > 7fff 0002
> > Jan 11 23:46:42 twister kernel: Call Trace:
> > Jan 11 23:46:42 twister kernel:  [] 
> > schedule_timeout+0x1e/0xad
> > Jan 11 23:46:42 twister kernel:  [] dput+0x26/0x103
> > Jan 11 23:46:42 twister kernel:  [] 
> > sysfs_move_dir+0x1ee/0x1fd
> > Jan 11 23:46:42 twister kernel:  [] 
> > wait_for_common+0xc4/0x129
> > Jan 11 23:46:42 twister kernel:  [] 
> > default_wake_function+0x0/0xe
> > Jan 11 23:46:42 twister kernel:  [] klist_del+0x15/0x2e
> > Jan 11 23:46:42 twister kernel:  [] device_move+0x80/0x111
> > Jan 11 23:46:42 twister kernel:  [] 
> > :bluetooth:hci_conn_move_child+0x0/0xf
> > Jan 11 23:46:42 twister kernel:  [] 
> > :bluetooth:hci_conn_move_child+0xb/0xf
> > Jan 11 23:46:42 twister kernel:  [] 
> > device_for_each_child+0x22/0x4d

The problem here is that device_for_each_child() will elevate the
klist_node's refcount while the callback function calls device_move().
device_move() calls klist_remove() on the same node, which will do a
klist_del() and then wait for the refcount to drop to 0...

> > Jan 11 23:46:42 twister kernel:  [] 
> > :bluetooth:del_conn+0x0/0x22
> > Jan 11 23:46:42 twister kernel:  [] 
> > :bluetooth:del_conn+0x19/0x22
> > Jan 11 23:46:42 twister kernel:  [] 
> > run_workqueue+0x74/0xee
> > Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
> > Jan 11 23:46:42 twister kernel:  [] 
> > worker_thread+0xda/0xe7
> > Jan 11 23:46:42 twister kernel:  [] 
> > autoremove_wake_function+0x0/0x2e
> > Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
> > Jan 11 23:46:42 twister kernel:  [] kthread+0x47/0x73
> > Jan 11 23:46:42 twister kernel:  [] child_rip+0xa/0x12
> > Jan 11 23:46:42 twister kernel:  [] kthread+0x0/0x73
> > Jan 11 23:46:42 twister kernel:  [] child_rip+0x0/0x12
> > Jan 11 23:46:42 twister kernel:
> > Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
> > 3978   3965
> > Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
> > 8100ae7f2900 81009e857ca8
> > Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
> > 80652ae0 000100c0
> > Jan 11 23:46:42 twister kernel:   7fff 
> > 7fff 0002
> > Jan 11 23:46:42 twister kernel: Call Trace:
> > Jan 11 23:46:42 twister kernel:  [] 
> > schedule_timeout+0x1e/0xad
> > Jan 11 23:46:42 twister kernel:  [] 
> > __dequeue_entity+0x1c/0x32
> > Jan 11 23:46:42 twister kernel:  [] 
> > set_next_entity+0x23/0x73
> > Jan 11 23:46:42 twister kernel:  [] 
> > wait_for_common+0xc4/0x129
> > Jan 11 23:46:42 twister kernel:  [] 
> > default_wake_function+0x0/0xe
> > Jan 11 23:46:42 twister kernel:  [] 
> > flush_cpu_workqueue+0x50/0x58
> > Jan 11 23:46:42 twister kernel:  [] 
> > wq_barrier_func+0x0/0x9
> > Jan 11 23:46:42 twister kernel:  [] 
> > flush_workqueue+0x9/0x12
> > Jan 11 23:46:42 twister kernel:  [] 
> > release_dev+0x47c/0x5e2
> > Jan 11 23:46:42 twister kernel:  [] 
> > do_page_fault+0x2ff/0x65a
> > Jan 11 23:46:42 twister kernel:  [] tty_release+0xc/0x10
> > Jan 11 23:46:42 twister kernel:  [] __fput+0xb1/0x16f
> > Jan 11 23:46:42 twister kernel:  [] filp_close+0x5d/0x65
> > Jan 11 23:46:42 twister kernel:  [] sys_close+0x73/0xa6
> > Jan 11 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-14 Thread Cornelia Huck
On Mon, 14 Jan 2008 15:05:19 +0800,
Dave Young [EMAIL PROTECTED] wrote:

 On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
  On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
 
   For bluetooth device_move, the only child device of hci_conn dev is
   the rfcomm tty_dev. How about the following patch, please verify :
 
  There is now no oops, instead the keyboard becomes almost completely
  unresponsible when I switch off  on the phone. The mouse still works
  (tested both with X and with the VGA console), but terminal input and VT
  switching is dead. On the VGA console, even scrolling using Shift+PgUp
  stops working.
 
  Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
  init complains about hung processes and that it can not umount the file
  systems and cannot stop the RAID arrays. Once it still rebooted though,
  the second time it got hung after trying to umount the filesystems, and
  I had to use Alt+SysRq+b.
 
  If I can choose then I prefer the Oops...
 
  Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
  Jan 11 23:46:35 twister kernel: Emergency Sync complete
  Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
  Jan 11 23:46:42 twister kernel:   taskPC stack   
  pid father
  Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 
  5  2
  Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
  8100ae7f2600 8100bf84fd00
  Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
  80652ae0 00010100
  Jan 11 23:46:42 twister kernel:   7fff 
  7fff 0002
  Jan 11 23:46:42 twister kernel: Call Trace:
  Jan 11 23:46:42 twister kernel:  [80473543] 
  schedule_timeout+0x1e/0xad
  Jan 11 23:46:42 twister kernel:  [8028560d] dput+0x26/0x103
  Jan 11 23:46:42 twister kernel:  [802af3fd] 
  sysfs_move_dir+0x1ee/0x1fd
  Jan 11 23:46:42 twister kernel:  [8047340b] 
  wait_for_common+0xc4/0x129
  Jan 11 23:46:42 twister kernel:  [80224204] 
  default_wake_function+0x0/0xe
  Jan 11 23:46:42 twister kernel:  [80471d72] klist_del+0x15/0x2e
  Jan 11 23:46:42 twister kernel:  [803698f3] device_move+0x80/0x111
  Jan 11 23:46:42 twister kernel:  [88146a27] 
  :bluetooth:hci_conn_move_child+0x0/0xf
  Jan 11 23:46:42 twister kernel:  [88146a32] 
  :bluetooth:hci_conn_move_child+0xb/0xf
  Jan 11 23:46:42 twister kernel:  [80369836] 
  device_for_each_child+0x22/0x4d

The problem here is that device_for_each_child() will elevate the
klist_node's refcount while the callback function calls device_move().
device_move() calls klist_remove() on the same node, which will do a
klist_del() and then wait for the refcount to drop to 0...

  Jan 11 23:46:42 twister kernel:  [88146a05] 
  :bluetooth:del_conn+0x0/0x22
  Jan 11 23:46:42 twister kernel:  [88146a1e] 
  :bluetooth:del_conn+0x19/0x22
  Jan 11 23:46:42 twister kernel:  [8023696d] 
  run_workqueue+0x74/0xee
  Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
  Jan 11 23:46:42 twister kernel:  [802370d2] 
  worker_thread+0xda/0xe7
  Jan 11 23:46:42 twister kernel:  [80239b77] 
  autoremove_wake_function+0x0/0x2e
  Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
  Jan 11 23:46:42 twister kernel:  [802399a0] kthread+0x47/0x73
  Jan 11 23:46:42 twister kernel:  [8020bdc8] child_rip+0xa/0x12
  Jan 11 23:46:42 twister kernel:  [80239959] kthread+0x0/0x73
  Jan 11 23:46:42 twister kernel:  [8020bdbe] child_rip+0x0/0x12
  Jan 11 23:46:42 twister kernel:
  Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
  3978   3965
  Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
  8100ae7f2900 81009e857ca8
  Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
  80652ae0 000100c0
  Jan 11 23:46:42 twister kernel:   7fff 
  7fff 0002
  Jan 11 23:46:42 twister kernel: Call Trace:
  Jan 11 23:46:42 twister kernel:  [80473543] 
  schedule_timeout+0x1e/0xad
  Jan 11 23:46:42 twister kernel:  [80223ccc] 
  __dequeue_entity+0x1c/0x32
  Jan 11 23:46:42 twister kernel:  [80223d05] 
  set_next_entity+0x23/0x73
  Jan 11 23:46:42 twister kernel:  [8047340b] 
  wait_for_common+0xc4/0x129
  Jan 11 23:46:42 twister kernel:  [80224204] 
  default_wake_function+0x0/0xe
  Jan 11 23:46:42 twister kernel:  [80236b02] 
  flush_cpu_workqueue+0x50/0x58
  Jan 11 23:46:42 twister kernel:  [80236c32] 
  wq_barrier_func+0x0/0x9
  Jan 11 23:46:42 twister kernel:  [80236ecf] 
  flush_workqueue+0x9/0x12
  Jan 11 23:46:42 twister kernel:  [803449f9] 
  release_dev+0x47c/0x5e2
  Jan 11 23:46:42 twister kernel:  

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-14 Thread Dave Young
On Mon, Jan 14, 2008 at 01:52:28PM +0100, Cornelia Huck wrote:
 On Mon, 14 Jan 2008 15:05:19 +0800,
 Dave Young [EMAIL PROTECTED] wrote:
 
  On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
   On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
  
For bluetooth device_move, the only child device of hci_conn dev is
the rfcomm tty_dev. How about the following patch, please verify :
  
   There is now no oops, instead the keyboard becomes almost completely
   unresponsible when I switch off  on the phone. The mouse still works
   (tested both with X and with the VGA console), but terminal input and VT
   switching is dead. On the VGA console, even scrolling using Shift+PgUp
   stops working.
  
   Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
   init complains about hung processes and that it can not umount the file
   systems and cannot stop the RAID arrays. Once it still rebooted though,
   the second time it got hung after trying to umount the filesystems, and
   I had to use Alt+SysRq+b.
  
   If I can choose then I prefer the Oops...
  
   Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
   Jan 11 23:46:35 twister kernel: Emergency Sync complete
   Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
   Jan 11 23:46:42 twister kernel:   taskPC stack   
   pid father
   Jan 11 23:46:42 twister kernel: events/0  D 80487190 0
5  2
   Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
   8100ae7f2600 8100bf84fd00
   Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
   80652ae0 00010100
   Jan 11 23:46:42 twister kernel:   7fff 
   7fff 0002
   Jan 11 23:46:42 twister kernel: Call Trace:
   Jan 11 23:46:42 twister kernel:  [80473543] 
   schedule_timeout+0x1e/0xad
   Jan 11 23:46:42 twister kernel:  [8028560d] dput+0x26/0x103
   Jan 11 23:46:42 twister kernel:  [802af3fd] 
   sysfs_move_dir+0x1ee/0x1fd
   Jan 11 23:46:42 twister kernel:  [8047340b] 
   wait_for_common+0xc4/0x129
   Jan 11 23:46:42 twister kernel:  [80224204] 
   default_wake_function+0x0/0xe
   Jan 11 23:46:42 twister kernel:  [80471d72] klist_del+0x15/0x2e
   Jan 11 23:46:42 twister kernel:  [803698f3] 
   device_move+0x80/0x111
   Jan 11 23:46:42 twister kernel:  [88146a27] 
   :bluetooth:hci_conn_move_child+0x0/0xf
   Jan 11 23:46:42 twister kernel:  [88146a32] 
   :bluetooth:hci_conn_move_child+0xb/0xf
   Jan 11 23:46:42 twister kernel:  [80369836] 
   device_for_each_child+0x22/0x4d
 
 The problem here is that device_for_each_child() will elevate the
 klist_node's refcount while the callback function calls device_move().
 device_move() calls klist_remove() on the same node, which will do a
 klist_del() and then wait for the refcount to drop to 0...

My wrong, it is the problem, thanks.

 
   Jan 11 23:46:42 twister kernel:  [88146a05] 
   :bluetooth:del_conn+0x0/0x22
   Jan 11 23:46:42 twister kernel:  [88146a1e] 
   :bluetooth:del_conn+0x19/0x22
   Jan 11 23:46:42 twister kernel:  [8023696d] 
   run_workqueue+0x74/0xee
   Jan 11 23:46:42 twister kernel:  [80236ff8] 
   worker_thread+0x0/0xe7
   Jan 11 23:46:42 twister kernel:  [802370d2] 
   worker_thread+0xda/0xe7
   Jan 11 23:46:42 twister kernel:  [80239b77] 
   autoremove_wake_function+0x0/0x2e
   Jan 11 23:46:42 twister kernel:  [80236ff8] 
   worker_thread+0x0/0xe7
   Jan 11 23:46:42 twister kernel:  [802399a0] kthread+0x47/0x73
   Jan 11 23:46:42 twister kernel:  [8020bdc8] child_rip+0xa/0x12
   Jan 11 23:46:42 twister kernel:  [80239959] kthread+0x0/0x73
   Jan 11 23:46:42 twister kernel:  [8020bdbe] child_rip+0x0/0x12
   Jan 11 23:46:42 twister kernel:
   Jan 11 23:46:42 twister kernel: cat   D 7fff 0  
   3978   3965
   Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
   8100ae7f2900 81009e857ca8
   Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
   80652ae0 000100c0
   Jan 11 23:46:42 twister kernel:   7fff 
   7fff 0002
   Jan 11 23:46:42 twister kernel: Call Trace:
   Jan 11 23:46:42 twister kernel:  [80473543] 
   schedule_timeout+0x1e/0xad
   Jan 11 23:46:42 twister kernel:  [80223ccc] 
   __dequeue_entity+0x1c/0x32
   Jan 11 23:46:42 twister kernel:  [80223d05] 
   set_next_entity+0x23/0x73
   Jan 11 23:46:42 twister kernel:  [8047340b] 
   wait_for_common+0xc4/0x129
   Jan 11 23:46:42 twister kernel:  [80224204] 
   default_wake_function+0x0/0xe
   Jan 11 23:46:42 twister kernel:  [80236b02] 
   flush_cpu_workqueue+0x50/0x58
   Jan 11 23:46:42 twister kernel:  [80236c32] 
   

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-13 Thread Dave Young
On Jan 12, 2008 7:09 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:
>
> > For bluetooth device_move, the only child device of hci_conn dev is
> > the rfcomm tty_dev. How about the following patch, please verify :
>
> There is now no oops, instead the keyboard becomes almost completely
> unresponsible when I switch off & on the phone. The mouse still works
> (tested both with X and with the VGA console), but terminal input and VT
> switching is dead. On the VGA console, even scrolling using Shift+PgUp
> stops working.
>
> Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
> init complains about hung processes and that it can not umount the file
> systems and cannot stop the RAID arrays. Once it still rebooted though,
> the second time it got hung after trying to umount the filesystems, and
> I had to use Alt+SysRq+b.
>
> If I can choose then I prefer the Oops...
>
> Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
> Jan 11 23:46:35 twister kernel: Emergency Sync complete
> Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
> Jan 11 23:46:42 twister kernel:   taskPC stack   pid 
> father
> Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 5  
> 2
> Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
> 8100ae7f2600 8100bf84fd00
> Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
> 80652ae0 00010100
> Jan 11 23:46:42 twister kernel:   7fff 
> 7fff 0002
> Jan 11 23:46:42 twister kernel: Call Trace:
> Jan 11 23:46:42 twister kernel:  [] 
> schedule_timeout+0x1e/0xad
> Jan 11 23:46:42 twister kernel:  [] dput+0x26/0x103
> Jan 11 23:46:42 twister kernel:  [] 
> sysfs_move_dir+0x1ee/0x1fd
> Jan 11 23:46:42 twister kernel:  [] 
> wait_for_common+0xc4/0x129
> Jan 11 23:46:42 twister kernel:  [] 
> default_wake_function+0x0/0xe
> Jan 11 23:46:42 twister kernel:  [] klist_del+0x15/0x2e
> Jan 11 23:46:42 twister kernel:  [] device_move+0x80/0x111
> Jan 11 23:46:42 twister kernel:  [] 
> :bluetooth:hci_conn_move_child+0x0/0xf
> Jan 11 23:46:42 twister kernel:  [] 
> :bluetooth:hci_conn_move_child+0xb/0xf
> Jan 11 23:46:42 twister kernel:  [] 
> device_for_each_child+0x22/0x4d
> Jan 11 23:46:42 twister kernel:  [] 
> :bluetooth:del_conn+0x0/0x22
> Jan 11 23:46:42 twister kernel:  [] 
> :bluetooth:del_conn+0x19/0x22
> Jan 11 23:46:42 twister kernel:  [] run_workqueue+0x74/0xee
> Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
> Jan 11 23:46:42 twister kernel:  [] worker_thread+0xda/0xe7
> Jan 11 23:46:42 twister kernel:  [] 
> autoremove_wake_function+0x0/0x2e
> Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
> Jan 11 23:46:42 twister kernel:  [] kthread+0x47/0x73
> Jan 11 23:46:42 twister kernel:  [] child_rip+0xa/0x12
> Jan 11 23:46:42 twister kernel:  [] kthread+0x0/0x73
> Jan 11 23:46:42 twister kernel:  [] child_rip+0x0/0x12
> Jan 11 23:46:42 twister kernel:
> Jan 11 23:46:42 twister kernel: cat   D 7fff 0  3978  
>  3965
> Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
> 8100ae7f2900 81009e857ca8
> Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
> 80652ae0 000100c0
> Jan 11 23:46:42 twister kernel:   7fff 
> 7fff 0002
> Jan 11 23:46:42 twister kernel: Call Trace:
> Jan 11 23:46:42 twister kernel:  [] 
> schedule_timeout+0x1e/0xad
> Jan 11 23:46:42 twister kernel:  [] 
> __dequeue_entity+0x1c/0x32
> Jan 11 23:46:42 twister kernel:  [] 
> set_next_entity+0x23/0x73
> Jan 11 23:46:42 twister kernel:  [] 
> wait_for_common+0xc4/0x129
> Jan 11 23:46:42 twister kernel:  [] 
> default_wake_function+0x0/0xe
> Jan 11 23:46:42 twister kernel:  [] 
> flush_cpu_workqueue+0x50/0x58
> Jan 11 23:46:42 twister kernel:  [] wq_barrier_func+0x0/0x9
> Jan 11 23:46:42 twister kernel:  [] flush_workqueue+0x9/0x12
> Jan 11 23:46:42 twister kernel:  [] release_dev+0x47c/0x5e2
> Jan 11 23:46:42 twister kernel:  [] 
> do_page_fault+0x2ff/0x65a
> Jan 11 23:46:42 twister kernel:  [] tty_release+0xc/0x10
> Jan 11 23:46:42 twister kernel:  [] __fput+0xb1/0x16f
> Jan 11 23:46:42 twister kernel:  [] filp_close+0x5d/0x65
> Jan 11 23:46:42 twister kernel:  [] sys_close+0x73/0xa6
> Jan 11 23:46:42 twister kernel:  [] tracesys+0xdc/0xe1
> Jan 11 23:46:42 twister kernel:
>
> If I'm not mistaken the "cat" above is the "cat /dev/zero >
> /dev/rfcomm0" command.
>

Any idea about this?

Add marcel and davem in cc-list.

>
> Gabor
>
> --
>  -
>  MTA SZTAKI Computer and Automation Research Institute
> Hungarian Academy of Sciences
>  -
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-13 Thread Dave Young
On Jan 12, 2008 7:09 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
 On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:

  For bluetooth device_move, the only child device of hci_conn dev is
  the rfcomm tty_dev. How about the following patch, please verify :

 There is now no oops, instead the keyboard becomes almost completely
 unresponsible when I switch off  on the phone. The mouse still works
 (tested both with X and with the VGA console), but terminal input and VT
 switching is dead. On the VGA console, even scrolling using Shift+PgUp
 stops working.

 Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
 init complains about hung processes and that it can not umount the file
 systems and cannot stop the RAID arrays. Once it still rebooted though,
 the second time it got hung after trying to umount the filesystems, and
 I had to use Alt+SysRq+b.

 If I can choose then I prefer the Oops...

 Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
 Jan 11 23:46:35 twister kernel: Emergency Sync complete
 Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
 Jan 11 23:46:42 twister kernel:   taskPC stack   pid 
 father
 Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 5  
 2
 Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
 8100ae7f2600 8100bf84fd00
 Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
 80652ae0 00010100
 Jan 11 23:46:42 twister kernel:   7fff 
 7fff 0002
 Jan 11 23:46:42 twister kernel: Call Trace:
 Jan 11 23:46:42 twister kernel:  [80473543] 
 schedule_timeout+0x1e/0xad
 Jan 11 23:46:42 twister kernel:  [8028560d] dput+0x26/0x103
 Jan 11 23:46:42 twister kernel:  [802af3fd] 
 sysfs_move_dir+0x1ee/0x1fd
 Jan 11 23:46:42 twister kernel:  [8047340b] 
 wait_for_common+0xc4/0x129
 Jan 11 23:46:42 twister kernel:  [80224204] 
 default_wake_function+0x0/0xe
 Jan 11 23:46:42 twister kernel:  [80471d72] klist_del+0x15/0x2e
 Jan 11 23:46:42 twister kernel:  [803698f3] device_move+0x80/0x111
 Jan 11 23:46:42 twister kernel:  [88146a27] 
 :bluetooth:hci_conn_move_child+0x0/0xf
 Jan 11 23:46:42 twister kernel:  [88146a32] 
 :bluetooth:hci_conn_move_child+0xb/0xf
 Jan 11 23:46:42 twister kernel:  [80369836] 
 device_for_each_child+0x22/0x4d
 Jan 11 23:46:42 twister kernel:  [88146a05] 
 :bluetooth:del_conn+0x0/0x22
 Jan 11 23:46:42 twister kernel:  [88146a1e] 
 :bluetooth:del_conn+0x19/0x22
 Jan 11 23:46:42 twister kernel:  [8023696d] run_workqueue+0x74/0xee
 Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
 Jan 11 23:46:42 twister kernel:  [802370d2] worker_thread+0xda/0xe7
 Jan 11 23:46:42 twister kernel:  [80239b77] 
 autoremove_wake_function+0x0/0x2e
 Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
 Jan 11 23:46:42 twister kernel:  [802399a0] kthread+0x47/0x73
 Jan 11 23:46:42 twister kernel:  [8020bdc8] child_rip+0xa/0x12
 Jan 11 23:46:42 twister kernel:  [80239959] kthread+0x0/0x73
 Jan 11 23:46:42 twister kernel:  [8020bdbe] child_rip+0x0/0x12
 Jan 11 23:46:42 twister kernel:
 Jan 11 23:46:42 twister kernel: cat   D 7fff 0  3978  
  3965
 Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
 8100ae7f2900 81009e857ca8
 Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
 80652ae0 000100c0
 Jan 11 23:46:42 twister kernel:   7fff 
 7fff 0002
 Jan 11 23:46:42 twister kernel: Call Trace:
 Jan 11 23:46:42 twister kernel:  [80473543] 
 schedule_timeout+0x1e/0xad
 Jan 11 23:46:42 twister kernel:  [80223ccc] 
 __dequeue_entity+0x1c/0x32
 Jan 11 23:46:42 twister kernel:  [80223d05] 
 set_next_entity+0x23/0x73
 Jan 11 23:46:42 twister kernel:  [8047340b] 
 wait_for_common+0xc4/0x129
 Jan 11 23:46:42 twister kernel:  [80224204] 
 default_wake_function+0x0/0xe
 Jan 11 23:46:42 twister kernel:  [80236b02] 
 flush_cpu_workqueue+0x50/0x58
 Jan 11 23:46:42 twister kernel:  [80236c32] wq_barrier_func+0x0/0x9
 Jan 11 23:46:42 twister kernel:  [80236ecf] flush_workqueue+0x9/0x12
 Jan 11 23:46:42 twister kernel:  [803449f9] release_dev+0x47c/0x5e2
 Jan 11 23:46:42 twister kernel:  [8021b609] 
 do_page_fault+0x2ff/0x65a
 Jan 11 23:46:42 twister kernel:  [80344b6b] tty_release+0xc/0x10
 Jan 11 23:46:42 twister kernel:  [80277013] __fput+0xb1/0x16f
 Jan 11 23:46:42 twister kernel:  [80274961] filp_close+0x5d/0x65
 Jan 11 23:46:42 twister kernel:  [80275ab3] sys_close+0x73/0xa6
 Jan 11 23:46:42 twister kernel:  [8020b5fc] tracesys+0xdc/0xe1
 Jan 11 23:46:42 twister kernel:

 If I'm 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-11 Thread Gabor Gombas
On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:

> For bluetooth device_move, the only child device of hci_conn dev is
> the rfcomm tty_dev. How about the following patch, please verify :

There is now no oops, instead the keyboard becomes almost completely
unresponsible when I switch off & on the phone. The mouse still works
(tested both with X and with the VGA console), but terminal input and VT
switching is dead. On the VGA console, even scrolling using Shift+PgUp
stops working.

Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
init complains about hung processes and that it can not umount the file
systems and cannot stop the RAID arrays. Once it still rebooted though,
the second time it got hung after trying to umount the filesystems, and
I had to use Alt+SysRq+b.

If I can choose then I prefer the Oops...

Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
Jan 11 23:46:35 twister kernel: Emergency Sync complete
Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
Jan 11 23:46:42 twister kernel:   taskPC stack   pid 
father
Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 5
  2
Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
8100ae7f2600 8100bf84fd00
Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
80652ae0 00010100
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [] schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [] dput+0x26/0x103
Jan 11 23:46:42 twister kernel:  [] sysfs_move_dir+0x1ee/0x1fd
Jan 11 23:46:42 twister kernel:  [] wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [] 
default_wake_function+0x0/0xe
Jan 11 23:46:42 twister kernel:  [] klist_del+0x15/0x2e
Jan 11 23:46:42 twister kernel:  [] device_move+0x80/0x111
Jan 11 23:46:42 twister kernel:  [] 
:bluetooth:hci_conn_move_child+0x0/0xf
Jan 11 23:46:42 twister kernel:  [] 
:bluetooth:hci_conn_move_child+0xb/0xf
Jan 11 23:46:42 twister kernel:  [] 
device_for_each_child+0x22/0x4d
Jan 11 23:46:42 twister kernel:  [] 
:bluetooth:del_conn+0x0/0x22
Jan 11 23:46:42 twister kernel:  [] 
:bluetooth:del_conn+0x19/0x22
Jan 11 23:46:42 twister kernel:  [] run_workqueue+0x74/0xee
Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [] worker_thread+0xda/0xe7
Jan 11 23:46:42 twister kernel:  [] 
autoremove_wake_function+0x0/0x2e
Jan 11 23:46:42 twister kernel:  [] worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [] kthread+0x47/0x73
Jan 11 23:46:42 twister kernel:  [] child_rip+0xa/0x12
Jan 11 23:46:42 twister kernel:  [] kthread+0x0/0x73
Jan 11 23:46:42 twister kernel:  [] child_rip+0x0/0x12
Jan 11 23:46:42 twister kernel: 
Jan 11 23:46:42 twister kernel: cat   D 7fff 0  3978   
3965
Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
8100ae7f2900 81009e857ca8
Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
80652ae0 000100c0
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [] schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [] __dequeue_entity+0x1c/0x32
Jan 11 23:46:42 twister kernel:  [] set_next_entity+0x23/0x73
Jan 11 23:46:42 twister kernel:  [] wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [] 
default_wake_function+0x0/0xe
Jan 11 23:46:42 twister kernel:  [] 
flush_cpu_workqueue+0x50/0x58
Jan 11 23:46:42 twister kernel:  [] wq_barrier_func+0x0/0x9
Jan 11 23:46:42 twister kernel:  [] flush_workqueue+0x9/0x12
Jan 11 23:46:42 twister kernel:  [] release_dev+0x47c/0x5e2
Jan 11 23:46:42 twister kernel:  [] do_page_fault+0x2ff/0x65a
Jan 11 23:46:42 twister kernel:  [] tty_release+0xc/0x10
Jan 11 23:46:42 twister kernel:  [] __fput+0xb1/0x16f
Jan 11 23:46:42 twister kernel:  [] filp_close+0x5d/0x65
Jan 11 23:46:42 twister kernel:  [] sys_close+0x73/0xa6
Jan 11 23:46:42 twister kernel:  [] tracesys+0xdc/0xe1
Jan 11 23:46:42 twister kernel: 

If I'm not mistaken the "cat" above is the "cat /dev/zero >
/dev/rfcomm0" command.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-11 Thread Gabor Gombas
On Thu, Jan 10, 2008 at 09:11:17AM +0800, Dave Young wrote:

 For bluetooth device_move, the only child device of hci_conn dev is
 the rfcomm tty_dev. How about the following patch, please verify :

There is now no oops, instead the keyboard becomes almost completely
unresponsible when I switch off  on the phone. The mouse still works
(tested both with X and with the VGA console), but terminal input and VT
switching is dead. On the VGA console, even scrolling using Shift+PgUp
stops working.

Alt+SysRq+w works, trace is below. Ctrl+Alt+Del also works, but then
init complains about hung processes and that it can not umount the file
systems and cannot stop the RAID arrays. Once it still rebooted though,
the second time it got hung after trying to umount the filesystems, and
I had to use Alt+SysRq+b.

If I can choose then I prefer the Oops...

Jan 11 23:46:35 twister kernel: SysRq : Emergency Sync
Jan 11 23:46:35 twister kernel: Emergency Sync complete
Jan 11 23:46:42 twister kernel: SysRq : Show Blocked State
Jan 11 23:46:42 twister kernel:   taskPC stack   pid 
father
Jan 11 23:46:42 twister kernel: events/0  D 80487190 0 5
  2
Jan 11 23:46:42 twister kernel:  8100bf84fd40 0046 
8100ae7f2600 8100bf84fd00
Jan 11 23:46:42 twister kernel:  8100bf84a830 8100bd872000 
80652ae0 00010100
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [80473543] schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [8028560d] dput+0x26/0x103
Jan 11 23:46:42 twister kernel:  [802af3fd] sysfs_move_dir+0x1ee/0x1fd
Jan 11 23:46:42 twister kernel:  [8047340b] wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [80224204] 
default_wake_function+0x0/0xe
Jan 11 23:46:42 twister kernel:  [80471d72] klist_del+0x15/0x2e
Jan 11 23:46:42 twister kernel:  [803698f3] device_move+0x80/0x111
Jan 11 23:46:42 twister kernel:  [88146a27] 
:bluetooth:hci_conn_move_child+0x0/0xf
Jan 11 23:46:42 twister kernel:  [88146a32] 
:bluetooth:hci_conn_move_child+0xb/0xf
Jan 11 23:46:42 twister kernel:  [80369836] 
device_for_each_child+0x22/0x4d
Jan 11 23:46:42 twister kernel:  [88146a05] 
:bluetooth:del_conn+0x0/0x22
Jan 11 23:46:42 twister kernel:  [88146a1e] 
:bluetooth:del_conn+0x19/0x22
Jan 11 23:46:42 twister kernel:  [8023696d] run_workqueue+0x74/0xee
Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [802370d2] worker_thread+0xda/0xe7
Jan 11 23:46:42 twister kernel:  [80239b77] 
autoremove_wake_function+0x0/0x2e
Jan 11 23:46:42 twister kernel:  [80236ff8] worker_thread+0x0/0xe7
Jan 11 23:46:42 twister kernel:  [802399a0] kthread+0x47/0x73
Jan 11 23:46:42 twister kernel:  [8020bdc8] child_rip+0xa/0x12
Jan 11 23:46:42 twister kernel:  [80239959] kthread+0x0/0x73
Jan 11 23:46:42 twister kernel:  [8020bdbe] child_rip+0x0/0x12
Jan 11 23:46:42 twister kernel: 
Jan 11 23:46:42 twister kernel: cat   D 7fff 0  3978   
3965
Jan 11 23:46:42 twister kernel:  81009e857ce8 0086 
8100ae7f2900 81009e857ca8
Jan 11 23:46:42 twister kernel:  8100bd872000 8100bf9e7060 
80652ae0 000100c0
Jan 11 23:46:42 twister kernel:   7fff 
7fff 0002
Jan 11 23:46:42 twister kernel: Call Trace:
Jan 11 23:46:42 twister kernel:  [80473543] schedule_timeout+0x1e/0xad
Jan 11 23:46:42 twister kernel:  [80223ccc] __dequeue_entity+0x1c/0x32
Jan 11 23:46:42 twister kernel:  [80223d05] set_next_entity+0x23/0x73
Jan 11 23:46:42 twister kernel:  [8047340b] wait_for_common+0xc4/0x129
Jan 11 23:46:42 twister kernel:  [80224204] 
default_wake_function+0x0/0xe
Jan 11 23:46:42 twister kernel:  [80236b02] 
flush_cpu_workqueue+0x50/0x58
Jan 11 23:46:42 twister kernel:  [80236c32] wq_barrier_func+0x0/0x9
Jan 11 23:46:42 twister kernel:  [80236ecf] flush_workqueue+0x9/0x12
Jan 11 23:46:42 twister kernel:  [803449f9] release_dev+0x47c/0x5e2
Jan 11 23:46:42 twister kernel:  [8021b609] do_page_fault+0x2ff/0x65a
Jan 11 23:46:42 twister kernel:  [80344b6b] tty_release+0xc/0x10
Jan 11 23:46:42 twister kernel:  [80277013] __fput+0xb1/0x16f
Jan 11 23:46:42 twister kernel:  [80274961] filp_close+0x5d/0x65
Jan 11 23:46:42 twister kernel:  [80275ab3] sys_close+0x73/0xa6
Jan 11 23:46:42 twister kernel:  [8020b5fc] tracesys+0xdc/0xe1
Jan 11 23:46:42 twister kernel: 

If I'm not mistaken the cat above is the cat /dev/zero 
/dev/rfcomm0 command.

Gabor

-- 
 -
 MTA SZTAKI 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-10 Thread Gabor Gombas
Hi,

On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote:

> Please confirm the attached patch fixes the oops.  I'll separate it into
> two patches and forward them to Greg.  But bluetooth code also needs to
> be updated such that it moves the refcommX device before killing the
> connection node.

The kernel survives switching the phone off & on but then oopses when
pppd is started:

Jan 10 10:59:09 twister kernel: Unable to handle kernel NULL pointer 
dereference at 0008 RIP: 
Jan 10 10:59:09 twister kernel:  [] sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel: PGD 260bb067 PUD 260bc067 PMD 0 
Jan 10 10:59:09 twister kernel: Oops:  [1] 
Jan 10 10:59:09 twister kernel: CPU 0 
Jan 10 10:59:09 twister kernel: Modules linked in: ppp_generic slhc binfmt_misc 
rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit 
iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt 
saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner 
tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 snd_ac97_codec saa7134 
firewire_ohci firewire_core sg ac97_bus videobuf_dma_sg videobuf_core 
ir_kbd_i2c crc_itu_t ir_common snd_pcm parport_pc parport sky2 forcedeth sr_mod 
snd_timer snd_page_alloc ehci_hcd ohci_hcd floppy cdrom
Jan 10 10:59:09 twister kernel: Pid: 4490, comm: pppd Not tainted 
2.6.24-rc6debug-dirty #8
Jan 10 10:59:09 twister kernel: RIP: 0010:[]  
[] sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel: RSP: 0018:8100260dbc88  EFLAGS: 00010207
Jan 10 10:59:09 twister kernel: RAX:  RBX: 8100251577d0 
RCX: 
Jan 10 10:59:09 twister kernel: RDX: 8100260e3410 RSI: 80643d70 
RDI: 8100bb32d898
Jan 10 10:59:09 twister kernel: RBP:  R08: 6390dfaa7e3bd395 
R09: 8050cee6
Jan 10 10:59:09 twister kernel: R10: 8100260dbcb8 R11: 00300018 
R12: 8100b6ef4b90
Jan 10 10:59:09 twister kernel: R13: fff4 R14: 81004d9d2600 
R15: 8100b6eb227e
Jan 10 10:59:09 twister kernel: FS:  2b5731903ac0() 
GS:805ba000() knlGS:
Jan 10 10:59:09 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan 10 10:59:09 twister kernel: CR2: 0008 CR3: 260b1000 
CR4: 06e0
Jan 10 10:59:09 twister kernel: DR0:  DR1:  
DR2: 
Jan 10 10:59:09 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan 10 10:59:09 twister kernel: Process pppd (pid: 4490, threadinfo 
8100260da000, task 81009550d060)
Jan 10 10:59:09 twister kernel: Stack:  8100260e3690 8100b6ef4b90 
81003a5942a0 802af272
Jan 10 10:59:09 twister kernel:  8100becca2d0 8100becca2d0 
8100260df300 81003a5942a0
Jan 10 10:59:09 twister kernel:  fff4 81004d9d2600 
8100b6eb227e 803007a5
Jan 10 10:59:09 twister kernel: Call Trace:
Jan 10 10:59:09 twister kernel:  [] sysfs_move_dir+0x63/0x1fd
Jan 10 10:59:09 twister kernel:  [] kobject_move+0xba/0x110
Jan 10 10:59:09 twister kernel:  [] device_move+0x59/0x111
Jan 10 10:59:09 twister kernel:  [] 
:rfcomm:rfcomm_tty_open+0x1c1/0x1db
Jan 10 10:59:09 twister kernel:  [] 
default_wake_function+0x0/0xe
Jan 10 10:59:09 twister kernel:  [] tty_open+0x1c5/0x2af
Jan 10 10:59:09 twister kernel:  [] chrdev_open+0x113/0x132
Jan 10 10:59:09 twister kernel:  [] chrdev_open+0x0/0x132
Jan 10 10:59:09 twister kernel:  [] __dentry_open+0xda/0x1aa
Jan 10 10:59:09 twister kernel:  [] do_filp_open+0x2d/0x3d
Jan 10 10:59:09 twister kernel:  [] mutex_lock+0xd/0x1d
Jan 10 10:59:09 twister kernel:  [] 
get_unused_fd_flags+0x6d/0x100
Jan 10 10:59:09 twister kernel:  [] do_sys_open+0x46/0xc3
Jan 10 10:59:09 twister kernel:  [] tracesys+0xdc/0xe1
Jan 10 10:59:09 twister kernel: 
Jan 10 10:59:09 twister kernel: 
Jan 10 10:59:09 twister kernel: Code: 48 8b 45 08 48 39 d0 74 05 48 89 c5 eb f2 
48 8b 7b 08 48 81 
Jan 10 10:59:09 twister kernel: RIP  [] 
sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel:  RSP 
Jan 10 10:59:09 twister kernel: CR2: 0008
Jan 10 10:59:09 twister kernel: ---[ end trace b33fe66800773e0c ]---

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-10 Thread Gabor Gombas
Hi,

On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote:

 Please confirm the attached patch fixes the oops.  I'll separate it into
 two patches and forward them to Greg.  But bluetooth code also needs to
 be updated such that it moves the refcommX device before killing the
 connection node.

The kernel survives switching the phone off  on but then oopses when
pppd is started:

Jan 10 10:59:09 twister kernel: Unable to handle kernel NULL pointer 
dereference at 0008 RIP: 
Jan 10 10:59:09 twister kernel:  [802af03f] sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel: PGD 260bb067 PUD 260bc067 PMD 0 
Jan 10 10:59:09 twister kernel: Oops:  [1] 
Jan 10 10:59:09 twister kernel: CPU 0 
Jan 10 10:59:09 twister kernel: Modules linked in: ppp_generic slhc binfmt_misc 
rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit 
iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt 
saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth usb_storage tuner 
tea5767 tda8290 tuner_simple mt20xx tea5761 snd_intel8x0 snd_ac97_codec saa7134 
firewire_ohci firewire_core sg ac97_bus videobuf_dma_sg videobuf_core 
ir_kbd_i2c crc_itu_t ir_common snd_pcm parport_pc parport sky2 forcedeth sr_mod 
snd_timer snd_page_alloc ehci_hcd ohci_hcd floppy cdrom
Jan 10 10:59:09 twister kernel: Pid: 4490, comm: pppd Not tainted 
2.6.24-rc6debug-dirty #8
Jan 10 10:59:09 twister kernel: RIP: 0010:[802af03f]  
[802af03f] sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel: RSP: 0018:8100260dbc88  EFLAGS: 00010207
Jan 10 10:59:09 twister kernel: RAX:  RBX: 8100251577d0 
RCX: 
Jan 10 10:59:09 twister kernel: RDX: 8100260e3410 RSI: 80643d70 
RDI: 8100bb32d898
Jan 10 10:59:09 twister kernel: RBP:  R08: 6390dfaa7e3bd395 
R09: 8050cee6
Jan 10 10:59:09 twister kernel: R10: 8100260dbcb8 R11: 00300018 
R12: 8100b6ef4b90
Jan 10 10:59:09 twister kernel: R13: fff4 R14: 81004d9d2600 
R15: 8100b6eb227e
Jan 10 10:59:09 twister kernel: FS:  2b5731903ac0() 
GS:805ba000() knlGS:
Jan 10 10:59:09 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan 10 10:59:09 twister kernel: CR2: 0008 CR3: 260b1000 
CR4: 06e0
Jan 10 10:59:09 twister kernel: DR0:  DR1:  
DR2: 
Jan 10 10:59:09 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan 10 10:59:09 twister kernel: Process pppd (pid: 4490, threadinfo 
8100260da000, task 81009550d060)
Jan 10 10:59:09 twister kernel: Stack:  8100260e3690 8100b6ef4b90 
81003a5942a0 802af272
Jan 10 10:59:09 twister kernel:  8100becca2d0 8100becca2d0 
8100260df300 81003a5942a0
Jan 10 10:59:09 twister kernel:  fff4 81004d9d2600 
8100b6eb227e 803007a5
Jan 10 10:59:09 twister kernel: Call Trace:
Jan 10 10:59:09 twister kernel:  [802af272] sysfs_move_dir+0x63/0x1fd
Jan 10 10:59:09 twister kernel:  [803007a5] kobject_move+0xba/0x110
Jan 10 10:59:09 twister kernel:  [803698cc] device_move+0x59/0x111
Jan 10 10:59:09 twister kernel:  [8827d31e] 
:rfcomm:rfcomm_tty_open+0x1c1/0x1db
Jan 10 10:59:09 twister kernel:  [80224204] 
default_wake_function+0x0/0xe
Jan 10 10:59:09 twister kernel:  [80345a2d] tty_open+0x1c5/0x2af
Jan 10 10:59:09 twister kernel:  [80278590] chrdev_open+0x113/0x132
Jan 10 10:59:09 twister kernel:  [8027847d] chrdev_open+0x0/0x132
Jan 10 10:59:09 twister kernel:  [80274b4a] __dentry_open+0xda/0x1aa
Jan 10 10:59:09 twister kernel:  [80274cd1] do_filp_open+0x2d/0x3d
Jan 10 10:59:09 twister kernel:  [80473812] mutex_lock+0xd/0x1d
Jan 10 10:59:09 twister kernel:  [802749d6] 
get_unused_fd_flags+0x6d/0x100
Jan 10 10:59:09 twister kernel:  [80274d27] do_sys_open+0x46/0xc3
Jan 10 10:59:09 twister kernel:  [8020b5fc] tracesys+0xdc/0xe1
Jan 10 10:59:09 twister kernel: 
Jan 10 10:59:09 twister kernel: 
Jan 10 10:59:09 twister kernel: Code: 48 8b 45 08 48 39 d0 74 05 48 89 c5 eb f2 
48 8b 7b 08 48 81 
Jan 10 10:59:09 twister kernel: RIP  [802af03f] 
sysfs_get_dentry+0x27/0x8f
Jan 10 10:59:09 twister kernel:  RSP 8100260dbc88
Jan 10 10:59:09 twister kernel: CR2: 0008
Jan 10 10:59:09 twister kernel: ---[ end trace b33fe66800773e0c ]---

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Dave Young
On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote:
> Hello,
> 
> My laptop and cell finally decided to talk to each other and I could
> reproduce the bug here.  The attached patch should remove the oops.
> 
> The bug is two folded.  I just skimmed through the bluetooth code and am
> very likely to wrong at places.  Please correct me where I'm wrong.
> 
> 1. It's introduced by class device migration and the bug will go away
> with CONFIG_SYSFS_DEPRECATED turned on.  With CONFIG_SYSFS_DEPRECATED
> turned off, what used to be class devices live under actual devices.
> For rfcomm, this means the rfcommN tty device now lives under the
> acl node (probably represents an active connection?) instead of the
> class directory.
> 
> It seems rfcommN devices are supposed to survive over disconnects so the
> rfcommN device moves under the live connection while connection is alive
> and retreats back to a default directory when the connection is lost.
> This is all fine and dandy as long as the rfcommN device lives under
> class directory as class directory never goes away.
> 
> However, with recent sysfs updates, rfcommN now lives directly under the
> acl node.  If the connection goes away while rfcommN device is under
> it, it gets deleted but rfcommN is still treated as alive.
> 
> This isn't supported.  sysfs doesn't allow parents to die first and the
> dangling children to be salvaged using sysfs_move().
> 
> 2. Which in turn exposes three bugs in sysfs
>   - sysfs_lookup() returning NULL on negative lookup.  sysfs
> code requires that all negative dentries are shot down.
> lookup should return -ENOENT instead of NULL.
>   - in move and rename, error handling is wrong.  It ends up
> passing ERR_PTR() values to dput().
>   - there was an extra dput() in sysfs_move_dir().
> 
> The attached patch fixes all sysfs bugs and removes the oops.  However,
> rfcommX moving is still broken.  The rfcommX device won't be visible
> from sysfs tree after the initial move failure and all following moves
> will fail.
> 
> Please confirm the attached patch fixes the oops.  I'll separate it into
> two patches and forward them to Greg.  But bluetooth code also needs to
> be updated such that it moves the refcommX device before killing the
> connection node.
For bluetooth device_move, the only child device of hci_conn dev is the rfcomm 
tty_dev. How about the following patch, please verify :

---
net/bluetooth/hci_sysfs.c |7 +++
1 file changed, 7 insertions(+)

diff -upr linux/net/bluetooth/hci_sysfs.c linux.new/net/bluetooth/hci_sysfs.c
--- linux/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:51.0 +0800
+++ linux.new/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:22.0 +0800
@@ -316,9 +316,16 @@ void hci_conn_add_sysfs(struct hci_conn 
schedule_work(>work);
 }
 
+static int hci_conn_move_child(struct device *dev, void *data)
+{
+   device_move(dev, NULL);
+   return 0;
+}
+
 static void del_conn(struct work_struct *work)
 {
struct hci_conn *conn = container_of(work, struct hci_conn, work);
+   device_for_each_child(>dev, NULL, hci_conn_move_child);
device_del(>dev);
put_device(>dev);
 }

> 
> Thanks.
> 
> -- 
> tejun

> diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
> diff --git a/fs/dcache.c b/fs/dcache.c
> diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
> index 3371629..f281cc6 100644
> --- a/fs/sysfs/dir.c
> +++ b/fs/sysfs/dir.c
> @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, 
> struct dentry *dentry,
>   sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
>  
>   /* no such entry */
> - if (!sd)
> + if (!sd) {
> + ret = ERR_PTR(-ENOENT);
>   goto out_unlock;
> + }
>  
>   /* attach dentry and inode */
>   inode = sysfs_get_inode(sd);
> @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char 
> *new_name)
>   old_dentry = sysfs_get_dentry(sd);
>   if (IS_ERR(old_dentry)) {
>   error = PTR_ERR(old_dentry);
> + old_dentry = NULL;
>   goto out;
>   }
>  
> @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject 
> *new_parent_kobj)
>   old_dentry = sysfs_get_dentry(sd);
>   if (IS_ERR(old_dentry)) {
>   error = PTR_ERR(old_dentry);
> + old_dentry = NULL;
>   goto out;
>   }
>   old_parent = old_dentry->d_parent;
> @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject 
> *new_parent_kobj)
>   new_parent = sysfs_get_dentry(new_parent_sd);
>   if (IS_ERR(new_parent)) {
>   error = PTR_ERR(new_parent);
> + new_parent = NULL;
>   goto out;
>   }
>  
> @@ -878,7 +883,6 @@ again:
>   error = 0;
>   d_add(new_dentry, NULL);
>   d_move(old_dentry, new_dentry);
> - dput(new_dentry);
>  
>   /* Remove from 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Cornelia Huck
On Wed, 09 Jan 2008 18:16:02 +0900,
Tejun Heo <[EMAIL PROTECTED]> wrote:

> This isn't supported.  sysfs doesn't allow parents to die first and the
> dangling children to be salvaged using sysfs_move().

But (with the sysfs bugs fixed) it will return an error, won't it? It
seems the bluetooth code is rather optimistic with never checking
device_move()'s return code.

> 
> 2. Which in turn exposes three bugs in sysfs
>   - sysfs_lookup() returning NULL on negative lookup.  sysfs
> code requires that all negative dentries are shot down.
> lookup should return -ENOENT instead of NULL.
>   - in move and rename, error handling is wrong.  It ends up
> passing ERR_PTR() values to dput().
>   - there was an extra dput() in sysfs_move_dir().

Ups, seem to have copied that from some old version of
sysfs_rename_dir()...

> 
> The attached patch fixes all sysfs bugs and removes the oops. 

Looks OK at first glance.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Tejun Heo
Hello,

My laptop and cell finally decided to talk to each other and I could
reproduce the bug here.  The attached patch should remove the oops.

The bug is two folded.  I just skimmed through the bluetooth code and am
very likely to wrong at places.  Please correct me where I'm wrong.

1. It's introduced by class device migration and the bug will go away
with CONFIG_SYSFS_DEPRECATED turned on.  With CONFIG_SYSFS_DEPRECATED
turned off, what used to be class devices live under actual devices.
For rfcomm, this means the rfcommN tty device now lives under the
acl node (probably represents an active connection?) instead of the
class directory.

It seems rfcommN devices are supposed to survive over disconnects so the
rfcommN device moves under the live connection while connection is alive
and retreats back to a default directory when the connection is lost.
This is all fine and dandy as long as the rfcommN device lives under
class directory as class directory never goes away.

However, with recent sysfs updates, rfcommN now lives directly under the
acl node.  If the connection goes away while rfcommN device is under
it, it gets deleted but rfcommN is still treated as alive.

This isn't supported.  sysfs doesn't allow parents to die first and the
dangling children to be salvaged using sysfs_move().

2. Which in turn exposes three bugs in sysfs
- sysfs_lookup() returning NULL on negative lookup.  sysfs
  code requires that all negative dentries are shot down.
  lookup should return -ENOENT instead of NULL.
- in move and rename, error handling is wrong.  It ends up
  passing ERR_PTR() values to dput().
- there was an extra dput() in sysfs_move_dir().

The attached patch fixes all sysfs bugs and removes the oops.  However,
rfcommX moving is still broken.  The rfcommX device won't be visible
from sysfs tree after the initial move failure and all following moves
will fail.

Please confirm the attached patch fixes the oops.  I'll separate it into
two patches and forward them to Greg.  But bluetooth code also needs to
be updated such that it moves the refcommX device before killing the
connection node.

Thanks.

-- 
tejun
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
diff --git a/fs/dcache.c b/fs/dcache.c
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..f281cc6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	sd = sysfs_find_dirent(parent_sd, dentry->d_name.name);
 
 	/* no such entry */
-	if (!sd)
+	if (!sd) {
+		ret = ERR_PTR(-ENOENT);
 		goto out_unlock;
+	}
 
 	/* attach dentry and inode */
 	inode = sysfs_get_inode(sd);
@@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 	old_dentry = sysfs_get_dentry(sd);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
+		old_dentry = NULL;
 		goto out;
 	}
 
@@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 	old_dentry = sysfs_get_dentry(sd);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
+		old_dentry = NULL;
 		goto out;
 	}
 	old_parent = old_dentry->d_parent;
@@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 	new_parent = sysfs_get_dentry(new_parent_sd);
 	if (IS_ERR(new_parent)) {
 		error = PTR_ERR(new_parent);
+		new_parent = NULL;
 		goto out;
 	}
 
@@ -878,7 +883,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Tejun Heo
Hello,

My laptop and cell finally decided to talk to each other and I could
reproduce the bug here.  The attached patch should remove the oops.

The bug is two folded.  I just skimmed through the bluetooth code and am
very likely to wrong at places.  Please correct me where I'm wrong.

1. It's introduced by class device migration and the bug will go away
with CONFIG_SYSFS_DEPRECATED turned on.  With CONFIG_SYSFS_DEPRECATED
turned off, what used to be class devices live under actual devices.
For rfcomm, this means the rfcommN tty device now lives under the
acl node (probably represents an active connection?) instead of the
class directory.

It seems rfcommN devices are supposed to survive over disconnects so the
rfcommN device moves under the live connection while connection is alive
and retreats back to a default directory when the connection is lost.
This is all fine and dandy as long as the rfcommN device lives under
class directory as class directory never goes away.

However, with recent sysfs updates, rfcommN now lives directly under the
acl node.  If the connection goes away while rfcommN device is under
it, it gets deleted but rfcommN is still treated as alive.

This isn't supported.  sysfs doesn't allow parents to die first and the
dangling children to be salvaged using sysfs_move().

2. Which in turn exposes three bugs in sysfs
- sysfs_lookup() returning NULL on negative lookup.  sysfs
  code requires that all negative dentries are shot down.
  lookup should return -ENOENT instead of NULL.
- in move and rename, error handling is wrong.  It ends up
  passing ERR_PTR() values to dput().
- there was an extra dput() in sysfs_move_dir().

The attached patch fixes all sysfs bugs and removes the oops.  However,
rfcommX moving is still broken.  The rfcommX device won't be visible
from sysfs tree after the initial move failure and all following moves
will fail.

Please confirm the attached patch fixes the oops.  I'll separate it into
two patches and forward them to Greg.  But bluetooth code also needs to
be updated such that it moves the refcommX device before killing the
connection node.

Thanks.

-- 
tejun
diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
diff --git a/fs/dcache.c b/fs/dcache.c
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..f281cc6 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, struct dentry *dentry,
 	sd = sysfs_find_dirent(parent_sd, dentry-d_name.name);
 
 	/* no such entry */
-	if (!sd)
+	if (!sd) {
+		ret = ERR_PTR(-ENOENT);
 		goto out_unlock;
+	}
 
 	/* attach dentry and inode */
 	inode = sysfs_get_inode(sd);
@@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char *new_name)
 	old_dentry = sysfs_get_dentry(sd);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
+		old_dentry = NULL;
 		goto out;
 	}
 
@@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 	old_dentry = sysfs_get_dentry(sd);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
+		old_dentry = NULL;
 		goto out;
 	}
 	old_parent = old_dentry-d_parent;
@@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 	new_parent = sysfs_get_dentry(new_parent_sd);
 	if (IS_ERR(new_parent)) {
 		error = PTR_ERR(new_parent);
+		new_parent = NULL;
 		goto out;
 	}
 
@@ -878,7 +883,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);
diff --git a/include/linux/dcache.h b/include/linux/dcache.h


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Cornelia Huck
On Wed, 09 Jan 2008 18:16:02 +0900,
Tejun Heo [EMAIL PROTECTED] wrote:

 This isn't supported.  sysfs doesn't allow parents to die first and the
 dangling children to be salvaged using sysfs_move().

But (with the sysfs bugs fixed) it will return an error, won't it? It
seems the bluetooth code is rather optimistic with never checking
device_move()'s return code.

 
 2. Which in turn exposes three bugs in sysfs
   - sysfs_lookup() returning NULL on negative lookup.  sysfs
 code requires that all negative dentries are shot down.
 lookup should return -ENOENT instead of NULL.
   - in move and rename, error handling is wrong.  It ends up
 passing ERR_PTR() values to dput().
   - there was an extra dput() in sysfs_move_dir().

Ups, seem to have copied that from some old version of
sysfs_rename_dir()...

 
 The attached patch fixes all sysfs bugs and removes the oops. 

Looks OK at first glance.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-09 Thread Dave Young
On Wed, Jan 09, 2008 at 06:16:02PM +0900, Tejun Heo wrote:
 Hello,
 
 My laptop and cell finally decided to talk to each other and I could
 reproduce the bug here.  The attached patch should remove the oops.
 
 The bug is two folded.  I just skimmed through the bluetooth code and am
 very likely to wrong at places.  Please correct me where I'm wrong.
 
 1. It's introduced by class device migration and the bug will go away
 with CONFIG_SYSFS_DEPRECATED turned on.  With CONFIG_SYSFS_DEPRECATED
 turned off, what used to be class devices live under actual devices.
 For rfcomm, this means the rfcommN tty device now lives under the
 acl node (probably represents an active connection?) instead of the
 class directory.
 
 It seems rfcommN devices are supposed to survive over disconnects so the
 rfcommN device moves under the live connection while connection is alive
 and retreats back to a default directory when the connection is lost.
 This is all fine and dandy as long as the rfcommN device lives under
 class directory as class directory never goes away.
 
 However, with recent sysfs updates, rfcommN now lives directly under the
 acl node.  If the connection goes away while rfcommN device is under
 it, it gets deleted but rfcommN is still treated as alive.
 
 This isn't supported.  sysfs doesn't allow parents to die first and the
 dangling children to be salvaged using sysfs_move().
 
 2. Which in turn exposes three bugs in sysfs
   - sysfs_lookup() returning NULL on negative lookup.  sysfs
 code requires that all negative dentries are shot down.
 lookup should return -ENOENT instead of NULL.
   - in move and rename, error handling is wrong.  It ends up
 passing ERR_PTR() values to dput().
   - there was an extra dput() in sysfs_move_dir().
 
 The attached patch fixes all sysfs bugs and removes the oops.  However,
 rfcommX moving is still broken.  The rfcommX device won't be visible
 from sysfs tree after the initial move failure and all following moves
 will fail.
 
 Please confirm the attached patch fixes the oops.  I'll separate it into
 two patches and forward them to Greg.  But bluetooth code also needs to
 be updated such that it moves the refcommX device before killing the
 connection node.
For bluetooth device_move, the only child device of hci_conn dev is the rfcomm 
tty_dev. How about the following patch, please verify :

---
net/bluetooth/hci_sysfs.c |7 +++
1 file changed, 7 insertions(+)

diff -upr linux/net/bluetooth/hci_sysfs.c linux.new/net/bluetooth/hci_sysfs.c
--- linux/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:51.0 +0800
+++ linux.new/net/bluetooth/hci_sysfs.c 2008-01-10 09:01:22.0 +0800
@@ -316,9 +316,16 @@ void hci_conn_add_sysfs(struct hci_conn 
schedule_work(conn-work);
 }
 
+static int hci_conn_move_child(struct device *dev, void *data)
+{
+   device_move(dev, NULL);
+   return 0;
+}
+
 static void del_conn(struct work_struct *work)
 {
struct hci_conn *conn = container_of(work, struct hci_conn, work);
+   device_for_each_child(conn-dev, NULL, hci_conn_move_child);
device_del(conn-dev);
put_device(conn-dev);
 }

 
 Thanks.
 
 -- 
 tejun

 diff --git a/drivers/ata/libata-pmp.c b/drivers/ata/libata-pmp.c
 diff --git a/fs/dcache.c b/fs/dcache.c
 diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
 index 3371629..f281cc6 100644
 --- a/fs/sysfs/dir.c
 +++ b/fs/sysfs/dir.c
 @@ -678,8 +678,10 @@ static struct dentry * sysfs_lookup(struct inode *dir, 
 struct dentry *dentry,
   sd = sysfs_find_dirent(parent_sd, dentry-d_name.name);
  
   /* no such entry */
 - if (!sd)
 + if (!sd) {
 + ret = ERR_PTR(-ENOENT);
   goto out_unlock;
 + }
  
   /* attach dentry and inode */
   inode = sysfs_get_inode(sd);
 @@ -781,6 +783,7 @@ int sysfs_rename_dir(struct kobject * kobj, const char 
 *new_name)
   old_dentry = sysfs_get_dentry(sd);
   if (IS_ERR(old_dentry)) {
   error = PTR_ERR(old_dentry);
 + old_dentry = NULL;
   goto out;
   }
  
 @@ -848,6 +851,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject 
 *new_parent_kobj)
   old_dentry = sysfs_get_dentry(sd);
   if (IS_ERR(old_dentry)) {
   error = PTR_ERR(old_dentry);
 + old_dentry = NULL;
   goto out;
   }
   old_parent = old_dentry-d_parent;
 @@ -855,6 +859,7 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject 
 *new_parent_kobj)
   new_parent = sysfs_get_dentry(new_parent_sd);
   if (IS_ERR(new_parent)) {
   error = PTR_ERR(new_parent);
 + new_parent = NULL;
   goto out;
   }
  
 @@ -878,7 +883,6 @@ again:
   error = 0;
   d_add(new_dentry, NULL);
   d_move(old_dentry, new_dentry);
 - dput(new_dentry);
  
   /* Remove from old parent's list and insert into new parent's list. */
   sysfs_unlink_sibling(sd);
 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-08 Thread Gabor Gombas
On Tue, Jan 08, 2008 at 06:42:43PM +0900, Tejun Heo wrote:

> Thanks.  Please apply the attached patch and report the oops.

Jan  8 14:23:29 twister kernel: XXX: moving /devices/virtual/tty/rfcomm0 under 
/devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty
Jan  8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / 
virtual
Jan  8 14:23:29 twister kernel: XXX: looking up virtual (8100bf417220) / tty
Jan  8 14:23:29 twister kernel: XXX: looking up tty (8100bb31c440) / rfcomm0
Jan  8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / 
pci:00
Jan  8 14:23:29 twister kernel: XXX: looking up pci:00 (8100bf658880) / 
:00:02.0
Jan  8 14:23:29 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) 
/ usb1
Jan  8 14:23:29 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3
Jan  8 14:23:29 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0
Jan  8 14:23:29 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / 
hci0
Jan  8 14:23:29 twister kernel: XXX: looking up hci0 (8100bb318aa0) / 
acl001BAFE1624D
Jan  8 14:23:29 twister kernel: XXX: looking up acl001BAFE1624D 
(8100b555a000) / tty
Jan  8 14:23:52 twister kernel: XXX: moving 
/devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty/rfcomm0
 under /devices/virtual/tty
Jan  8 14:23:52 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:52 twister kernel: XXX: looking up devices (8100bf658cc0) / 
pci:00
Jan  8 14:23:52 twister kernel: XXX: looking up pci:00 (8100bf658880) / 
:00:02.0
Jan  8 14:23:52 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) 
/ usb1
Jan  8 14:23:52 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3
Jan  8 14:23:52 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0
Jan  8 14:23:52 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / 
hci0
Jan  8 14:23:52 twister kernel: XXX: looking up hci0 (8100bb318aa0) / 
acl001BAFE1624D
Jan  8 14:23:52 twister kernel: XXX: looking up acl001BAFE1624D 
() / 
Jan  8 14:23:52 twister kernel: Unable to handle kernel NULL pointer 
dereference at 00b8 RIP: 
Jan  8 14:23:52 twister kernel:  [] mutex_lock+0x10/0x1d
Jan  8 14:23:52 twister kernel: PGD b19b6067 PUD b1969067 PMD 0 
Jan  8 14:23:52 twister kernel: Oops: 0002 [1] 
Jan  8 14:23:52 twister kernel: CPU 0 
Jan  8 14:23:52 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap 
nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter 
ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon 
hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 
tuner_simple mt20xx tea5761 snd_intel8x0 sg snd_ac97_codec ac97_bus saa7134 
firewire_ohci firewire_core crc_itu_t videobuf_dma_sg videobuf_core ir_kbd_i2c 
snd_pcm ir_common sky2 forcedeth ehci_hcd snd_timer snd_page_alloc ohci_hcd 
sr_mod parport_pc parport cdrom floppy
Jan  8 14:23:52 twister kernel: Pid: 3228, comm: cat Not tainted 
2.6.24-rc6debug-dirty #7
Jan  8 14:23:52 twister kernel: RIP: 0010:[]  
[] mutex_lock+0x10/0x1d
Jan  8 14:23:52 twister kernel: RSP: 0018:8100b19a5d08  EFLAGS: 00010246
Jan  8 14:23:52 twister kernel: RAX:  RBX: 00b8 
RCX: 00010352
Jan  8 14:23:52 twister kernel: RDX: 8100b19a5fd8 RSI: 00010352 
RDI: 00b8
Jan  8 14:23:52 twister kernel: RBP: 80593db0 R08:  
R09: 0097
Jan  8 14:23:52 twister kernel: R10: 000e R11: 803103d4 
R12: 8100ba312b90
Jan  8 14:23:52 twister kernel: R13: 8100bfa78d00 R14: 8100b1980b00 
R15: 8100b6c6b840
Jan  8 14:23:52 twister kernel: FS:  2b3bd213d6e0() 
GS:805ba000() knlGS:
Jan  8 14:23:52 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan  8 14:23:52 twister kernel: CR2: 00b8 CR3: b19ae000 
CR4: 06e0
Jan  8 14:23:52 twister kernel: DR0:  DR1:  
DR2: 
Jan  8 14:23:52 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan  8 14:23:52 twister kernel: Process cat (pid: 3228, threadinfo 
8100b19a4000, task 8100b18d9060)
Jan  8 14:23:52 twister kernel: Stack:  8100bfa78d00 8028560d 
8100b554ca28 802ae9d5
Jan  8 14:23:52 twister kernel:  8100bf9f4fa0 8100ba312b90 
8100b5326c60 802af2de
Jan  8 14:23:52 twister kernel:  8100b6ca4cd0 8100b6ca4cd0 
8100bfa78d00 8100b5326ba0
Jan  8 14:23:52 twister kernel: Call Trace:
Jan  8 14:23:52 twister kernel:  [] dput+0x26/0x103
Jan  8 14:23:52 twister kernel:  [] 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-08 Thread Tejun Heo
Gabor Gombas wrote:
> On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote:
> 
>> Does the attached patch fix the problem?
> 
> No, it still oopses.

Thanks.  Please apply the attached patch and report the oops.

-- 
tejun
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
diff --git a/fs/dcache.c b/fs/dcache.c
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..f5ec27e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -96,7 +96,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
  *	RETURNS:
  *	Pointer to found dentry on success, ERR_PTR() value on error.
  */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+static struct dentry *__sysfs_get_dentry(struct sysfs_dirent *sd, int dbg)
 {
 	struct dentry *dentry = dget(sysfs_sb->s_root);
 
@@ -111,6 +111,8 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
 
 		/* look it up */
 		parent = dentry;
+		printk("XXX: looking up %s (%p) / %s\n",
+		   parent->d_name.name, parent->d_inode, cur->s_name);
 		mutex_lock(>d_inode->i_mutex);
 		dentry = lookup_one_noperm(cur->s_name, parent);
 		mutex_unlock(>d_inode->i_mutex);
@@ -122,6 +124,11 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
 	return dentry;
 }
 
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+{
+	return __sysfs_get_dentry(sd, 0);
+}
+
 /**
  *	sysfs_get_active - get an active reference to sysfs_dirent
  *	@sd: sysfs_dirent to get an active reference to
@@ -838,6 +845,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 
 	mutex_lock(_rename_mutex);
 	BUG_ON(!sd->s_parent);
+	{
+		char *s = kobject_get_path(kobj, GFP_KERNEL);
+		char *p = kobject_get_path(new_parent_kobj, GFP_KERNEL);
+
+		printk("XXX: moving %s under %s\n", s, p);
+		kfree(s);
+		kfree(p);
+	}
 	new_parent_sd = new_parent_kobj->sd ? new_parent_kobj->sd : _root;
 
 	error = 0;
@@ -845,14 +860,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 		goto out;	/* nothing to move */
 
 	/* get dentries */
-	old_dentry = sysfs_get_dentry(sd);
+	old_dentry = __sysfs_get_dentry(sd, 1);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
 		goto out;
 	}
 	old_parent = old_dentry->d_parent;
 
-	new_parent = sysfs_get_dentry(new_parent_sd);
+	new_parent = __sysfs_get_dentry(new_parent_sd, 1);
 	if (IS_ERR(new_parent)) {
 		error = PTR_ERR(new_parent);
 		goto out;
@@ -878,7 +893,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-08 Thread Tejun Heo
Gabor Gombas wrote:
 On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote:
 
 Does the attached patch fix the problem?
 
 No, it still oopses.

Thanks.  Please apply the attached patch and report the oops.

-- 
tejun
diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
diff --git a/fs/dcache.c b/fs/dcache.c
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..f5ec27e 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -96,7 +96,7 @@ static void sysfs_unlink_sibling(struct sysfs_dirent *sd)
  *	RETURNS:
  *	Pointer to found dentry on success, ERR_PTR() value on error.
  */
-struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+static struct dentry *__sysfs_get_dentry(struct sysfs_dirent *sd, int dbg)
 {
 	struct dentry *dentry = dget(sysfs_sb-s_root);
 
@@ -111,6 +111,8 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
 
 		/* look it up */
 		parent = dentry;
+		printk(XXX: looking up %s (%p) / %s\n,
+		   parent-d_name.name, parent-d_inode, cur-s_name);
 		mutex_lock(parent-d_inode-i_mutex);
 		dentry = lookup_one_noperm(cur-s_name, parent);
 		mutex_unlock(parent-d_inode-i_mutex);
@@ -122,6 +124,11 @@ struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
 	return dentry;
 }
 
+struct dentry *sysfs_get_dentry(struct sysfs_dirent *sd)
+{
+	return __sysfs_get_dentry(sd, 0);
+}
+
 /**
  *	sysfs_get_active - get an active reference to sysfs_dirent
  *	@sd: sysfs_dirent to get an active reference to
@@ -838,6 +845,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 
 	mutex_lock(sysfs_rename_mutex);
 	BUG_ON(!sd-s_parent);
+	{
+		char *s = kobject_get_path(kobj, GFP_KERNEL);
+		char *p = kobject_get_path(new_parent_kobj, GFP_KERNEL);
+
+		printk(XXX: moving %s under %s\n, s, p);
+		kfree(s);
+		kfree(p);
+	}
 	new_parent_sd = new_parent_kobj-sd ? new_parent_kobj-sd : sysfs_root;
 
 	error = 0;
@@ -845,14 +860,14 @@ int sysfs_move_dir(struct kobject *kobj, struct kobject *new_parent_kobj)
 		goto out;	/* nothing to move */
 
 	/* get dentries */
-	old_dentry = sysfs_get_dentry(sd);
+	old_dentry = __sysfs_get_dentry(sd, 1);
 	if (IS_ERR(old_dentry)) {
 		error = PTR_ERR(old_dentry);
 		goto out;
 	}
 	old_parent = old_dentry-d_parent;
 
-	new_parent = sysfs_get_dentry(new_parent_sd);
+	new_parent = __sysfs_get_dentry(new_parent_sd, 1);
 	if (IS_ERR(new_parent)) {
 		error = PTR_ERR(new_parent);
 		goto out;
@@ -878,7 +893,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-08 Thread Gabor Gombas
On Tue, Jan 08, 2008 at 06:42:43PM +0900, Tejun Heo wrote:

 Thanks.  Please apply the attached patch and report the oops.

Jan  8 14:23:29 twister kernel: XXX: moving /devices/virtual/tty/rfcomm0 under 
/devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty
Jan  8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / 
virtual
Jan  8 14:23:29 twister kernel: XXX: looking up virtual (8100bf417220) / tty
Jan  8 14:23:29 twister kernel: XXX: looking up tty (8100bb31c440) / rfcomm0
Jan  8 14:23:29 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:29 twister kernel: XXX: looking up devices (8100bf658cc0) / 
pci:00
Jan  8 14:23:29 twister kernel: XXX: looking up pci:00 (8100bf658880) / 
:00:02.0
Jan  8 14:23:29 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) 
/ usb1
Jan  8 14:23:29 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3
Jan  8 14:23:29 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0
Jan  8 14:23:29 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / 
hci0
Jan  8 14:23:29 twister kernel: XXX: looking up hci0 (8100bb318aa0) / 
acl001BAFE1624D
Jan  8 14:23:29 twister kernel: XXX: looking up acl001BAFE1624D 
(8100b555a000) / tty
Jan  8 14:23:52 twister kernel: XXX: moving 
/devices/pci:00/:00:02.0/usb1/1-3/1-3:1.0/hci0/acl001BAFE1624D/tty/rfcomm0
 under /devices/virtual/tty
Jan  8 14:23:52 twister kernel: XXX: looking up / (8100bf40) / devices
Jan  8 14:23:52 twister kernel: XXX: looking up devices (8100bf658cc0) / 
pci:00
Jan  8 14:23:52 twister kernel: XXX: looking up pci:00 (8100bf658880) / 
:00:02.0
Jan  8 14:23:52 twister kernel: XXX: looking up :00:02.0 (8100bb2abcc0) 
/ usb1
Jan  8 14:23:52 twister kernel: XXX: looking up usb1 (8100bb306000) / 1-3
Jan  8 14:23:52 twister kernel: XXX: looking up 1-3 (8100bb309220) / 1-3:1.0
Jan  8 14:23:52 twister kernel: XXX: looking up 1-3:1.0 (8100bb309880) / 
hci0
Jan  8 14:23:52 twister kernel: XXX: looking up hci0 (8100bb318aa0) / 
acl001BAFE1624D
Jan  8 14:23:52 twister kernel: XXX: looking up acl001BAFE1624D 
() / 
Jan  8 14:23:52 twister kernel: Unable to handle kernel NULL pointer 
dereference at 00b8 RIP: 
Jan  8 14:23:52 twister kernel:  [80473875] mutex_lock+0x10/0x1d
Jan  8 14:23:52 twister kernel: PGD b19b6067 PUD b1969067 PMD 0 
Jan  8 14:23:52 twister kernel: Oops: 0002 [1] 
Jan  8 14:23:52 twister kernel: CPU 0 
Jan  8 14:23:52 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap 
nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter 
ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon 
hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 
tuner_simple mt20xx tea5761 snd_intel8x0 sg snd_ac97_codec ac97_bus saa7134 
firewire_ohci firewire_core crc_itu_t videobuf_dma_sg videobuf_core ir_kbd_i2c 
snd_pcm ir_common sky2 forcedeth ehci_hcd snd_timer snd_page_alloc ohci_hcd 
sr_mod parport_pc parport cdrom floppy
Jan  8 14:23:52 twister kernel: Pid: 3228, comm: cat Not tainted 
2.6.24-rc6debug-dirty #7
Jan  8 14:23:52 twister kernel: RIP: 0010:[80473875]  
[80473875] mutex_lock+0x10/0x1d
Jan  8 14:23:52 twister kernel: RSP: 0018:8100b19a5d08  EFLAGS: 00010246
Jan  8 14:23:52 twister kernel: RAX:  RBX: 00b8 
RCX: 00010352
Jan  8 14:23:52 twister kernel: RDX: 8100b19a5fd8 RSI: 00010352 
RDI: 00b8
Jan  8 14:23:52 twister kernel: RBP: 80593db0 R08:  
R09: 0097
Jan  8 14:23:52 twister kernel: R10: 000e R11: 803103d4 
R12: 8100ba312b90
Jan  8 14:23:52 twister kernel: R13: 8100bfa78d00 R14: 8100b1980b00 
R15: 8100b6c6b840
Jan  8 14:23:52 twister kernel: FS:  2b3bd213d6e0() 
GS:805ba000() knlGS:
Jan  8 14:23:52 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan  8 14:23:52 twister kernel: CR2: 00b8 CR3: b19ae000 
CR4: 06e0
Jan  8 14:23:52 twister kernel: DR0:  DR1:  
DR2: 
Jan  8 14:23:52 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan  8 14:23:52 twister kernel: Process cat (pid: 3228, threadinfo 
8100b19a4000, task 8100b18d9060)
Jan  8 14:23:52 twister kernel: Stack:  8100bfa78d00 8028560d 
8100b554ca28 802ae9d5
Jan  8 14:23:52 twister kernel:  8100bf9f4fa0 8100ba312b90 
8100b5326c60 802af2de
Jan  8 14:23:52 twister kernel:  8100b6ca4cd0 8100b6ca4cd0 
8100bfa78d00 8100b5326ba0
Jan  8 14:23:52 twister kernel: Call Trace:
Jan  8 14:23:52 twister kernel:  [8028560d] 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Gabor Gombas
On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote:

> Does the attached patch fix the problem?

No, it still oopses.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Gabor Gombas wrote:
> Hi,
> 
> On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote:
> 
>> Could you stick
>>  if (!parent->d_inode)
>>  printk(KERN_WARNING "sysfs locking blows: %s",
>>  parent->d_name.name);
>> right before
>> mutex_lock(>d_inode->i_mutex);
>> dentry = lookup_one_noperm(cur->s_name, parent);
>> mutex_unlock(>d_inode->i_mutex);
>> in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
>> trigger?
> 
> Here it is:
> 
> Jan  7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D<1>Unable 
> to handle kernel NULL pointer dereference at 00b8 RIP: 

Does the attached patch fix the problem?

-- 
tejun
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..4e7f3bf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -878,7 +878,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Gabor Gombas
Hi,

On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote:

> Could you stick
>   if (!parent->d_inode)
>   printk(KERN_WARNING "sysfs locking blows: %s",
>   parent->d_name.name);
> right before
> mutex_lock(>d_inode->i_mutex);
> dentry = lookup_one_noperm(cur->s_name, parent);
> mutex_unlock(>d_inode->i_mutex);
> in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
> trigger?

Here it is:

Jan  7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D<1>Unable 
to handle kernel NULL pointer dereference at 00b8 RIP: 
Jan  7 14:35:43 twister kernel:  [] mutex_lock+0x10/0x1d
Jan  7 14:35:43 twister kernel: PGD b6031067 PUD b609c067 PMD 0 
Jan  7 14:35:43 twister kernel: Oops: 0002 [1] 
Jan  7 14:35:43 twister kernel: CPU 0 
Jan  7 14:35:43 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap 
nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter 
ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt saa7134_alsa radeon 
hwmon_vid eeprom hci_usb bluetooth usb_storage tuner tea5767 tda8290 
tuner_simple mt20xx tea5761 parport_pc parport floppy snd_intel8x0 
snd_ac97_codec saa7134 ac97_bus firewire_ohci firewire_core videobuf_dma_sg 
videobuf_core ir_kbd_i2c sky2 ir_common crc_itu_t snd_pcm forcedeth snd_timer 
snd_page_alloc ehci_hcd ohci_hcd sg sr_mod cdrom
Jan  7 14:35:43 twister kernel: Pid: 3218, comm: cat Not tainted 
2.6.24-rc6debug-dirty #5
Jan  7 14:35:43 twister kernel: RIP: 0010:[]  
[] mutex_lock+0x10/0x1d
Jan  7 14:35:43 twister kernel: RSP: 0018:8100b5733d08  EFLAGS: 00010246
Jan  7 14:35:43 twister kernel: RAX:  RBX: 00b8 
RCX: 0001
Jan  7 14:35:43 twister kernel: RDX: 8100b5733fd8 RSI: 8100bfab6000 
RDI: 00b8
Jan  7 14:35:43 twister kernel: RBP: 80593db0 R08: 8057b118 
R09: 806093d0
Jan  7 14:35:43 twister kernel: R10: 8100b6074aa0 R11: 81000101b100 
R12: 8100b6074aa0
Jan  7 14:35:43 twister kernel: R13: fff4 R14: 8100b571fb80 
R15: 8100b573e000
Jan  7 14:35:43 twister kernel: FS:  2aead9fc56e0() 
GS:805ba000() knlGS:
Jan  7 14:35:43 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan  7 14:35:43 twister kernel: CR2: 00b8 CR3: b60b2000 
CR4: 06e0
Jan  7 14:35:43 twister kernel: DR0:  DR1:  
DR2: 
Jan  7 14:35:43 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan  7 14:35:43 twister kernel: Process cat (pid: 3218, threadinfo 
8100b5732000, task 8100bfa70830)
Jan  7 14:35:43 twister kernel: Stack:  e207e310 8028560d 
8100b202a7d0 802af076
Jan  7 14:35:43 twister kernel:  8100bfa52f00 8100b6074aa0 
8100b618c720 802af28b
Jan  7 14:35:43 twister kernel:  8100b61308d0 8100b61308d0 
8100bfa1a800 8100b618c720
Jan  7 14:35:43 twister kernel: Call Trace:
Jan  7 14:35:43 twister kernel:  [] dput+0x26/0x103
Jan  7 14:35:43 twister kernel:  [] sysfs_get_dentry+0x5e/0xa8
Jan  7 14:35:43 twister kernel:  [] sysfs_move_dir+0x63/0x204
Jan  7 14:35:43 twister kernel:  [] kobject_move+0xba/0x110
Jan  7 14:35:43 twister kernel:  [] device_move+0x59/0x111
Jan  7 14:35:43 twister kernel:  [] 
:rfcomm:rfcomm_tty_close+0x2f/0x74
Jan  7 14:35:43 twister kernel:  [] release_dev+0x212/0x5e2
Jan  7 14:35:43 twister kernel:  [] do_page_fault+0x2ff/0x65a
Jan  7 14:35:43 twister kernel:  [] tty_release+0xc/0x10
Jan  7 14:35:43 twister kernel:  [] __fput+0xb1/0x16f
Jan  7 14:35:43 twister kernel:  [] filp_close+0x5d/0x65
Jan  7 14:35:43 twister kernel:  [] sys_close+0x73/0xa6
Jan  7 14:35:43 twister kernel:  [] tracesys+0xdc/0xe1
Jan  7 14:35:43 twister kernel: 
Jan  7 14:35:43 twister kernel: 
Jan  7 14:35:43 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 
54 48 8d 47 08 48 
Jan  7 14:35:43 twister kernel: RIP  [] mutex_lock+0x10/0x1d
Jan  7 14:35:43 twister kernel:  RSP 
Jan  7 14:35:43 twister kernel: CR2: 00b8
Jan  7 14:35:43 twister kernel: ---[ end trace 0e3a1176e9294792 ]---

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Gabor Gombas
Hi,

On Fri, Jan 04, 2008 at 09:05:42AM +0800, Dave Young wrote:

> Could you try the patch (on 2.6.24-rc6) following and check the debug 
> messages? 
> 
> diff -upr linux/net/bluetooth/rfcomm/tty.c 
> linux.new/net/bluetooth/rfcomm/tty.c
> --- linux/net/bluetooth/rfcomm/tty.c  2008-01-04 08:58:48.0 +0800
> +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-04 09:01:01.0 
> +0800
> @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm
>  {
>   BT_DBG("dev %p", dev);
>  
> + dump_stack();
>   set_bit(RFCOMM_TTY_RELEASED, >flags);
>   rfcomm_dev_put(dev);
>  }

This did not trigger.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Eric W. Biederman
Al Viro <[EMAIL PROTECTED]> writes:

> On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote:
>> Tejun Heo wrote:
>> > Eric W. Biederman wrote:
>> >>> That said, the mechanism is a bit too fragile.  sysfs currently ensures
>> >>> that dentry/inode point to the associated sysfs_dirent.  This is mainly
>> >>> remanent of conversion from previous VFS based implementation.  I think
>> >>> the right thing to do here is to make sysfs behave like other proper
>> >>> distributed filesystems using d_revalidate.
>> >> Huh?  We still need something like sysfs_get_dentry to find the dentries
>> >> for the rename or move operation.  So we can call d_move.  
>> > 
>> > Ah... right.  Thanks.  :-)
>> 
>> On the second thought, can't those too be dealt with d_revalidate?
>
> FVO "dealt with" as pleasant and efficient as using coarse whetstone
> to deal with caries.

Or to say it another way.  The linux VFS requires dentries to be
preserved and used as long as we can.  dropping them early causes
some weird nasties to show up, in particular it totally messes up
mounting other filesystems on top.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Al Viro
On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote:
> Tejun Heo wrote:
> > Eric W. Biederman wrote:
> >>> That said, the mechanism is a bit too fragile.  sysfs currently ensures
> >>> that dentry/inode point to the associated sysfs_dirent.  This is mainly
> >>> remanent of conversion from previous VFS based implementation.  I think
> >>> the right thing to do here is to make sysfs behave like other proper
> >>> distributed filesystems using d_revalidate.
> >> Huh?  We still need something like sysfs_get_dentry to find the dentries
> >> for the rename or move operation.  So we can call d_move.  
> > 
> > Ah... right.  Thanks.  :-)
> 
> On the second thought, can't those too be dealt with d_revalidate?

FVO "dealt with" as pleasant and efficient as using coarse whetstone
to deal with caries.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Tejun Heo wrote:
> Eric W. Biederman wrote:
>>> That said, the mechanism is a bit too fragile.  sysfs currently ensures
>>> that dentry/inode point to the associated sysfs_dirent.  This is mainly
>>> remanent of conversion from previous VFS based implementation.  I think
>>> the right thing to do here is to make sysfs behave like other proper
>>> distributed filesystems using d_revalidate.
>> Huh?  We still need something like sysfs_get_dentry to find the dentries
>> for the rename or move operation.  So we can call d_move.  
> 
> Ah... right.  Thanks.  :-)

On the second thought, can't those too be dealt with d_revalidate?

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Eric W. Biederman wrote:
>> That said, the mechanism is a bit too fragile.  sysfs currently ensures
>> that dentry/inode point to the associated sysfs_dirent.  This is mainly
>> remanent of conversion from previous VFS based implementation.  I think
>> the right thing to do here is to make sysfs behave like other proper
>> distributed filesystems using d_revalidate.
> 
> Huh?  We still need something like sysfs_get_dentry to find the dentries
> for the rename or move operation.  So we can call d_move.  

Ah... right.  Thanks.  :-)

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Eric W. Biederman
Tejun Heo <[EMAIL PROTECTED]> writes:

> Hello,
>
> Tejun Heo wrote:
>> Al Viro wrote:
>>> On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
> Assuming that this is what we get, everything looks explainable - we
> have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
> gets evicted.  We don't have any exclusion, so while we are playing
> silly buggers with lookups in sysfs_get_dentry() we have parent become
> negative; the rest is obvious...
 That part of code is walking down the sysfs tree from the s_root of
 sysfs hierarchy and on each step parent is held using dget() while being
 referenced, so I don't think they can turn negative there.
>>> Turn?  Just what stops you from getting a negative (and unhashed) from
>>> lookup_one_noperm() and on the next iteration being buggered on 
>>> mutex_lock()?
>> 
>> Right, I haven't thought about that.  When sysfs_get_dentry() is called,
>> @sd is always valid so unless there was existing negative dentry, lookup
>> is guaranteed to return positive dentry, but by populating dcache with
>> negative dentry before a node is created, things can go wrong.  I don't
>> think that's what's going on here tho.  If that was the case, the
>> while() loop looking up the next sd to lookup (@cur) should have blown
>> up as negative dentry will have NULL d_fsdata which doesn't match any sd.
>> 
>> I guess what's needed here is d_revalidate() as other distributed
>> filesystems do.  I'll test whether this can be actually triggered and
>> prepare a fix.  Thanks a lot for pointing out the problem.
>
> This can't happen because lookup of non-existent entry doesn't create a
> negative dentry.  The new dentry is never hashed and killed after lookup
> failure, the above scenario can't happen.
>
> That said, the mechanism is a bit too fragile.  sysfs currently ensures
> that dentry/inode point to the associated sysfs_dirent.  This is mainly
> remanent of conversion from previous VFS based implementation.  I think
> the right thing to do here is to make sysfs behave like other proper
> distributed filesystems using d_revalidate.

Huh?  We still need something like sysfs_get_dentry to find the dentries
for the rename or move operation.  So we can call d_move.  

Further we should be talking about sysfs_move_dir not sysfs_rename_dir
as only the networking code calls sysfs_rename_dir.  Not that it is
significantly different.

I think I saw a change in lock ordering recently to help distributed
filesystems, and maybe that will simplify some of this.

Anyway I figure if I am to understand this I better go look and see if
I can find the start of this thread.  I don't have a clue
where we are blowing up.

Eric
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Dave Young wrote:
> On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote:
>> On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:
>>
>>> So the patch referenced above does not help. But I've found a very easy
>>> way to trigger the bug:
>>>
>>> - do a "cat /dev/zero > /dev/rfcomm0"
>>> - switch the phone off
>>> - switch the phone on, and the kernel oopses
>> FYI I also remember having oopses with 2.6.22. but I do not
>> have those logs anymore. Also I now booted 2.6.20.7 and it did not oops.
> 
> Could you try the patch (on 2.6.24-rc6) following and check the debug 
> messages? 
> 
> diff -upr linux/net/bluetooth/rfcomm/tty.c 
> linux.new/net/bluetooth/rfcomm/tty.c
> --- linux/net/bluetooth/rfcomm/tty.c  2008-01-04 08:58:48.0 +0800
> +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-04 09:01:01.0 
> +0800
> @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm
>  {
>   BT_DBG("dev %p", dev);
>  
> + dump_stack();
>   set_bit(RFCOMM_TTY_RELEASED, >flags);
>   rfcomm_dev_put(dev);
>  }

Tried to reproduce the problem here but I can't persuade my laptop to
talk to my cell. :-( Gabor, can you please report what Dave Young asked?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Dave Young wrote:
 On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote:
 On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:

 So the patch referenced above does not help. But I've found a very easy
 way to trigger the bug:

 - do a cat /dev/zero  /dev/rfcomm0
 - switch the phone off
 - switch the phone on, and the kernel oopses
 FYI I also remember having oopses with 2.6.22.something but I do not
 have those logs anymore. Also I now booted 2.6.20.7 and it did not oops.
 
 Could you try the patch (on 2.6.24-rc6) following and check the debug 
 messages? 
 
 diff -upr linux/net/bluetooth/rfcomm/tty.c 
 linux.new/net/bluetooth/rfcomm/tty.c
 --- linux/net/bluetooth/rfcomm/tty.c  2008-01-04 08:58:48.0 +0800
 +++ linux.new/net/bluetooth/rfcomm/tty.c  2008-01-04 09:01:01.0 
 +0800
 @@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm
  {
   BT_DBG(dev %p, dev);
  
 + dump_stack();
   set_bit(RFCOMM_TTY_RELEASED, dev-flags);
   rfcomm_dev_put(dev);
  }

Tried to reproduce the problem here but I can't persuade my laptop to
talk to my cell. :-( Gabor, can you please report what Dave Young asked?

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Eric W. Biederman
Tejun Heo [EMAIL PROTECTED] writes:

 Hello,

 Tejun Heo wrote:
 Al Viro wrote:
 On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
 Assuming that this is what we get, everything looks explainable - we
 have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
 gets evicted.  We don't have any exclusion, so while we are playing
 silly buggers with lookups in sysfs_get_dentry() we have parent become
 negative; the rest is obvious...
 That part of code is walking down the sysfs tree from the s_root of
 sysfs hierarchy and on each step parent is held using dget() while being
 referenced, so I don't think they can turn negative there.
 Turn?  Just what stops you from getting a negative (and unhashed) from
 lookup_one_noperm() and on the next iteration being buggered on 
 mutex_lock()?
 
 Right, I haven't thought about that.  When sysfs_get_dentry() is called,
 @sd is always valid so unless there was existing negative dentry, lookup
 is guaranteed to return positive dentry, but by populating dcache with
 negative dentry before a node is created, things can go wrong.  I don't
 think that's what's going on here tho.  If that was the case, the
 while() loop looking up the next sd to lookup (@cur) should have blown
 up as negative dentry will have NULL d_fsdata which doesn't match any sd.
 
 I guess what's needed here is d_revalidate() as other distributed
 filesystems do.  I'll test whether this can be actually triggered and
 prepare a fix.  Thanks a lot for pointing out the problem.

 This can't happen because lookup of non-existent entry doesn't create a
 negative dentry.  The new dentry is never hashed and killed after lookup
 failure, the above scenario can't happen.

 That said, the mechanism is a bit too fragile.  sysfs currently ensures
 that dentry/inode point to the associated sysfs_dirent.  This is mainly
 remanent of conversion from previous VFS based implementation.  I think
 the right thing to do here is to make sysfs behave like other proper
 distributed filesystems using d_revalidate.

Huh?  We still need something like sysfs_get_dentry to find the dentries
for the rename or move operation.  So we can call d_move.  

Further we should be talking about sysfs_move_dir not sysfs_rename_dir
as only the networking code calls sysfs_rename_dir.  Not that it is
significantly different.

I think I saw a change in lock ordering recently to help distributed
filesystems, and maybe that will simplify some of this.

Anyway I figure if I am to understand this I better go look and see if
I can find the start of this thread.  I don't have a clue
where we are blowing up.

Eric
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Eric W. Biederman wrote:
 That said, the mechanism is a bit too fragile.  sysfs currently ensures
 that dentry/inode point to the associated sysfs_dirent.  This is mainly
 remanent of conversion from previous VFS based implementation.  I think
 the right thing to do here is to make sysfs behave like other proper
 distributed filesystems using d_revalidate.
 
 Huh?  We still need something like sysfs_get_dentry to find the dentries
 for the rename or move operation.  So we can call d_move.  

Ah... right.  Thanks.  :-)

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Tejun Heo wrote:
 Eric W. Biederman wrote:
 That said, the mechanism is a bit too fragile.  sysfs currently ensures
 that dentry/inode point to the associated sysfs_dirent.  This is mainly
 remanent of conversion from previous VFS based implementation.  I think
 the right thing to do here is to make sysfs behave like other proper
 distributed filesystems using d_revalidate.
 Huh?  We still need something like sysfs_get_dentry to find the dentries
 for the rename or move operation.  So we can call d_move.  
 
 Ah... right.  Thanks.  :-)

On the second thought, can't those too be dealt with d_revalidate?

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Al Viro
On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote:
 Tejun Heo wrote:
  Eric W. Biederman wrote:
  That said, the mechanism is a bit too fragile.  sysfs currently ensures
  that dentry/inode point to the associated sysfs_dirent.  This is mainly
  remanent of conversion from previous VFS based implementation.  I think
  the right thing to do here is to make sysfs behave like other proper
  distributed filesystems using d_revalidate.
  Huh?  We still need something like sysfs_get_dentry to find the dentries
  for the rename or move operation.  So we can call d_move.  
  
  Ah... right.  Thanks.  :-)
 
 On the second thought, can't those too be dealt with d_revalidate?

FVO dealt with as pleasant and efficient as using coarse whetstone
to deal with caries.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Eric W. Biederman
Al Viro [EMAIL PROTECTED] writes:

 On Mon, Jan 07, 2008 at 06:18:21PM +0900, Tejun Heo wrote:
 Tejun Heo wrote:
  Eric W. Biederman wrote:
  That said, the mechanism is a bit too fragile.  sysfs currently ensures
  that dentry/inode point to the associated sysfs_dirent.  This is mainly
  remanent of conversion from previous VFS based implementation.  I think
  the right thing to do here is to make sysfs behave like other proper
  distributed filesystems using d_revalidate.
  Huh?  We still need something like sysfs_get_dentry to find the dentries
  for the rename or move operation.  So we can call d_move.  
  
  Ah... right.  Thanks.  :-)
 
 On the second thought, can't those too be dealt with d_revalidate?

 FVO dealt with as pleasant and efficient as using coarse whetstone
 to deal with caries.

Or to say it another way.  The linux VFS requires dentries to be
preserved and used as long as we can.  dropping them early causes
some weird nasties to show up, in particular it totally messes up
mounting other filesystems on top.

Eric
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Tejun Heo
Gabor Gombas wrote:
 Hi,
 
 On Sat, Jan 05, 2008 at 07:50:39AM +, Al Viro wrote:
 
 Could you stick
  if (!parent-d_inode)
  printk(KERN_WARNING sysfs locking blows: %s,
  parent-d_name.name);
 right before
 mutex_lock(parent-d_inode-i_mutex);
 dentry = lookup_one_noperm(cur-s_name, parent);
 mutex_unlock(parent-d_inode-i_mutex);
 in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
 trigger?
 
 Here it is:
 
 Jan  7 14:35:43 twister kernel: sysfs locking blows: acl001BAFE1624D1Unable 
 to handle kernel NULL pointer dereference at 00b8 RIP: 

Does the attached patch fix the problem?

-- 
tejun
diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c
index 3371629..4e7f3bf 100644
--- a/fs/sysfs/dir.c
+++ b/fs/sysfs/dir.c
@@ -878,7 +878,6 @@ again:
 	error = 0;
 	d_add(new_dentry, NULL);
 	d_move(old_dentry, new_dentry);
-	dput(new_dentry);
 
 	/* Remove from old parent's list and insert into new parent's list. */
 	sysfs_unlink_sibling(sd);


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-07 Thread Gabor Gombas
On Tue, Jan 08, 2008 at 12:24:05AM +0900, Tejun Heo wrote:

 Does the attached patch fix the problem?

No, it still oopses.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-06 Thread Tejun Heo
Hello,

Tejun Heo wrote:
> Al Viro wrote:
>> On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
 Assuming that this is what we get, everything looks explainable - we
 have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
 gets evicted.  We don't have any exclusion, so while we are playing
 silly buggers with lookups in sysfs_get_dentry() we have parent become
 negative; the rest is obvious...
>>> That part of code is walking down the sysfs tree from the s_root of
>>> sysfs hierarchy and on each step parent is held using dget() while being
>>> referenced, so I don't think they can turn negative there.
>> Turn?  Just what stops you from getting a negative (and unhashed) from
>> lookup_one_noperm() and on the next iteration being buggered on mutex_lock()?
> 
> Right, I haven't thought about that.  When sysfs_get_dentry() is called,
> @sd is always valid so unless there was existing negative dentry, lookup
> is guaranteed to return positive dentry, but by populating dcache with
> negative dentry before a node is created, things can go wrong.  I don't
> think that's what's going on here tho.  If that was the case, the
> while() loop looking up the next sd to lookup (@cur) should have blown
> up as negative dentry will have NULL d_fsdata which doesn't match any sd.
> 
> I guess what's needed here is d_revalidate() as other distributed
> filesystems do.  I'll test whether this can be actually triggered and
> prepare a fix.  Thanks a lot for pointing out the problem.

This can't happen because lookup of non-existent entry doesn't create a
negative dentry.  The new dentry is never hashed and killed after lookup
failure, the above scenario can't happen.

That said, the mechanism is a bit too fragile.  sysfs currently ensures
that dentry/inode point to the associated sysfs_dirent.  This is mainly
remanent of conversion from previous VFS based implementation.  I think
the right thing to do here is to make sysfs behave like other proper
distributed filesystems using d_revalidate.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-06 Thread Tejun Heo
Hello,

Tejun Heo wrote:
 Al Viro wrote:
 On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
 Assuming that this is what we get, everything looks explainable - we
 have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
 gets evicted.  We don't have any exclusion, so while we are playing
 silly buggers with lookups in sysfs_get_dentry() we have parent become
 negative; the rest is obvious...
 That part of code is walking down the sysfs tree from the s_root of
 sysfs hierarchy and on each step parent is held using dget() while being
 referenced, so I don't think they can turn negative there.
 Turn?  Just what stops you from getting a negative (and unhashed) from
 lookup_one_noperm() and on the next iteration being buggered on mutex_lock()?
 
 Right, I haven't thought about that.  When sysfs_get_dentry() is called,
 @sd is always valid so unless there was existing negative dentry, lookup
 is guaranteed to return positive dentry, but by populating dcache with
 negative dentry before a node is created, things can go wrong.  I don't
 think that's what's going on here tho.  If that was the case, the
 while() loop looking up the next sd to lookup (@cur) should have blown
 up as negative dentry will have NULL d_fsdata which doesn't match any sd.
 
 I guess what's needed here is d_revalidate() as other distributed
 filesystems do.  I'll test whether this can be actually triggered and
 prepare a fix.  Thanks a lot for pointing out the problem.

This can't happen because lookup of non-existent entry doesn't create a
negative dentry.  The new dentry is never hashed and killed after lookup
failure, the above scenario can't happen.

That said, the mechanism is a bit too fragile.  sysfs currently ensures
that dentry/inode point to the associated sysfs_dirent.  This is mainly
remanent of conversion from previous VFS based implementation.  I think
the right thing to do here is to make sysfs behave like other proper
distributed filesystems using d_revalidate.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello,

Al Viro wrote:
> On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote:
>> That means sysfs_remove_dir() is called on parent while other operations
>> are in progress on children, right?  sysfs has never allowed such things
>> && AFAIK no one does that.  It's somewhat implied in the interface (such
>> as recursive removing) but I fully agree it's problematic.  Things like
>> these are why I think we need to unify/simplify locking as I wrote
>> previously.
> 
> All it takes is kobject_rename() or kobject_move() called asynchronously 
> wrt removal...  I don't see an explicit ban for that.

There really hasn't been any stone-set rules for how sysfs can be used
and what operations are allowed simultaneously although sysfs
implementation always had a lot of assumptions about which ops can and
can't be done simultaneously.

The general assumption is that the caller is responsible for its and its
children's lifetime management which is usually followed as sysfs nodes
are removed when a device goes away or driver detaches at which point no
other ops should be in progress anyway.

I think this stems partially from tight coupling with kobject / driver
model.  kobject and driver model have certain rules about how they can
be used (again not explicit enough) and sysfs implementation kind of
piggy backed on those rules and added its own assumptions on top of it.
 After a while, it's hard to track what's assumed where and implementing
or changing one feature somewhere breaks some assumption elsewhere.
This is the primary reason why I think sysfs should be an independent
component which the driver model uses not something intertwined into
driver model while having mostly separate implementation.

> FWIW, what happens here *is* fishy, but I don't see an outright ban on
> that in documentation - rfcomm_tty_open() does
> device_move(dev->tty_dev, rfcomm_get_device(dev));
> when we get openers, rfcomm_tty_close() does
> device_move(dev->tty_dev, NULL);
> when the number of openers hits zero.  Can happen repeatedly.
> 
> Note that device_move() with new parent being NULL is explicitly allowed
> and handled, so...

(cc'ing Kay Sievers) IIRC, that's device class compatibility thing.
Overlapping move's are okay tho as they're protected from each other by
sysfs_rename_mutex.  I'll take a look at the rfcomm code and see what's
going on tomorrow.  Gotta hit the road now.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote:
> That means sysfs_remove_dir() is called on parent while other operations
> are in progress on children, right?  sysfs has never allowed such things
> && AFAIK no one does that.  It's somewhat implied in the interface (such
> as recursive removing) but I fully agree it's problematic.  Things like
> these are why I think we need to unify/simplify locking as I wrote
> previously.

All it takes is kobject_rename() or kobject_move() called asynchronously 
wrt removal...  I don't see an explicit ban for that.

FWIW, what happens here *is* fishy, but I don't see an outright ban on
that in documentation - rfcomm_tty_open() does
device_move(dev->tty_dev, rfcomm_get_device(dev));
when we get openers, rfcomm_tty_close() does
device_move(dev->tty_dev, NULL);
when the number of openers hits zero.  Can happen repeatedly.

Note that device_move() with new parent being NULL is explicitly allowed
and handled, so...
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello, Al Viro.

Al Viro wrote:
> On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote:
> 
>> Right, I haven't thought about that.  When sysfs_get_dentry() is called,
>> @sd is always valid so unless there was existing negative dentry, lookup
>> is guaranteed to return positive dentry, but by populating dcache with
>> negative dentry before a node is created, things can go wrong.  I don't
>> think that's what's going on here tho.  If that was the case, the
>> while() loop looking up the next sd to lookup (@cur) should have blown
>> up as negative dentry will have NULL d_fsdata which doesn't match any sd.
> 
> No.  What happens if sd gets unlinked while we are on the way to
> sysfs_get_dentry() and so does its parent?

That means sysfs_remove_dir() is called on parent while other operations
are in progress on children, right?  sysfs has never allowed such things
&& AFAIK no one does that.  It's somewhat implied in the interface (such
as recursive removing) but I fully agree it's problematic.  Things like
these are why I think we need to unify/simplify locking as I wrote
previously.

> The parent is off the
> sibling list, we get negative dentry from lookup.  It's not hashed,
> so won't stick around in dcache (which is apparently what you are
> thinking about).  However, _THIS_ lookup has returned you a dentry
> with NULL ->d_inode and you are well and truly buggered.

So, the assumption is that while @sd is held and being operated on its
ancestry can never change except for rename and move and those are
protected with sysfs_rename_mutex.

I agree that the locking is awkward but sysfs's internal implementation
has completely changed over last six or so months and the conversion
still isn't complete.  It isn't pretty at all but is at least
understandable now, IMHO.  :-)

The last pending change is separating out kobject from sysfs and
simplifying locking.  I had some disagreement with Greg about the future
of kobject and then there was tsunami of ATA bugs because most major
distros converted to libata from IDE in the second half of the last year
and I'm still buried under it.  I'll get back to sysfs once these ATA
bugs subside a bit.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote:

> Right, I haven't thought about that.  When sysfs_get_dentry() is called,
> @sd is always valid so unless there was existing negative dentry, lookup
> is guaranteed to return positive dentry, but by populating dcache with
> negative dentry before a node is created, things can go wrong.  I don't
> think that's what's going on here tho.  If that was the case, the
> while() loop looking up the next sd to lookup (@cur) should have blown
> up as negative dentry will have NULL d_fsdata which doesn't match any sd.

No.  What happens if sd gets unlinked while we are on the way to
sysfs_get_dentry() and so does its parent?  The parent is off the
sibling list, we get negative dentry from lookup.  It's not hashed,
so won't stick around in dcache (which is apparently what you are
thinking about).  However, _THIS_ lookup has returned you a dentry
with NULL ->d_inode and you are well and truly buggered.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello,

Al Viro wrote:
> On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
>>> Assuming that this is what we get, everything looks explainable - we
>>> have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
>>> gets evicted.  We don't have any exclusion, so while we are playing
>>> silly buggers with lookups in sysfs_get_dentry() we have parent become
>>> negative; the rest is obvious...
>> That part of code is walking down the sysfs tree from the s_root of
>> sysfs hierarchy and on each step parent is held using dget() while being
>> referenced, so I don't think they can turn negative there.
> 
> Turn?  Just what stops you from getting a negative (and unhashed) from
> lookup_one_noperm() and on the next iteration being buggered on mutex_lock()?

Right, I haven't thought about that.  When sysfs_get_dentry() is called,
@sd is always valid so unless there was existing negative dentry, lookup
is guaranteed to return positive dentry, but by populating dcache with
negative dentry before a node is created, things can go wrong.  I don't
think that's what's going on here tho.  If that was the case, the
while() loop looking up the next sd to lookup (@cur) should have blown
up as negative dentry will have NULL d_fsdata which doesn't match any sd.

I guess what's needed here is d_revalidate() as other distributed
filesystems do.  I'll test whether this can be actually triggered and
prepare a fix.  Thanks a lot for pointing out the problem.

>>> AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
>>> and the way it plays with fs/namei.c are ucking fugly.
>> Can you elaborate a bit?  The locking in sysfs is unconventional but
>> that's mostly from necessity.  It has dual interface - vfs and driver
>> model && vfs data structures (dentry and inode) are too big to always
>> keep around, so it basically becomes a small distributed file system
>> where the backing data can change asynchronously.
> 
> ... with all fun that creates.  As it is, you have those async changers
> of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm()
> and yet it needs sysfs_mutex inside sysfs_lookup().  So you can't have
> sysfs_get_dentry() under it.  So you don't have exclusion with arseloads
> of sysfs tree changes in there.  Joy...

There are two locks.  sysfs_rename_mutex and sysfs_mutex.
sysfs_rename_mutex is above VFS locks while sysfs_mutex is below VFS
locks.  sysfs_rename_mutex() protects against move/rename which can
change the ancestry of a held sysfs_dirent while sysfs_mutex protects
the sd hierarchy itself.  Locking can be wrong if sysfs_rename_mutex
locking is missing from the places where ancestry of a held sd can
change but I can't find one ATM.  If I'm missing your point again, feel
free to scream at me.  :-)

As it's unnecessarily unintuitive, there's a pending change to rename
sysfs_rename_mutex and use it to protect the whole tree structure to
make locking simpler while using sysfs_mutex to guard VFS access such
that the locking hierarchy plainly becomes sysfs_rename_mutex - VFS
locks - sysfs_mutex where all internal sysfs structure is protected by
the outer mutex and the inner one just protects VFS accesses.

> Frankly, with the current state of sysfs the last vestiges of arguments
> used to push it into the tree back then are dead and buried.  I'm not
> blaming you, BTW - the shitpile *did* grow past the point where its
> memory footprint became far too large and something needed to be done.
> Unfortunately, it happened too late for that something being "get rid
> of the entire mess" and now we are saddled with it for good.

Yeah, it's too late to get rid of sysfs and regardless implementation
ugliness, which BTW I think has improved a lot during last six or so
months, it's now pretty useful and important to drivers, so I guess the
only option is trying hard to make it better.

Oh, BTW, the ugly lookup_one_noperm() can be removed if LOOKUP_NOPERM
flag is added.  The only reason sysfs_lookup() uses the specialized
lookup is to avoid permission check.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
> > Assuming that this is what we get, everything looks explainable - we
> > have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
> > gets evicted.  We don't have any exclusion, so while we are playing
> > silly buggers with lookups in sysfs_get_dentry() we have parent become
> > negative; the rest is obvious...
> 
> That part of code is walking down the sysfs tree from the s_root of
> sysfs hierarchy and on each step parent is held using dget() while being
> referenced, so I don't think they can turn negative there.

Turn?  Just what stops you from getting a negative (and unhashed) from
lookup_one_noperm() and on the next iteration being buggered on mutex_lock()?
 
> > AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
> > and the way it plays with fs/namei.c are ucking fugly.
> 
> Can you elaborate a bit?  The locking in sysfs is unconventional but
> that's mostly from necessity.  It has dual interface - vfs and driver
> model && vfs data structures (dentry and inode) are too big to always
> keep around, so it basically becomes a small distributed file system
> where the backing data can change asynchronously.

... with all fun that creates.  As it is, you have those async changers
of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm()
and yet it needs sysfs_mutex inside sysfs_lookup().  So you can't have
sysfs_get_dentry() under it.  So you don't have exclusion with arseloads
of sysfs tree changes in there.  Joy...

Frankly, with the current state of sysfs the last vestiges of arguments
used to push it into the tree back then are dead and buried.  I'm not
blaming you, BTW - the shitpile *did* grow past the point where its
memory footprint became far too large and something needed to be done.
Unfortunately, it happened too late for that something being "get rid
of the entire mess" and now we are saddled with it for good.
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello.

Al Viro wrote:
> sysfs_get_dentry(),
> mutex_lock(>d_inode->i_mutex);
> hitting parent->d_inode either NULL or very close to it, depending on your
> .config; most likely NULL, if offset of i_mutex is 0xb8 in your build.
> That's plausible - 0xb8 is what you'd get on UP build without spinlock
> debugging, lockdep, etc.
> 
> Assuming that this is what we get, everything looks explainable - we
> have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
> gets evicted.  We don't have any exclusion, so while we are playing
> silly buggers with lookups in sysfs_get_dentry() we have parent become
> negative; the rest is obvious...

That part of code is walking down the sysfs tree from the s_root of
sysfs hierarchy and on each step parent is held using dget() while being
referenced, so I don't think they can turn negative there.

> AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
> and the way it plays with fs/namei.c are ucking fugly.

Can you elaborate a bit?  The locking in sysfs is unconventional but
that's mostly from necessity.  It has dual interface - vfs and driver
model && vfs data structures (dentry and inode) are too big to always
keep around, so it basically becomes a small distributed file system
where the backing data can change asynchronously.

> Could you stick
>   if (!parent->d_inode)
>   printk(KERN_WARNING "sysfs locking blows: %s",
>   parent->d_name.name);
> right before
> mutex_lock(>d_inode->i_mutex);
> dentry = lookup_one_noperm(cur->s_name, parent);
> mutex_unlock(>d_inode->i_mutex);
> in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
> trigger?

Yes, please.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello.

Al Viro wrote:
 sysfs_get_dentry(),
 mutex_lock(parent-d_inode-i_mutex);
 hitting parent-d_inode either NULL or very close to it, depending on your
 .config; most likely NULL, if offset of i_mutex is 0xb8 in your build.
 That's plausible - 0xb8 is what you'd get on UP build without spinlock
 debugging, lockdep, etc.
 
 Assuming that this is what we get, everything looks explainable - we
 have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
 gets evicted.  We don't have any exclusion, so while we are playing
 silly buggers with lookups in sysfs_get_dentry() we have parent become
 negative; the rest is obvious...

That part of code is walking down the sysfs tree from the s_root of
sysfs hierarchy and on each step parent is held using dget() while being
referenced, so I don't think they can turn negative there.

 AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
 and the way it plays with fs/namei.c are ucking fugly.

Can you elaborate a bit?  The locking in sysfs is unconventional but
that's mostly from necessity.  It has dual interface - vfs and driver
model  vfs data structures (dentry and inode) are too big to always
keep around, so it basically becomes a small distributed file system
where the backing data can change asynchronously.

 Could you stick
   if (!parent-d_inode)
   printk(KERN_WARNING sysfs locking blows: %s,
   parent-d_name.name);
 right before
 mutex_lock(parent-d_inode-i_mutex);
 dentry = lookup_one_noperm(cur-s_name, parent);
 mutex_unlock(parent-d_inode-i_mutex);
 in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
 trigger?

Yes, please.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sat, Jan 05, 2008 at 11:30:25PM +0900, Tejun Heo wrote:
  Assuming that this is what we get, everything looks explainable - we
  have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
  gets evicted.  We don't have any exclusion, so while we are playing
  silly buggers with lookups in sysfs_get_dentry() we have parent become
  negative; the rest is obvious...
 
 That part of code is walking down the sysfs tree from the s_root of
 sysfs hierarchy and on each step parent is held using dget() while being
 referenced, so I don't think they can turn negative there.

Turn?  Just what stops you from getting a negative (and unhashed) from
lookup_one_noperm() and on the next iteration being buggered on mutex_lock()?
 
  AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
  and the way it plays with fs/namei.c are ucking fugly.
 
 Can you elaborate a bit?  The locking in sysfs is unconventional but
 that's mostly from necessity.  It has dual interface - vfs and driver
 model  vfs data structures (dentry and inode) are too big to always
 keep around, so it basically becomes a small distributed file system
 where the backing data can change asynchronously.

... with all fun that creates.  As it is, you have those async changers
of backing data using VFS locking _under_ sysfs locks via lookup_one_noperm()
and yet it needs sysfs_mutex inside sysfs_lookup().  So you can't have
sysfs_get_dentry() under it.  So you don't have exclusion with arseloads
of sysfs tree changes in there.  Joy...

Frankly, with the current state of sysfs the last vestiges of arguments
used to push it into the tree back then are dead and buried.  I'm not
blaming you, BTW - the shitpile *did* grow past the point where its
memory footprint became far too large and something needed to be done.
Unfortunately, it happened too late for that something being get rid
of the entire mess and now we are saddled with it for good.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote:

 Right, I haven't thought about that.  When sysfs_get_dentry() is called,
 @sd is always valid so unless there was existing negative dentry, lookup
 is guaranteed to return positive dentry, but by populating dcache with
 negative dentry before a node is created, things can go wrong.  I don't
 think that's what's going on here tho.  If that was the case, the
 while() loop looking up the next sd to lookup (@cur) should have blown
 up as negative dentry will have NULL d_fsdata which doesn't match any sd.

No.  What happens if sd gets unlinked while we are on the way to
sysfs_get_dentry() and so does its parent?  The parent is off the
sibling list, we get negative dentry from lookup.  It's not hashed,
so won't stick around in dcache (which is apparently what you are
thinking about).  However, _THIS_ lookup has returned you a dentry
with NULL -d_inode and you are well and truly buggered.
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello, Al Viro.

Al Viro wrote:
 On Sun, Jan 06, 2008 at 11:07:52AM +0900, Tejun Heo wrote:
 
 Right, I haven't thought about that.  When sysfs_get_dentry() is called,
 @sd is always valid so unless there was existing negative dentry, lookup
 is guaranteed to return positive dentry, but by populating dcache with
 negative dentry before a node is created, things can go wrong.  I don't
 think that's what's going on here tho.  If that was the case, the
 while() loop looking up the next sd to lookup (@cur) should have blown
 up as negative dentry will have NULL d_fsdata which doesn't match any sd.
 
 No.  What happens if sd gets unlinked while we are on the way to
 sysfs_get_dentry() and so does its parent?

That means sysfs_remove_dir() is called on parent while other operations
are in progress on children, right?  sysfs has never allowed such things
 AFAIK no one does that.  It's somewhat implied in the interface (such
as recursive removing) but I fully agree it's problematic.  Things like
these are why I think we need to unify/simplify locking as I wrote
previously.

 The parent is off the
 sibling list, we get negative dentry from lookup.  It's not hashed,
 so won't stick around in dcache (which is apparently what you are
 thinking about).  However, _THIS_ lookup has returned you a dentry
 with NULL -d_inode and you are well and truly buggered.

So, the assumption is that while @sd is held and being operated on its
ancestry can never change except for rename and move and those are
protected with sysfs_rename_mutex.

I agree that the locking is awkward but sysfs's internal implementation
has completely changed over last six or so months and the conversion
still isn't complete.  It isn't pretty at all but is at least
understandable now, IMHO.  :-)

The last pending change is separating out kobject from sysfs and
simplifying locking.  I had some disagreement with Greg about the future
of kobject and then there was tsunami of ATA bugs because most major
distros converted to libata from IDE in the second half of the last year
and I'm still buried under it.  I'll get back to sysfs once these ATA
bugs subside a bit.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Tejun Heo
Hello,

Al Viro wrote:
 On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote:
 That means sysfs_remove_dir() is called on parent while other operations
 are in progress on children, right?  sysfs has never allowed such things
  AFAIK no one does that.  It's somewhat implied in the interface (such
 as recursive removing) but I fully agree it's problematic.  Things like
 these are why I think we need to unify/simplify locking as I wrote
 previously.
 
 All it takes is kobject_rename() or kobject_move() called asynchronously 
 wrt removal...  I don't see an explicit ban for that.

There really hasn't been any stone-set rules for how sysfs can be used
and what operations are allowed simultaneously although sysfs
implementation always had a lot of assumptions about which ops can and
can't be done simultaneously.

The general assumption is that the caller is responsible for its and its
children's lifetime management which is usually followed as sysfs nodes
are removed when a device goes away or driver detaches at which point no
other ops should be in progress anyway.

I think this stems partially from tight coupling with kobject / driver
model.  kobject and driver model have certain rules about how they can
be used (again not explicit enough) and sysfs implementation kind of
piggy backed on those rules and added its own assumptions on top of it.
 After a while, it's hard to track what's assumed where and implementing
or changing one feature somewhere breaks some assumption elsewhere.
This is the primary reason why I think sysfs should be an independent
component which the driver model uses not something intertwined into
driver model while having mostly separate implementation.

 FWIW, what happens here *is* fishy, but I don't see an outright ban on
 that in documentation - rfcomm_tty_open() does
 device_move(dev-tty_dev, rfcomm_get_device(dev));
 when we get openers, rfcomm_tty_close() does
 device_move(dev-tty_dev, NULL);
 when the number of openers hits zero.  Can happen repeatedly.
 
 Note that device_move() with new parent being NULL is explicitly allowed
 and handled, so...

(cc'ing Kay Sievers) IIRC, that's device class compatibility thing.
Overlapping move's are okay tho as they're protected from each other by
sysfs_rename_mutex.  I'll take a look at the rfcomm code and see what's
going on tomorrow.  Gotta hit the road now.

Thanks.

-- 
tejun
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-05 Thread Al Viro
On Sun, Jan 06, 2008 at 11:54:43AM +0900, Tejun Heo wrote:
 That means sysfs_remove_dir() is called on parent while other operations
 are in progress on children, right?  sysfs has never allowed such things
  AFAIK no one does that.  It's somewhat implied in the interface (such
 as recursive removing) but I fully agree it's problematic.  Things like
 these are why I think we need to unify/simplify locking as I wrote
 previously.

All it takes is kobject_rename() or kobject_move() called asynchronously 
wrt removal...  I don't see an explicit ban for that.

FWIW, what happens here *is* fishy, but I don't see an outright ban on
that in documentation - rfcomm_tty_open() does
device_move(dev-tty_dev, rfcomm_get_device(dev));
when we get openers, rfcomm_tty_close() does
device_move(dev-tty_dev, NULL);
when the number of openers hits zero.  Can happen repeatedly.

Note that device_move() with new parent being NULL is explicitly allowed
and handled, so...
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-04 Thread Al Viro
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:
> Heh, it seems talking about a bug makes it trigger:
> 
> Jan  2 16:05:45 twister kernel: Unable to handle kernel NULL pointer 
> dereference at 00b8 RIP: 
> Jan  2 16:05:45 twister kernel:  [] mutex_lock+0x10/0x1d
 
> So the patch referenced above does not help. But I've found a very easy
> way to trigger the bug:
> 
> - do a "cat /dev/zero > /dev/rfcomm0"
> - switch the phone off
> - switch the phone on, and the kernel oopses

sysfs_get_dentry(),
mutex_lock(>d_inode->i_mutex);
hitting parent->d_inode either NULL or very close to it, depending on your
.config; most likely NULL, if offset of i_mutex is 0xb8 in your build.
That's plausible - 0xb8 is what you'd get on UP build without spinlock
debugging, lockdep, etc.

Assuming that this is what we get, everything looks explainable - we
have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
gets evicted.  We don't have any exclusion, so while we are playing
silly buggers with lookups in sysfs_get_dentry() we have parent become
negative; the rest is obvious...

AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
and the way it plays with fs/namei.c are ucking fugly.

Could you stick
if (!parent->d_inode)
printk(KERN_WARNING "sysfs locking blows: %s",
parent->d_name.name);
right before
mutex_lock(>d_inode->i_mutex);
dentry = lookup_one_noperm(cur->s_name, parent);
mutex_unlock(>d_inode->i_mutex);
in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
trigger?
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-04 Thread Al Viro
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:
 Heh, it seems talking about a bug makes it trigger:
 
 Jan  2 16:05:45 twister kernel: Unable to handle kernel NULL pointer 
 dereference at 00b8 RIP: 
 Jan  2 16:05:45 twister kernel:  [804720a5] mutex_lock+0x10/0x1d
 
 So the patch referenced above does not help. But I've found a very easy
 way to trigger the bug:
 
 - do a cat /dev/zero  /dev/rfcomm0
 - switch the phone off
 - switch the phone on, and the kernel oopses

sysfs_get_dentry(),
mutex_lock(parent-d_inode-i_mutex);
hitting parent-d_inode either NULL or very close to it, depending on your
.config; most likely NULL, if offset of i_mutex is 0xb8 in your build.
That's plausible - 0xb8 is what you'd get on UP build without spinlock
debugging, lockdep, etc.

Assuming that this is what we get, everything looks explainable - we
have sysfs_rename_dir() calling sysfs_get_dentry() while the parent
gets evicted.  We don't have any exclusion, so while we are playing
silly buggers with lookups in sysfs_get_dentry() we have parent become
negative; the rest is obvious...

AFAICS, the locking here is quite broken and frankly, sysfs_get_dentry()
and the way it plays with fs/namei.c are ucking fugly.

Could you stick
if (!parent-d_inode)
printk(KERN_WARNING sysfs locking blows: %s,
parent-d_name.name);
right before
mutex_lock(parent-d_inode-i_mutex);
dentry = lookup_one_noperm(cur-s_name, parent);
mutex_unlock(parent-d_inode-i_mutex);
in sysfs_get_dentry() (fs/sysfs/dir.c) and verify that it does, indeed,
trigger?
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-03 Thread Dave Young
On Thu, Jan 03, 2008 at 02:16:21PM +0100, Gabor Gombas wrote:
> On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:
> 
> > So the patch referenced above does not help. But I've found a very easy
> > way to trigger the bug:
> > 
> > - do a "cat /dev/zero > /dev/rfcomm0"
> > - switch the phone off
> > - switch the phone on, and the kernel oopses
> 
> FYI I also remember having oopses with 2.6.22. but I do not
> have those logs anymore. Also I now booted 2.6.20.7 and it did not oops.

Could you try the patch (on 2.6.24-rc6) following and check the debug messages? 

diff -upr linux/net/bluetooth/rfcomm/tty.c linux.new/net/bluetooth/rfcomm/tty.c
--- linux/net/bluetooth/rfcomm/tty.c2008-01-04 08:58:48.0 +0800
+++ linux.new/net/bluetooth/rfcomm/tty.c2008-01-04 09:01:01.0 
+0800
@@ -313,6 +313,7 @@ static void rfcomm_dev_del(struct rfcomm
 {
BT_DBG("dev %p", dev);
 
+   dump_stack();
set_bit(RFCOMM_TTY_RELEASED, >flags);
rfcomm_dev_put(dev);
 }
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-03 Thread Gabor Gombas
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:

> So the patch referenced above does not help. But I've found a very easy
> way to trigger the bug:
> 
> - do a "cat /dev/zero > /dev/rfcomm0"
> - switch the phone off
> - switch the phone on, and the kernel oopses

FYI I also remember having oopses with 2.6.22. but I do not
have those logs anymore. Also I now booted 2.6.20.7 and it did not oops.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-03 Thread Gabor Gombas
On Wed, Jan 02, 2008 at 04:16:42PM +0100, Gabor Gombas wrote:

 So the patch referenced above does not help. But I've found a very easy
 way to trigger the bug:
 
 - do a cat /dev/zero  /dev/rfcomm0
 - switch the phone off
 - switch the phone on, and the kernel oopses

FYI I also remember having oopses with 2.6.22.something but I do not
have those logs anymore. Also I now booted 2.6.20.7 and it did not oops.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-02 Thread Gabor Gombas
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote:

> Please try the -mm tree kernel, might have been fixed by :
> http://lkml.org/lkml/2007/11/18/141

Heh, it seems talking about a bug makes it trigger:

Jan  2 16:05:45 twister kernel: Unable to handle kernel NULL pointer 
dereference at 00b8 RIP: 
Jan  2 16:05:45 twister kernel:  [] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel: PGD bcf6e067 PUD bcee3067 PMD 0 
Jan  2 16:05:45 twister kernel: Oops: 0002 [1] 
Jan  2 16:05:45 twister kernel: CPU 0 
Jan  2 16:05:45 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap 
nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter 
ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt dm_snapshot dm_mirror 
cpufreq_ondemand saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth 
usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 sg snd_intel8x0 
saa7134 snd_ac97_codec ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c sr_mod 
firewire_ohci firewire_core snd_pcm crc_itu_t ir_common ehci_hcd ohci_hcd cdrom 
snd_timer snd_page_alloc parport_pc parport sky2 forcedeth floppy
Jan  2 16:05:45 twister kernel: Pid: 5056, comm: cat Not tainted 
2.6.24-rc6-dirty #3
Jan  2 16:05:45 twister kernel: RIP: 0010:[]  
[] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel: RSP: 0018:8100bce3fd08  EFLAGS: 00010246
Jan  2 16:05:45 twister kernel: RAX:  RBX: 00b8 
RCX: 
Jan  2 16:05:45 twister kernel: RDX: 8100bce3ffd8 RSI: 80641d70 
RDI: 00b8
Jan  2 16:05:45 twister kernel: RBP: 80591db0 R08:  
R09: 000899a2
Jan  2 16:05:45 twister kernel: R10:  R11: 00300018 
R12: 8100bcb8ef50
Jan  2 16:05:45 twister kernel: R13: fff4 R14: 8100bcfc8e00 
R15: 8100a370b300
Jan  2 16:05:45 twister kernel: FS:  2b225d0e56e0() 
GS:805b8000() knlGS:
Jan  2 16:05:45 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan  2 16:05:45 twister kernel: CR2: 00b8 CR3: 95ad1000 
CR4: 06e0
Jan  2 16:05:45 twister kernel: DR0:  DR1:  
DR2: 
Jan  2 16:05:45 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan  2 16:05:45 twister kernel: Process cat (pid: 5056, threadinfo 
8100bce3e000, task 8100ba931060)
Jan  2 16:05:45 twister kernel: Stack:  e207e2d8 8028 
8100a3387000 802aefa5
Jan  2 16:05:45 twister kernel:  8100bfa8af50 8100bcb8ef50 
8100baab9300 802af1ba
Jan  2 16:05:45 twister kernel:  8100a342a8d0 8100a342a8d0 
8100bfa92dc0 8100baab9300
Jan  2 16:05:45 twister kernel: Call Trace:
Jan  2 16:05:45 twister kernel:  [] dput+0x26/0x103
Jan  2 16:05:45 twister kernel:  [] sysfs_get_dentry+0x45/0x8f
Jan  2 16:05:45 twister kernel:  [] sysfs_move_dir+0x63/0x204
Jan  2 16:05:45 twister kernel:  [] kobject_move+0xba/0x110
Jan  2 16:05:45 twister kernel:  [] device_move+0x59/0x111
Jan  2 16:05:45 twister kernel:  [] 
:rfcomm:rfcomm_tty_close+0x2f/0x74
Jan  2 16:05:45 twister kernel:  [] release_dev+0x212/0x5e2
Jan  2 16:05:45 twister kernel:  [] do_page_fault+0x2ff/0x65a
Jan  2 16:05:45 twister kernel:  [] tty_release+0xc/0x10
Jan  2 16:05:45 twister kernel:  [] __fput+0xb1/0x16f
Jan  2 16:05:45 twister kernel:  [] filp_close+0x5d/0x65
Jan  2 16:05:45 twister kernel:  [] sys_close+0x73/0xa6
Jan  2 16:05:45 twister kernel:  [] tracesys+0xdc/0xe1
Jan  2 16:05:45 twister kernel: 
Jan  2 16:05:45 twister kernel: 
Jan  2 16:05:45 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 
54 48 8d 47 08 48 
Jan  2 16:05:45 twister kernel: RIP  [] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel:  RSP 
Jan  2 16:05:45 twister kernel: CR2: 00b8
Jan  2 16:05:45 twister kernel: ---[ end trace da76522f0284e9b6 ]---

So the patch referenced above does not help. But I've found a very easy
way to trigger the bug:

- do a "cat /dev/zero > /dev/rfcomm0"
- switch the phone off
- switch the phone on, and the kernel oopses

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-02 Thread Gabor Gombas
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote:

> Please try the -mm tree kernel, might have been fixed by :
> http://lkml.org/lkml/2007/11/18/141

I've applied the patch on top of 2.6.24-rc6 (I don't want to run -mm
kernels on this machine). We'll see what happens.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-02 Thread Gabor Gombas
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote:

 Please try the -mm tree kernel, might have been fixed by :
 http://lkml.org/lkml/2007/11/18/141

I've applied the patch on top of 2.6.24-rc6 (I don't want to run -mm
kernels on this machine). We'll see what happens.

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2008-01-02 Thread Gabor Gombas
On Sat, Dec 29, 2007 at 04:07:04PM +0800, Dave Young wrote:

 Please try the -mm tree kernel, might have been fixed by :
 http://lkml.org/lkml/2007/11/18/141

Heh, it seems talking about a bug makes it trigger:

Jan  2 16:05:45 twister kernel: Unable to handle kernel NULL pointer 
dereference at 00b8 RIP: 
Jan  2 16:05:45 twister kernel:  [804720a5] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel: PGD bcf6e067 PUD bcee3067 PMD 0 
Jan  2 16:05:45 twister kernel: Oops: 0002 [1] 
Jan  2 16:05:45 twister kernel: CPU 0 
Jan  2 16:05:45 twister kernel: Modules linked in: binfmt_misc rfcomm l2cap 
nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp ipt_LOG xt_limit iptable_filter 
ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt dm_snapshot dm_mirror 
cpufreq_ondemand saa7134_alsa radeon hwmon_vid eeprom hci_usb bluetooth 
usb_storage tuner tea5767 tda8290 tuner_simple mt20xx tea5761 sg snd_intel8x0 
saa7134 snd_ac97_codec ac97_bus videobuf_dma_sg videobuf_core ir_kbd_i2c sr_mod 
firewire_ohci firewire_core snd_pcm crc_itu_t ir_common ehci_hcd ohci_hcd cdrom 
snd_timer snd_page_alloc parport_pc parport sky2 forcedeth floppy
Jan  2 16:05:45 twister kernel: Pid: 5056, comm: cat Not tainted 
2.6.24-rc6-dirty #3
Jan  2 16:05:45 twister kernel: RIP: 0010:[804720a5]  
[804720a5] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel: RSP: 0018:8100bce3fd08  EFLAGS: 00010246
Jan  2 16:05:45 twister kernel: RAX:  RBX: 00b8 
RCX: 
Jan  2 16:05:45 twister kernel: RDX: 8100bce3ffd8 RSI: 80641d70 
RDI: 00b8
Jan  2 16:05:45 twister kernel: RBP: 80591db0 R08:  
R09: 000899a2
Jan  2 16:05:45 twister kernel: R10:  R11: 00300018 
R12: 8100bcb8ef50
Jan  2 16:05:45 twister kernel: R13: fff4 R14: 8100bcfc8e00 
R15: 8100a370b300
Jan  2 16:05:45 twister kernel: FS:  2b225d0e56e0() 
GS:805b8000() knlGS:
Jan  2 16:05:45 twister kernel: CS:  0010 DS:  ES:  CR0: 
8005003b
Jan  2 16:05:45 twister kernel: CR2: 00b8 CR3: 95ad1000 
CR4: 06e0
Jan  2 16:05:45 twister kernel: DR0:  DR1:  
DR2: 
Jan  2 16:05:45 twister kernel: DR3:  DR6: 0ff0 
DR7: 0400
Jan  2 16:05:45 twister kernel: Process cat (pid: 5056, threadinfo 
8100bce3e000, task 8100ba931060)
Jan  2 16:05:45 twister kernel: Stack:  e207e2d8 8028 
8100a3387000 802aefa5
Jan  2 16:05:45 twister kernel:  8100bfa8af50 8100bcb8ef50 
8100baab9300 802af1ba
Jan  2 16:05:45 twister kernel:  8100a342a8d0 8100a342a8d0 
8100bfa92dc0 8100baab9300
Jan  2 16:05:45 twister kernel: Call Trace:
Jan  2 16:05:45 twister kernel:  [8028] dput+0x26/0x103
Jan  2 16:05:45 twister kernel:  [802aefa5] sysfs_get_dentry+0x45/0x8f
Jan  2 16:05:45 twister kernel:  [802af1ba] sysfs_move_dir+0x63/0x204
Jan  2 16:05:45 twister kernel:  [803006e5] kobject_move+0xba/0x110
Jan  2 16:05:45 twister kernel:  [80368a00] device_move+0x59/0x111
Jan  2 16:05:45 twister kernel:  [88292425] 
:rfcomm:rfcomm_tty_close+0x2f/0x74
Jan  2 16:05:45 twister kernel:  [803446bf] release_dev+0x212/0x5e2
Jan  2 16:05:45 twister kernel:  [8021b609] do_page_fault+0x2ff/0x65a
Jan  2 16:05:45 twister kernel:  [80344a9b] tty_release+0xc/0x10
Jan  2 16:05:45 twister kernel:  [80276f67] __fput+0xb1/0x16f
Jan  2 16:05:45 twister kernel:  [802748b5] filp_close+0x5d/0x65
Jan  2 16:05:45 twister kernel:  [80275a07] sys_close+0x73/0xa6
Jan  2 16:05:45 twister kernel:  [8020b5fc] tracesys+0xdc/0xe1
Jan  2 16:05:45 twister kernel: 
Jan  2 16:05:45 twister kernel: 
Jan  2 16:05:45 twister kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 
54 48 8d 47 08 48 
Jan  2 16:05:45 twister kernel: RIP  [804720a5] mutex_lock+0x10/0x1d
Jan  2 16:05:45 twister kernel:  RSP 8100bce3fd08
Jan  2 16:05:45 twister kernel: CR2: 00b8
Jan  2 16:05:45 twister kernel: ---[ end trace da76522f0284e9b6 ]---

So the patch referenced above does not help. But I've found a very easy
way to trigger the bug:

- do a cat /dev/zero  /dev/rfcomm0
- switch the phone off
- switch the phone on, and the kernel oopses

Gabor

-- 
 -
 MTA SZTAKI Computer and Automation Research Institute
Hungarian Academy of Sciences
 -
--
To unsubscribe from this list: send the line unsubscribe linux-kernel in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2007-12-29 Thread Dave Young
On Dec 29, 2007 1:32 AM, Gabor Gombas <[EMAIL PROTECTED]> wrote:
> Hi,
>
> I'm using my phone as a GPRS modem over Bluetooth. Sometimes Bluetooth
> on the phone seems to stall and the phone has to be switched off & on to
> get it back to a sane state. During this I sometimes get the following
> Oops (this is kernel 2.6.24-rc6 on x86_64):
>
> Dec 28 17:52:11 host kernel: Unable to handle kernel NULL pointer dereference 
> at 00b8 RIP:
> Dec 28 17:52:11 host kernel:  [] mutex_lock+0x10/0x1d
> Dec 28 17:52:11 host kernel: PGD 920b0067 PUD a7aba067 PMD 0
> Dec 28 17:52:11 host kernel: Oops: 0002 [1]
> Dec 28 17:52:11 host kernel: CPU 0
> Dec 28 17:52:11 host kernel: Modules linked in: cpufreq_ondemand ppp_deflate 
> zlib_deflate zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc 
> binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp 
> ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc 
> fuse dm_crypt dm_snapshot dm_mirror saa7134_alsa radeon hwmon_vid eeprom 
> hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx 
> tea5761 sg snd_intel8x0 saa7134 firewire_ohci snd_ac97_codec firewire_core 
> videobuf_dma_sg videobuf_core ir_kbd_i2c ac97_bus crc_itu_t ir_common snd_pcm 
> ehci_hcd ohci_hcd parport_pc parport sky2 sr_mod forcedeth snd_timer 
> snd_page_alloc cdrom floppy
> Dec 28 17:52:11 host kernel: Pid: 4381, comm: pppd Not tainted 2.6.24-rc6 #1
> Dec 28 17:52:11 host kernel: RIP: 0010:[]  
> [] mutex_lock+0x10/0x1d
> Dec 28 17:52:11 host kernel: RSP: 0018:81009402fd08  EFLAGS: 00010246
> Dec 28 17:52:11 host kernel: RAX:  RBX: 00b8 RCX: 
> 
> Dec 28 17:52:11 host kernel: RDX: 81009402ffd8 RSI: 80639d70 RDI: 
> 00b8
> Dec 28 17:52:11 host kernel: RBP: 8058adb0 R08: 6390dfaa7e3bd395 R09: 
> 8050597e
> Dec 28 17:52:11 host kernel: R10: 4000 R11: 00300018 R12: 
> 8100b4d75e60
> Dec 28 17:52:11 host kernel: R13: fff4 R14: 8100abe9e180 R15: 
> 8100a7a10900
> Dec 28 17:52:11 host kernel: FS:  2ab1bddc3ac0() 
> GS:805b() knlGS:
> Dec 28 17:52:11 host kernel: CS:  0010 DS:  ES:  CR0: 8005003b
> Dec 28 17:52:11 host kernel: CR2: 00b8 CR3: 9400e000 CR4: 
> 06e0
> Dec 28 17:52:11 host kernel: DR0:  DR1:  DR2: 
> 
> Dec 28 17:52:11 host kernel: DR3:  DR6: 0ff0 DR7: 
> 0400
> Dec 28 17:52:11 host kernel: Process pppd (pid: 4381, threadinfo 
> 81009402e000, task 81009403d060)
> Dec 28 17:52:11 host kernel: Stack:  81009403d060 8028 
> 810093734d48 802aefa5
> Dec 28 17:52:11 host kernel:  8100bf9e1c80 8100b4d75e60 
> 810060720600 802af1ba
> Dec 28 17:52:11 host kernel:  8100b6e5ced0 8100b6e5ced0 
> 8100bfa11c00 810060720600
> Dec 28 17:52:11 host kernel: Call Trace:
> Dec 28 17:52:11 host kernel:  [] dput+0x26/0x103
> Dec 28 17:52:11 host kernel:  [] sysfs_get_dentry+0x45/0x8f
> Dec 28 17:52:11 host kernel:  [] sysfs_move_dir+0x63/0x204
> Dec 28 17:52:11 host kernel:  [] kobject_move+0xba/0x110
> Dec 28 17:52:11 host kernel:  [] device_move+0x59/0x111
> Dec 28 17:52:11 host kernel:  [] 
> :rfcomm:rfcomm_tty_close+0x2f/0x74
> Dec 28 17:52:11 host kernel:  [] release_dev+0x212/0x5e2
> Dec 28 17:52:11 host kernel:  [] mntput_no_expire+0x1f/0x6f
> Dec 28 17:52:11 host kernel:  [] sys_sendto+0x128/0x151
> Dec 28 17:52:11 host kernel:  [] tty_release+0xc/0x10
> Dec 28 17:52:11 host kernel:  [] __fput+0xb1/0x16f
> Dec 28 17:52:11 host kernel:  [] filp_close+0x5d/0x65
> Dec 28 17:52:11 host kernel:  [] sys_close+0x73/0xa6
> Dec 28 17:52:11 host kernel:  [] tracesys+0xdc/0xe1
> Dec 28 17:52:11 host kernel:
> Dec 28 17:52:11 host kernel:
> Dec 28 17:52:11 host kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 
> 54 48 8d 47 08 48
> Dec 28 17:52:11 host kernel: RIP  [] mutex_lock+0x10/0x1d
> Dec 28 17:52:11 host kernel:  RSP 
> Dec 28 17:52:11 host kernel: CR2: 00b8
> Dec 28 17:52:11 host kernel: ---[ end trace 12717319afdb56f5 ]---
>
> The bug is also present in 2.6.23.9:
>
> Dec 28 15:30:29 twister kernel: Unable to handle kernel paging request at 
> fffe RIP:
> Dec 28 15:30:29 twister kernel:  [] dput+0xd/0x103
> Dec 28 15:30:29 twister kernel: PGD 203067 PUD 204067 PMD 0
> Dec 28 15:30:29 twister kernel: Oops:  [1]
> Dec 28 15:30:29 twister kernel: CPU 0
> Dec 28 15:30:29 twister kernel: Modules linked in: ppp_deflate zlib_deflate 
> zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc binfmt_misc rfcomm 
> l2cap nfsd exportfs auth_rpcgss ipt_REJECT xt_tcpudp ipt_LOG xt_limit 
> iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc fuse dm_crypt 
> dm_snapshot dm_mirror saa7134_alsa radeon it87 hwmon_vid eeprom hci_usb 
> 

Re: [Bluez-devel] Oops involving RFCOMM and sysfs

2007-12-29 Thread Dave Young
On Dec 29, 2007 1:32 AM, Gabor Gombas [EMAIL PROTECTED] wrote:
 Hi,

 I'm using my phone as a GPRS modem over Bluetooth. Sometimes Bluetooth
 on the phone seems to stall and the phone has to be switched off  on to
 get it back to a sane state. During this I sometimes get the following
 Oops (this is kernel 2.6.24-rc6 on x86_64):

 Dec 28 17:52:11 host kernel: Unable to handle kernel NULL pointer dereference 
 at 00b8 RIP:
 Dec 28 17:52:11 host kernel:  [8046e0d5] mutex_lock+0x10/0x1d
 Dec 28 17:52:11 host kernel: PGD 920b0067 PUD a7aba067 PMD 0
 Dec 28 17:52:11 host kernel: Oops: 0002 [1]
 Dec 28 17:52:11 host kernel: CPU 0
 Dec 28 17:52:11 host kernel: Modules linked in: cpufreq_ondemand ppp_deflate 
 zlib_deflate zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc 
 binfmt_misc rfcomm l2cap nfsd auth_rpcgss exportfs ipt_REJECT xt_tcpudp 
 ipt_LOG xt_limit iptable_filter ip_tables x_tables nfs lockd nfs_acl sunrpc 
 fuse dm_crypt dm_snapshot dm_mirror saa7134_alsa radeon hwmon_vid eeprom 
 hci_usb bluetooth usb_storage tuner tea5767 tda8290 tuner_simple mt20xx 
 tea5761 sg snd_intel8x0 saa7134 firewire_ohci snd_ac97_codec firewire_core 
 videobuf_dma_sg videobuf_core ir_kbd_i2c ac97_bus crc_itu_t ir_common snd_pcm 
 ehci_hcd ohci_hcd parport_pc parport sky2 sr_mod forcedeth snd_timer 
 snd_page_alloc cdrom floppy
 Dec 28 17:52:11 host kernel: Pid: 4381, comm: pppd Not tainted 2.6.24-rc6 #1
 Dec 28 17:52:11 host kernel: RIP: 0010:[8046e0d5]  
 [8046e0d5] mutex_lock+0x10/0x1d
 Dec 28 17:52:11 host kernel: RSP: 0018:81009402fd08  EFLAGS: 00010246
 Dec 28 17:52:11 host kernel: RAX:  RBX: 00b8 RCX: 
 
 Dec 28 17:52:11 host kernel: RDX: 81009402ffd8 RSI: 80639d70 RDI: 
 00b8
 Dec 28 17:52:11 host kernel: RBP: 8058adb0 R08: 6390dfaa7e3bd395 R09: 
 8050597e
 Dec 28 17:52:11 host kernel: R10: 4000 R11: 00300018 R12: 
 8100b4d75e60
 Dec 28 17:52:11 host kernel: R13: fff4 R14: 8100abe9e180 R15: 
 8100a7a10900
 Dec 28 17:52:11 host kernel: FS:  2ab1bddc3ac0() 
 GS:805b() knlGS:
 Dec 28 17:52:11 host kernel: CS:  0010 DS:  ES:  CR0: 8005003b
 Dec 28 17:52:11 host kernel: CR2: 00b8 CR3: 9400e000 CR4: 
 06e0
 Dec 28 17:52:11 host kernel: DR0:  DR1:  DR2: 
 
 Dec 28 17:52:11 host kernel: DR3:  DR6: 0ff0 DR7: 
 0400
 Dec 28 17:52:11 host kernel: Process pppd (pid: 4381, threadinfo 
 81009402e000, task 81009403d060)
 Dec 28 17:52:11 host kernel: Stack:  81009403d060 8028 
 810093734d48 802aefa5
 Dec 28 17:52:11 host kernel:  8100bf9e1c80 8100b4d75e60 
 810060720600 802af1ba
 Dec 28 17:52:11 host kernel:  8100b6e5ced0 8100b6e5ced0 
 8100bfa11c00 810060720600
 Dec 28 17:52:11 host kernel: Call Trace:
 Dec 28 17:52:11 host kernel:  [8028] dput+0x26/0x103
 Dec 28 17:52:11 host kernel:  [802aefa5] sysfs_get_dentry+0x45/0x8f
 Dec 28 17:52:11 host kernel:  [802af1ba] sysfs_move_dir+0x63/0x204
 Dec 28 17:52:11 host kernel:  [803006e5] kobject_move+0xba/0x110
 Dec 28 17:52:11 host kernel:  [80368640] device_move+0x59/0x111
 Dec 28 17:52:11 host kernel:  [8828841c] 
 :rfcomm:rfcomm_tty_close+0x2f/0x74
 Dec 28 17:52:11 host kernel:  [803442ff] release_dev+0x212/0x5e2
 Dec 28 17:52:11 host kernel:  [80288801] mntput_no_expire+0x1f/0x6f
 Dec 28 17:52:11 host kernel:  [803e845d] sys_sendto+0x128/0x151
 Dec 28 17:52:11 host kernel:  [803446db] tty_release+0xc/0x10
 Dec 28 17:52:11 host kernel:  [80276f67] __fput+0xb1/0x16f
 Dec 28 17:52:11 host kernel:  [802748b5] filp_close+0x5d/0x65
 Dec 28 17:52:11 host kernel:  [80275a07] sys_close+0x73/0xa6
 Dec 28 17:52:11 host kernel:  [8020b5fc] tracesys+0xdc/0xe1
 Dec 28 17:52:11 host kernel:
 Dec 28 17:52:11 host kernel:
 Dec 28 17:52:11 host kernel: Code: ff 0f 79 05 e8 c9 00 00 00 58 5a 5b c3 41 
 54 48 8d 47 08 48
 Dec 28 17:52:11 host kernel: RIP  [8046e0d5] mutex_lock+0x10/0x1d
 Dec 28 17:52:11 host kernel:  RSP 81009402fd08
 Dec 28 17:52:11 host kernel: CR2: 00b8
 Dec 28 17:52:11 host kernel: ---[ end trace 12717319afdb56f5 ]---

 The bug is also present in 2.6.23.9:

 Dec 28 15:30:29 twister kernel: Unable to handle kernel paging request at 
 fffe RIP:
 Dec 28 15:30:29 twister kernel:  [8027ac13] dput+0xd/0x103
 Dec 28 15:30:29 twister kernel: PGD 203067 PUD 204067 PMD 0
 Dec 28 15:30:29 twister kernel: Oops:  [1]
 Dec 28 15:30:29 twister kernel: CPU 0
 Dec 28 15:30:29 twister kernel: Modules linked in: ppp_deflate zlib_deflate 
 zlib_inflate bsd_comp ppp_async crc_ccitt ppp_generic slhc binfmt_misc rfcomm