[PATCH] Bsg referencing parent device

2018-05-16 Thread Anatoliy Glagolev
A follow-up on earlier discussions:
[PATCH] bsg referencing bus driver module
https://www.spinics.net/lists/linux-scsi/msg119631.html
[PATCH] Waiting for scsi_host_template release
https://www.spinics.net/lists/linux-scsi/msg119432.html

All these discussions are attempts to fix a crash after
SCSI transport driver unload if a user-mode process
holds a handle in BSG layer towards the unloaded driver
via SCSI mid-layer:

[16834.636216,07] Call Trace:
 ...   scsi_proc_hostdir_rm
[16834.641944,07]  [] scsi_host_dev_release+0x3f/0x130
[16834.647740,07]  [] device_release+0x32/0xa0
[16834.653423,07]  [] kobject_cleanup+0x77/0x190
[16834.659002,07]  [] kobject_put+0x25/0x50
[16834.664430,07]  [] put_device+0x17/0x20
[16834.669740,07]  [] bsg_kref_release_function+0x24/0x30
[16834.675007,07]  [] bsg_release+0x166/0x1d0
[16834.680148,07]  [] __fput+0xcb/0x1d0
[16834.685156,07]  [] fput+0xe/0x10
[16834.690017,07]  [] task_work_run+0x86/0xb0
[16834.694781,07]  [] exit_to_usermode_loop+0x6b/0x9a
[16834.699466,07]  [] syscall_return_slowpath+0x55/0x60
[16834.704110,07]  [] int_ret_from_sys_call+0x25/0x9f

The latest input from earlier discussions was to cut off access
to the unloaded driver at bsg_unregister_queue time by calling
blk_cleanup_queue. If we do that we still have to release
the reference to the parent device (otherwise we crash with
the same stack). The next logical step is, rather than maintaining
a "part-time" reference to be dropped early, we discard
referencing completely.
Discarding the reference turns out to be the only thing needed
to fix the problem: all transport drivers already do blk_cleanup_queue
before releasing their reference to the parent device.

>From 7eaa9b43f0b99766b1d197044eb4d2e549d11a24 Mon Sep 17 00:00:00 2001
From: Anatoliy Glagolev <glago...@gmail.com>
Date: Wed, 16 May 2018 17:03:09 -0600
Subject: [PATCH] Bsg referencing parent device
Signed-off-by: Anatoliy Glagolev <glago...@gmail.com>

Bsg holding reference to a parent device may result in a crash
if user closes a bsg file handle after the parent device driver
has unloaded.
Holding a reference is not really needed: parent device must exist
between bsg_register_queue and bsg_unregister_queue. Before
the device goes away the caller does blk_cleanup_queue so that
all in-flight requests to the device are gone and all new requests
cannot pass beyond the queue. The queue itself is a refcounted
object and it will stay alive with a bsg file.
---
 block/bsg.c | 37 +
 1 file changed, 17 insertions(+), 20 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index defa06c..f9e4b91 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -650,18 +650,6 @@ static struct bsg_device *bsg_alloc_device(void)
return bd;
 }
 
-static void bsg_kref_release_function(struct kref *kref)
-{
-   struct bsg_class_device *bcd =
-   container_of(kref, struct bsg_class_device, ref);
-   struct device *parent = bcd->parent;
-
-   if (bcd->release)
-   bcd->release(bcd->parent);
-
-   put_device(parent);
-}
-
 static int bsg_put_device(struct bsg_device *bd)
 {
int ret = 0, do_free;
@@ -694,7 +682,6 @@ static int bsg_put_device(struct bsg_device *bd)
 
kfree(bd);
 out:
-   kref_put(>bsg_dev.ref, bsg_kref_release_function);
if (do_free)
blk_put_queue(q);
return ret;
@@ -760,8 +747,6 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
 */
mutex_lock(_mutex);
bcd = idr_find(_minor_idr, iminor(inode));
-   if (bcd)
-   kref_get(>ref);
mutex_unlock(_mutex);
 
if (!bcd)
@@ -772,8 +757,6 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
return bd;
 
bd = bsg_add_device(inode, bcd->queue, file);
-   if (IS_ERR(bd))
-   kref_put(>ref, bsg_kref_release_function);
 
return bd;
 }
@@ -902,6 +885,8 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, 
unsigned long arg)
 
 void bsg_unregister_queue(struct request_queue *q)
 {
+   struct device *parent;
+   void (*release)(struct device *dev);
struct bsg_class_device *bcd = >bsg_dev;
 
if (!bcd->class_dev)
@@ -913,8 +898,21 @@ void bsg_unregister_queue(struct request_queue *q)
sysfs_remove_link(>kobj, "bsg");
device_unregister(bcd->class_dev);
bcd->class_dev = NULL;
-   kref_put(>ref, bsg_kref_release_function);
+   parent = bcd->parent;
+   release = bcd->release;
+   bcd->parent = NULL;
+   bcd->release = NULL;
mutex_unlock(_mutex);
+
+   /*
+* The caller of bsg_[un]register_queue must hold a reference
+* to the parent device between ..register.. and ..unregister..
+* so we do not maintain 

Re: [PATCH] bsg referencing bus driver module

2018-05-01 Thread Anatoliy Glagolev
Any comments on the new patch (which, I think, addresses the concern
about module being stuck in unloadable state forever; if not, there
would be a leak in the bsg layer)? Or on dropping a reference
to bsg_class_device's parent early before the bsg_class_device
itself is gone, to implement James's idea of cutting of the bsg
layer at fc_bsg_remove time?

Thanks.

On Thu, Apr 26, 2018 at 06:01:00PM -0600, Anatoliy Glagolev wrote:
> Any thoughts on this? Can we really drop a reference from a child device
> (bsg_class_device) to a parent device (Scsi_Host) while the child device
> is still around at fc_bsg_remove time?
> 
> If not, please consider a fix with module references. I realized that
> the previous version of the fix had a problem since bsg_open may run
> more often than bsg_release. Sending a newer version... The new fix
> piggybacks on the bsg layer logic allocating/freeing bsg_device structs.
> When all those structs are gone there are no references to Scsi_Host from
> the user-mode side. The only remaining references are from a SCSI bus
> driver (like qla2xxx) itself; it is safe to drop the module reference
> at that time.
> 
> 
> From c744d4fd93578545ad12faa35a3354364793b124 Mon Sep 17 00:00:00 2001
> From: Anatoliy Glagolev <glago...@gmail.com>
> Date: Wed, 25 Apr 2018 19:16:10 -0600
> Subject: [PATCH] bsg referencing parent module
> Signed-off-by: Anatoliy Glagolev <glago...@gmail.com>
> 
> Fixing a bug when bsg layer holds the last reference to device
> when the device's module has been unloaded. Upon dropping the
> reference the device's release function may touch memory of
> the unloaded module.
> ---
>  block/bsg-lib.c  | 24 ++--
>  block/bsg.c  | 22 +-
>  drivers/scsi/scsi_transport_fc.c |  8 ++--
>  include/linux/bsg-lib.h  |  4 
>  include/linux/bsg.h  |  5 +
>  5 files changed, 58 insertions(+), 5 deletions(-)
> 
> diff --git a/block/bsg-lib.c b/block/bsg-lib.c
> index fc2e5ff..bb11786 100644
> --- a/block/bsg-lib.c
> +++ b/block/bsg-lib.c
> @@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct device 
> *dev, const char *name,
>   bsg_job_fn *job_fn, int dd_job_size,
>   void (*release)(struct device *))
>  {
> + return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release,
> + NULL);
> +}
> +EXPORT_SYMBOL_GPL(bsg_setup_queue);
> +
> +/**
> + * bsg_setup_queue_ex - Create and add the bsg hooks so we can receive 
> requests
> + * @dev: device to attach bsg device to
> + * @name: device to give bsg device
> + * @job_fn: bsg job handler
> + * @dd_job_size: size of LLD data needed for each job
> + * @release: @dev release function
> + * @dev_module: @dev's module
> + */
> +struct request_queue *bsg_setup_queue_ex(struct device *dev, const char 
> *name,
> + bsg_job_fn *job_fn, int dd_job_size,
> + void (*release)(struct device *),
> + struct module *dev_module)
> +{
>   struct request_queue *q;
>   int ret;
>  
> @@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
> const char *name,
>   blk_queue_softirq_done(q, bsg_softirq_done);
>   blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
>  
> - ret = bsg_register_queue(q, dev, name, _transport_ops, release);
> + ret = bsg_register_queue_ex(q, dev, name, _transport_ops, release,
> + dev_module);
>   if (ret) {
>   printk(KERN_ERR "%s: bsg interface failed to "
>  "initialize - register queue\n", dev->kobj.name);
> @@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
> const char *name,
>   blk_cleanup_queue(q);
>   return ERR_PTR(ret);
>  }
> -EXPORT_SYMBOL_GPL(bsg_setup_queue);
> +EXPORT_SYMBOL_GPL(bsg_setup_queue_ex);
> diff --git a/block/bsg.c b/block/bsg.c
> index defa06c..950cd31 100644
> --- a/block/bsg.c
> +++ b/block/bsg.c
> @@ -666,6 +666,7 @@ static int bsg_put_device(struct bsg_device *bd)
>  {
>   int ret = 0, do_free;
>   struct request_queue *q = bd->queue;
> + struct module *parent_module = q->bsg_dev.parent_module;
>  
>   mutex_lock(_mutex);
>  
> @@ -695,8 +696,11 @@ static int bsg_put_device(struct bsg_device *bd)
>   kfree(bd);
>  out:
>   kref_put(>bsg_dev.ref, bsg_kref_release_function);
> - if (do_free)
> + if (do_free) {
>   blk_put_queue(q);
> + if (parent_module)
> + module_put(parent_module);
> + }
>   return ret;
>  }
>  
> @@ -706

Re: [PATCH] bsg referencing bus driver module

2018-04-26 Thread Anatoliy Glagolev
Any thoughts on this? Can we really drop a reference from a child device
(bsg_class_device) to a parent device (Scsi_Host) while the child device
is still around at fc_bsg_remove time?

If not, please consider a fix with module references. I realized that
the previous version of the fix had a problem since bsg_open may run
more often than bsg_release. Sending a newer version... The new fix
piggybacks on the bsg layer logic allocating/freeing bsg_device structs.
When all those structs are gone there are no references to Scsi_Host from
the user-mode side. The only remaining references are from a SCSI bus
driver (like qla2xxx) itself; it is safe to drop the module reference
at that time.


>From c744d4fd93578545ad12faa35a3354364793b124 Mon Sep 17 00:00:00 2001
From: Anatoliy Glagolev <glago...@gmail.com>
Date: Wed, 25 Apr 2018 19:16:10 -0600
Subject: [PATCH] bsg referencing parent module
Signed-off-by: Anatoliy Glagolev <glago...@gmail.com>

Fixing a bug when bsg layer holds the last reference to device
when the device's module has been unloaded. Upon dropping the
reference the device's release function may touch memory of
the unloaded module.
---
 block/bsg-lib.c  | 24 ++--
 block/bsg.c  | 22 +-
 drivers/scsi/scsi_transport_fc.c |  8 ++--
 include/linux/bsg-lib.h  |  4 
 include/linux/bsg.h  |  5 +
 5 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff..bb11786 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
bsg_job_fn *job_fn, int dd_job_size,
void (*release)(struct device *))
 {
+   return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release,
+   NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_setup_queue_ex - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ * @release: @dev release function
+ * @dev_module: @dev's module
+ */
+struct request_queue *bsg_setup_queue_ex(struct device *dev, const char *name,
+   bsg_job_fn *job_fn, int dd_job_size,
+   void (*release)(struct device *),
+   struct module *dev_module)
+{
struct request_queue *q;
int ret;
 
@@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-   ret = bsg_register_queue(q, dev, name, _transport_ops, release);
+   ret = bsg_register_queue_ex(q, dev, name, _transport_ops, release,
+   dev_module);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
   "initialize - register queue\n", dev->kobj.name);
@@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_cleanup_queue(q);
return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(bsg_setup_queue);
+EXPORT_SYMBOL_GPL(bsg_setup_queue_ex);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c..950cd31 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -666,6 +666,7 @@ static int bsg_put_device(struct bsg_device *bd)
 {
int ret = 0, do_free;
struct request_queue *q = bd->queue;
+   struct module *parent_module = q->bsg_dev.parent_module;
 
mutex_lock(_mutex);
 
@@ -695,8 +696,11 @@ static int bsg_put_device(struct bsg_device *bd)
kfree(bd);
 out:
kref_put(>bsg_dev.ref, bsg_kref_release_function);
-   if (do_free)
+   if (do_free) {
blk_put_queue(q);
+   if (parent_module)
+   module_put(parent_module);
+   }
return ret;
 }
 
@@ -706,12 +710,19 @@ static struct bsg_device *bsg_add_device(struct inode 
*inode,
 {
struct bsg_device *bd;
unsigned char buf[32];
+   struct module *parent_module = rq->bsg_dev.parent_module;
 
if (!blk_get_queue(rq))
return ERR_PTR(-ENXIO);
 
+   if (parent_module) {
+   if (!try_module_get(parent_module))
+   return ERR_PTR(-ENODEV);
+   }
bd = bsg_alloc_device();
if (!bd) {
+   if (parent_module)
+   module_put(parent_module);
blk_put_queue(rq);
return ERR_PTR(-ENOMEM);
}
@@ -922,6 +933,14 @@ int bsg_register_queue(struct request_queue *q, struct 
device *parent,
const char *name, const struct bsg_ops *ops,
void (*release)(struct device *))
 {
+   return bsg_register_queue_ex(q, parent, name, op

Re: [PATCH] bsg referencing bus driver module

2018-04-23 Thread Anatoliy Glagolev
Thanks, James. The idea of cutting communications with Scsi_Host at
bsg_unregister_queue(..) time and leaving bsg_class_device to
its own fate makes a lot of sense, conceptually. But there are
implementation issues that are difficult to work around.

bsg.c creates bsg_class_device and takes a reference to Scsi_Host
at bsg_register_queue(..) time. The reference is dropped at
bsg_class_device's release(..) function. If the driver implementing
Scsi_Host template is not around we crash.
We could move the reference drop from bsg_class_device's release(..)
function to bsg_unregister_queue(..). That would be a small change in
bsg.c. But bsg.c sets Scsi_Host as the parent of bsg_class_device's
device. We cannot have a device around with a dangling parent.
A device's parent cannot be changed dynamically. Not setting
the device's parent at creation may affect software relying
on bsg_class_device - Scsi_Host child-parent relations.

It looks like I am out of options. Do you have suggestions on
how to work around Scsi_Host being bsg_class_device's parent?



Re: [PATCH] bsg referencing bus driver module

2018-04-20 Thread Anatoliy Glagolev
 
> This patch isn't applyable because your mailer has changed all the tabs
> to spaces.
> 
> I also think there's no need to do it this way.  I think what we need
> is for fc_bsg_remove() to wait until the bsg queue is drained.  It does
> look like the author thought this happened otherwise the code wouldn't
> have the note.  If we fix it that way we can do the same thing in all
> the other transport classes that use bsg (which all have a similar
> issue).
> 
> James
> 

Thanks, James. Sorry about the tabs; re-sending.

On fc_bsg_remove()...: are you suggesting to implement the whole fix
in scsi_transport_fc.c? That would be nice, but I do not see how that
is possible. Even with the queue drained bsg still holds a reference
to the Scsi_Host via bsg_class_device; bsg_class_device itself is
referenced on bsg_open and kept around while a user-mode process keeps
a handle to bsg.
Even if we somehow implement the waiting the call may be stuck
forever if the user-mode process keeps the handle.
I think handling it via a rererence to the module is more consistent
with the way things are done in Linux. You suggested the approach
youself back in "Waiting for scsi_host_template release" discussion.


>From df939b80d02bf37b21efaaef8ede86cfd39b0cb8 Mon Sep 17 00:00:00 2001
From: Anatoliy Glagolev <glago...@gmail.com>
Date: Thu, 19 Apr 2018 15:06:06 -0600
Subject: [PATCH] bsg referencing parent module

Fixing a bug when bsg layer holds the last reference to device
when the device's module has been unloaded. Upon dropping the
reference the device's release function may touch memory of
the unloaded module.
---
 block/bsg-lib.c  | 24 ++--
 block/bsg.c  | 29 ++---
 drivers/scsi/scsi_transport_fc.c |  8 ++--
 include/linux/bsg-lib.h  |  4 
 include/linux/bsg.h  |  5 +
 5 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff..90c28fd 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
bsg_job_fn *job_fn, int dd_job_size,
void (*release)(struct device *))
 {
+   return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release,
+   NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ * @release: @dev release function
+ * @dev_module: @dev's module
+ */
+struct request_queue *bsg_setup_queue_ex(struct device *dev, const char *name,
+   bsg_job_fn *job_fn, int dd_job_size,
+   void (*release)(struct device *),
+   struct module *dev_module)
+{
struct request_queue *q;
int ret;
 
@@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_queue_softirq_done(q, bsg_softirq_done);
blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);
 
-   ret = bsg_register_queue(q, dev, name, _transport_ops, release);
+   ret = bsg_register_queue_ex(q, dev, name, _transport_ops, release,
+   dev_module);
if (ret) {
printk(KERN_ERR "%s: bsg interface failed to "
   "initialize - register queue\n", dev->kobj.name);
@@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
const char *name,
blk_cleanup_queue(q);
return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(bsg_setup_queue);
+EXPORT_SYMBOL_GPL(bsg_setup_queue_ex);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c..6920c5b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -750,7 +750,8 @@ static struct bsg_device *__bsg_get_device(int minor, 
struct request_queue *q)
return bd;
 }
 
-static struct bsg_device *bsg_get_device(struct inode *inode, struct file 
*file)
+static struct bsg_device *bsg_get_device(struct inode *inode, struct file 
*file,
+   struct bsg_class_device **pbcd)
 {
struct bsg_device *bd;
struct bsg_class_device *bcd;
@@ -766,6 +767,7 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
 
if (!bcd)
return ERR_PTR(-ENODEV);
+   *pbcd = bcd;
 
bd = __bsg_get_device(iminor(inode), bcd->queue);
if (bd)
@@ -781,22 +783,34 @@ static struct bsg_device *bsg_get_device(struct inode 
*inode, struct file *file)
 static int bsg_open(struct inode *inode, struct file *file)
 {
struct bsg_device *bd;
+   struct bsg_class_device *bcd;
 
-   bd = bsg_get_device(inode, file);
+   bd = bsg_get_device(inode, file, );
 
if (IS_ERR(bd))
 

Re: [PATCH] bsg referencing bus driver module

2018-04-19 Thread Anatoliy Glagolev
Updated: rebased on recent Linux, cc-ed maintainers per instructions
in MAINTAINERS file

>From df939b80d02bf37b21efaaef8ede86cfd39b0cb8 Mon Sep 17 00:00:00 2001
From: Anatoliy Glagolev <aglago...@purestorage.com>
Date: Thu, 19 Apr 2018 15:06:06 -0600
Subject: [PATCH] bsg referencing parent module

Fixing a bug when bsg layer holds the last reference to device
when the device's module has been unloaded. Upon dropping the
reference the device's release function may touch memory of
the unloaded module.
---
 block/bsg-lib.c  | 24 ++--
 block/bsg.c  | 29 ++---
 drivers/scsi/scsi_transport_fc.c |  8 ++--
 include/linux/bsg-lib.h  |  4 
 include/linux/bsg.h  |  5 +
 5 files changed, 63 insertions(+), 7 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index fc2e5ff..90c28fd 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -309,6 +309,25 @@ struct request_queue *bsg_setup_queue(struct
device *dev, const char *name,
  bsg_job_fn *job_fn, int dd_job_size,
  void (*release)(struct device *))
 {
+ return bsg_setup_queue_ex(dev, name, job_fn, dd_job_size, release,
+ NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_setup_queue);
+
+/**
+ * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
+ * @dev: device to attach bsg device to
+ * @name: device to give bsg device
+ * @job_fn: bsg job handler
+ * @dd_job_size: size of LLD data needed for each job
+ * @release: @dev release function
+ * @dev_module: @dev's module
+ */
+struct request_queue *bsg_setup_queue_ex(struct device *dev, const char *name,
+ bsg_job_fn *job_fn, int dd_job_size,
+ void (*release)(struct device *),
+ struct module *dev_module)
+{
  struct request_queue *q;
  int ret;

@@ -331,7 +350,8 @@ struct request_queue *bsg_setup_queue(struct
device *dev, const char *name,
  blk_queue_softirq_done(q, bsg_softirq_done);
  blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT);

- ret = bsg_register_queue(q, dev, name, _transport_ops, release);
+ ret = bsg_register_queue_ex(q, dev, name, _transport_ops, release,
+ dev_module);
  if (ret) {
  printk(KERN_ERR "%s: bsg interface failed to "
"initialize - register queue\n", dev->kobj.name);
@@ -343,4 +363,4 @@ struct request_queue *bsg_setup_queue(struct
device *dev, const char *name,
  blk_cleanup_queue(q);
  return ERR_PTR(ret);
 }
-EXPORT_SYMBOL_GPL(bsg_setup_queue);
+EXPORT_SYMBOL_GPL(bsg_setup_queue_ex);
diff --git a/block/bsg.c b/block/bsg.c
index defa06c..6920c5b 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -750,7 +750,8 @@ static struct bsg_device *__bsg_get_device(int
minor, struct request_queue *q)
  return bd;
 }

-static struct bsg_device *bsg_get_device(struct inode *inode, struct
file *file)
+static struct bsg_device *bsg_get_device(struct inode *inode, struct
file *file,
+ struct bsg_class_device **pbcd)
 {
  struct bsg_device *bd;
  struct bsg_class_device *bcd;
@@ -766,6 +767,7 @@ static struct bsg_device *bsg_get_device(struct
inode *inode, struct file *file)

  if (!bcd)
  return ERR_PTR(-ENODEV);
+ *pbcd = bcd;

  bd = __bsg_get_device(iminor(inode), bcd->queue);
  if (bd)
@@ -781,22 +783,34 @@ static struct bsg_device *bsg_get_device(struct
inode *inode, struct file *file)
 static int bsg_open(struct inode *inode, struct file *file)
 {
  struct bsg_device *bd;
+ struct bsg_class_device *bcd;

- bd = bsg_get_device(inode, file);
+ bd = bsg_get_device(inode, file, );

  if (IS_ERR(bd))
  return PTR_ERR(bd);

  file->private_data = bd;
+ if (bcd->parent_module) {
+ if (!try_module_get(bcd->parent_module)) {
+ bsg_put_device(bd);
+ return -ENODEV;
+ }
+ }
  return 0;
 }

 static int bsg_release(struct inode *inode, struct file *file)
 {
+ int ret;
  struct bsg_device *bd = file->private_data;
+ struct module *parent_module = bd->queue->bsg_dev.parent_module;

  file->private_data = NULL;
- return bsg_put_device(bd);
+ ret = bsg_put_device(bd);
+ if (parent_module)
+ module_put(parent_module);
+ return ret;
 }

 static __poll_t bsg_poll(struct file *file, poll_table *wait)
@@ -922,6 +936,14 @@ int bsg_register_queue(struct request_queue *q,
struct device *parent,
  const char *name, const struct bsg_ops *ops,
  void (*release)(struct device *))
 {
+ return bsg_register_queue_ex(q, parent, name, ops, release, NULL);
+}
+
+int bsg_register_queue_ex(struct request_queue *q, struct device *parent,
+ const char *name, const struct bsg_ops *ops,
+ void (*release)(struct device *),
+ struct module *parent_module)
+{
  struct bsg_class_device *bcd;
  dev_t dev;
  int ret;
@@ -958,6 +980,7 @@ int bsg_register_queue(struct request_queue *q,
struct device *parent,
  bcd->parent = get_device(parent);
  bcd->release = release;
  bcd->ops = ops;
+ bcd->parent_module = parent_module;
  kref_init(>ref);
  dev = MKDEV(bsg_major, bcd->minor);
  class_dev = device_create(bsg_class, parent, dev, NULL, &q

Re: [PATCH] bsg referencing bus driver module

2018-04-19 Thread Anatoliy Glagolev
+linux-block

On Tue, Apr 17, 2018 at 1:05 PM, Anatoliy Glagolev <glago...@gmail.com> wrote:
> Description: bsg_release may crash while decrementing reference to the
> parent device with the following stack:
>
> [16834.636216,07] Call Trace:
>  ...   scsi_proc_hostdir_rm
> [16834.641944,07]  [] scsi_host_dev_release+0x3f/0x130
> [16834.647740,07]  [] device_release+0x32/0xa0
> [16834.653423,07]  [] kobject_cleanup+0x77/0x190
> [16834.659002,07]  [] kobject_put+0x25/0x50
> [16834.664430,07]  [] put_device+0x17/0x20
> [16834.669740,07]  [] bsg_kref_release_function+0x24/0x30
> [16834.675007,07]  [] bsg_release+0x166/0x1d0
> [16834.680148,07]  [] __fput+0xcb/0x1d0
> [16834.685156,07]  [] fput+0xe/0x10
> [16834.690017,07]  [] task_work_run+0x86/0xb0
> [16834.694781,07]  [] exit_to_usermode_loop+0x6b/0x9a
> [16834.699466,07]  [] syscall_return_slowpath+0x55/0x60
> [16834.704110,07]  [] int_ret_from_sys_call+0x25/0x9f
>
> if the parent driver implementing the device has unloaded. To address
> the problem, taking a reference to the parent driver module.
>
> Note: this is a follow-up to earlier discussion "[PATCH] Waiting for
> scsi_host_template release".
>
> ---
>  block/bsg.c  | 31 +++
>  drivers/scsi/scsi_transport_fc.c |  3 ++-
>  include/linux/bsg.h  |  5 +
>  3 files changed, 34 insertions(+), 5 deletions(-)
>
> diff --git a/block/bsg.c b/block/bsg.c
> index b9a5361..0aa7d74 100644
> --- a/block/bsg.c
> +++ b/block/bsg.c
> @@ -798,7 +798,8 @@ found:
>   return bd;
>  }
>
> -static struct bsg_device *bsg_get_device(struct inode *inode, struct
> file *file)
> +static struct bsg_device *bsg_get_device(struct inode *inode, struct
> file *file,
> + struct bsg_class_device **pbcd)
>  {
>   struct bsg_device *bd;
>   struct bsg_class_device *bcd;
> @@ -814,6 +815,7 @@ static struct bsg_device *bsg_get_device(struct
> inode *inode, struct file *file)
>
>   if (!bcd)
>   return ERR_PTR(-ENODEV);
> + *pbcd = bcd;
>
>   bd = __bsg_get_device(iminor(inode), bcd->queue);
>   if (bd)
> @@ -829,22 +831,34 @@ static struct bsg_device *bsg_get_device(struct
> inode *inode, struct file *file)
>  static int bsg_open(struct inode *inode, struct file *file)
>  {
>   struct bsg_device *bd;
> + struct bsg_class_device *bcd;
>
> - bd = bsg_get_device(inode, file);
> + bd = bsg_get_device(inode, file, );
>
>   if (IS_ERR(bd))
>   return PTR_ERR(bd);
>
>   file->private_data = bd;
> + if (bcd->parent_module) {
> + if (!try_module_get(bcd->parent_module)) {
> + bsg_put_device(bd);
> + return -ENODEV;
> + }
> + }
>   return 0;
>  }
>
>  static int bsg_release(struct inode *inode, struct file *file)
>  {
> + int ret;
>   struct bsg_device *bd = file->private_data;
> + struct module *parent_module = bd->queue->bsg_dev.parent_module;
>
>   file->private_data = NULL;
> - return bsg_put_device(bd);
> + ret = bsg_put_device(bd);
> + if (parent_module)
> + module_put(parent_module);
> + return ret;
>  }
>
>  static unsigned int bsg_poll(struct file *file, poll_table *wait)
> @@ -977,6 +991,14 @@ EXPORT_SYMBOL_GPL(bsg_unregister_queue);
>  int bsg_register_queue(struct request_queue *q, struct device *parent,
> const char *name, void (*release)(struct device *))
>  {
> + return bsg_register_queue_ex(q, parent, name, release, NULL);
> +}
> +EXPORT_SYMBOL_GPL(bsg_register_queue);
> +
> +int bsg_register_queue_ex(struct request_queue *q, struct device *parent,
> +  const char *name, void (*release)(struct device *),
> +  struct module *parent_module)
> +{
>   struct bsg_class_device *bcd;
>   dev_t dev;
>   int ret;
> @@ -1012,6 +1034,7 @@ int bsg_register_queue(struct request_queue *q,
> struct device *parent,
>   bcd->queue = q;
>   bcd->parent = get_device(parent);
>   bcd->release = release;
> + bcd->parent_module = parent_module;
>   kref_init(>ref);
>   dev = MKDEV(bsg_major, bcd->minor);
>   class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
> @@ -1039,7 +1062,7 @@ unlock:
>   mutex_unlock(_mutex);
>   return ret;
>  }
> -EXPORT_SYMBOL_GPL(bsg_register_queue);
> +EXPORT_SYMBOL_GPL(bsg_register_queue_ex);
>
>  static struct cdev bsg_cdev;
>
> diff --git a/drivers/scsi/scsi_transport_fc.c 
> b/drivers/scsi/scsi_transport_fc.c
> index 24eaaf6..c153f80 100644
> --- a/drivers/scsi/scsi_transport_fc.c
> +++ b/drivers/scsi/scsi_transport_fc.c
> @@ -4064,7 +4064,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct
> fc_host_attrs *fc_host)
>   

Re: [PATCH] Waiting for scsi_host_template release

2018-04-18 Thread Anatoliy Glagolev
Sent out a new patch with "bsg referencing bus driver module" in subj.

On Wed, Apr 11, 2018 at 4:37 PM, Anatoliy Glagolev <glago...@gmail.com> wrote:
> On "what was the actual error": it is deref of an invalid address, not
> NULL. Attaching crash dump analysis for the reference.
>
> On module reference count: good point. I decided against it at first,
> but I can reconsider. "modprobe -r qla2xxx" will fail if there is an
> extra reference to the module, and the module_exit function will not
> even run, right? Waiting for references to go away would be more
> convenient for me. But I can see why the module reference count is a
> better approach in general. I can work around and retry "modprobe -r
> qla2xxx" multiple times in my scripts.
>
> I think that it is still a SCSI mid-layer job to do the references.
> There is no way qla2xxx can reference itself and then dereference at
> the right time.
>
> qla2xxx (or any other driver) provides a pointer to its module in
> scsi_host_template when it requests Scsi_Host creation. As far as I
> can see, no one ever takes a reference on that module. SCSI mid-layer
> just relies on the module to be around. Scsi_Host is a device itself;
> that is the device that is referenced on open/close from user mode,
> and not the bus driver that triggered the Scsi_Host creation.
>
> SCSI mid layer taking a reference on the template's module at
> Scsi_Host creation in scsi_host_alloc(..) and dropping it in
> scsi_host_dev_release (called when the last reference to Scsi_Host is
> gone) will not work. Assuming that the module_exit function does not
> run at an attempt to unload a referenced module, qla2xxx's Scsi_Host-s
> corresponding to the adapter's ports will stay forever.
>
> Let me think more about it; the idea is to intercept open/close at
> Scsi_Host and increment/decrement module reference at that time.
>
> Thanks a lot for the input!
>
>
> On Wed, Apr 11, 2018 at 1:12 PM, James Bottomley
> <james.bottom...@hansenpartnership.com> wrote:
>> On Wed, 2018-04-11 at 12:22 -0700, Anatoliy Glagolev wrote:
>>> Hannes, James, thanks a lot for taking a look!
>>>
>>> On the problem the patch is solving: it is in the "Description" part
>>> of my initial e-mail. If you agree that a Scsi_Host may be around
>>> after a driver has unloaded, the problem applies to any driver
>>> creating a new Scsi_Host.
>>
>> No, I don't agree: as I said, the template is part of the module and
>> the module should be reference counted.  Any use after free of the
>> template means there's a refcounting bug somewhere.
>>
>>>  I fixed it in qla2xxx to illustrate the usage of the new function
>>> and scsi_host_template's flag; also, qla2xxx is where I actually
>>> observe crashes. Other drivers may do the same if they want to
>>> address the problem.
>>>
>>> Here are details on the qla2xxx crash repro, if that is what you were
>>> asking about. If I run "qaucli" utility that retrieves some info from
>>> the driver via SCSI mid-layer, and unload the driver in parallel, the
>>> kernel crashes with the following stack:
>>>
>>> [16834.636216,07] Call Trace:
>>>   ...
>>> scsi_proc_hostdir_rm
>>> [16834.641944,07]  []
>>> scsi_host_dev_release+0x3f/0x130
>>> [16834.647740,07]  [] device_release+0x32/0xa0
>>> [16834.653423,07]  [] kobject_cleanup+0x77/0x190
>>> [16834.659002,07]  [] kobject_put+0x25/0x50
>>> [16834.664430,07]  [] put_device+0x17/0x20
>>> [16834.669740,07]  []
>>> bsg_kref_release_function+0x24/0x30
>>> [16834.675007,07]  [] bsg_release+0x166/0x1d0
>>> [16834.680148,07]  [] __fput+0xcb/0x1d0
>>> [16834.685156,07]  [] fput+0xe/0x10
>>> [16834.690017,07]  [] task_work_run+0x86/0xb0
>>> [16834.694781,07]  []
>>> exit_to_usermode_loop+0x6b/0x9a
>>> [16834.699466,07]  []
>>> syscall_return_slowpath+0x55/0x60
>>> [16834.704110,07]  []
>>> int_ret_from_sys_call+0x25/0x9f
>>
>> This one's a bit baffling: open of the bsg device should have already
>> taken the module reference.  What was the actual error: NULL deref?
>>
>> The thing which is supposed to hold the module is the device open/close
>> which does scsi_device_put on sd_release ... unless this is some sort
>> of non-scsi device and qlogic forgot how to refcount?
>>
>>> On refcount for scsi_host_template: valid point, I did consider it.
>>> Existing drivers allocate scsi_host_template statically. We cannot
>>> change them all at once. So we have to allow 2 ways of allocating
>>> scsi_host_template: the dynamic one with refcounts and the static one
>>> for legacy driver support. That is kind of ugly, too. In addition,
>>> having a refcounted scsi_host_template after driver unload is
>>> confusing: the memory of scsi_host_template is OK, but any attempt to
>>> call a method from the template still causes a crash.
>>
>> No, the static template already is part of the module so it should be
>> refcounted as a module reference.
>>
>> James
>>


[PATCH] bsg referencing bus driver module

2018-04-17 Thread Anatoliy Glagolev
Description: bsg_release may crash while decrementing reference to the
parent device with the following stack:

[16834.636216,07] Call Trace:
 ...   scsi_proc_hostdir_rm
[16834.641944,07]  [] scsi_host_dev_release+0x3f/0x130
[16834.647740,07]  [] device_release+0x32/0xa0
[16834.653423,07]  [] kobject_cleanup+0x77/0x190
[16834.659002,07]  [] kobject_put+0x25/0x50
[16834.664430,07]  [] put_device+0x17/0x20
[16834.669740,07]  [] bsg_kref_release_function+0x24/0x30
[16834.675007,07]  [] bsg_release+0x166/0x1d0
[16834.680148,07]  [] __fput+0xcb/0x1d0
[16834.685156,07]  [] fput+0xe/0x10
[16834.690017,07]  [] task_work_run+0x86/0xb0
[16834.694781,07]  [] exit_to_usermode_loop+0x6b/0x9a
[16834.699466,07]  [] syscall_return_slowpath+0x55/0x60
[16834.704110,07]  [] int_ret_from_sys_call+0x25/0x9f

if the parent driver implementing the device has unloaded. To address
the problem, taking a reference to the parent driver module.

Note: this is a follow-up to earlier discussion "[PATCH] Waiting for
scsi_host_template release".

---
 block/bsg.c  | 31 +++
 drivers/scsi/scsi_transport_fc.c |  3 ++-
 include/linux/bsg.h  |  5 +
 3 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index b9a5361..0aa7d74 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -798,7 +798,8 @@ found:
  return bd;
 }

-static struct bsg_device *bsg_get_device(struct inode *inode, struct
file *file)
+static struct bsg_device *bsg_get_device(struct inode *inode, struct
file *file,
+ struct bsg_class_device **pbcd)
 {
  struct bsg_device *bd;
  struct bsg_class_device *bcd;
@@ -814,6 +815,7 @@ static struct bsg_device *bsg_get_device(struct
inode *inode, struct file *file)

  if (!bcd)
  return ERR_PTR(-ENODEV);
+ *pbcd = bcd;

  bd = __bsg_get_device(iminor(inode), bcd->queue);
  if (bd)
@@ -829,22 +831,34 @@ static struct bsg_device *bsg_get_device(struct
inode *inode, struct file *file)
 static int bsg_open(struct inode *inode, struct file *file)
 {
  struct bsg_device *bd;
+ struct bsg_class_device *bcd;

- bd = bsg_get_device(inode, file);
+ bd = bsg_get_device(inode, file, );

  if (IS_ERR(bd))
  return PTR_ERR(bd);

  file->private_data = bd;
+ if (bcd->parent_module) {
+ if (!try_module_get(bcd->parent_module)) {
+ bsg_put_device(bd);
+ return -ENODEV;
+ }
+ }
  return 0;
 }

 static int bsg_release(struct inode *inode, struct file *file)
 {
+ int ret;
  struct bsg_device *bd = file->private_data;
+ struct module *parent_module = bd->queue->bsg_dev.parent_module;

  file->private_data = NULL;
- return bsg_put_device(bd);
+ ret = bsg_put_device(bd);
+ if (parent_module)
+ module_put(parent_module);
+ return ret;
 }

 static unsigned int bsg_poll(struct file *file, poll_table *wait)
@@ -977,6 +991,14 @@ EXPORT_SYMBOL_GPL(bsg_unregister_queue);
 int bsg_register_queue(struct request_queue *q, struct device *parent,
const char *name, void (*release)(struct device *))
 {
+ return bsg_register_queue_ex(q, parent, name, release, NULL);
+}
+EXPORT_SYMBOL_GPL(bsg_register_queue);
+
+int bsg_register_queue_ex(struct request_queue *q, struct device *parent,
+  const char *name, void (*release)(struct device *),
+  struct module *parent_module)
+{
  struct bsg_class_device *bcd;
  dev_t dev;
  int ret;
@@ -1012,6 +1034,7 @@ int bsg_register_queue(struct request_queue *q,
struct device *parent,
  bcd->queue = q;
  bcd->parent = get_device(parent);
  bcd->release = release;
+ bcd->parent_module = parent_module;
  kref_init(>ref);
  dev = MKDEV(bsg_major, bcd->minor);
  class_dev = device_create(bsg_class, parent, dev, NULL, "%s", devname);
@@ -1039,7 +1062,7 @@ unlock:
  mutex_unlock(_mutex);
  return ret;
 }
-EXPORT_SYMBOL_GPL(bsg_register_queue);
+EXPORT_SYMBOL_GPL(bsg_register_queue_ex);

 static struct cdev bsg_cdev;

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 24eaaf6..c153f80 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -4064,7 +4064,8 @@ fc_bsg_hostadd(struct Scsi_Host *shost, struct
fc_host_attrs *fc_host)
  blk_queue_rq_timed_out(q, fc_bsg_job_timeout);
  blk_queue_rq_timeout(q, FC_DEFAULT_BSG_TIMEOUT);

- err = bsg_register_queue(q, dev, bsg_name, NULL);
+ err = bsg_register_queue_ex(q, dev, bsg_name, NULL,
+ shost->hostt->module);
  if (err) {
  printk(KERN_ERR "fc_host%d: bsg interface failed to "
  "initialize - register queue\n",
diff --git a/include/linux/bsg.h b/include/linux/bsg.h
index 7173f6e..fe41e83 100644
--- a/include/linux/bsg.h
+++ b/include/linux/bsg.h
@@ -12,11 +12,16 @@ struct bsg_class_device {
  struct request_queue *queue;
  struct kref ref;
  void (*release)(struct device *);
+ struct module *parent_module;
 };

 extern int bsg_register_queue(struct request_queue *q,
   struct device *parent, const char *name,
   void (*release)(struct device *));
+extern int 

Re: [PATCH] Waiting for scsi_host_template release

2018-04-11 Thread Anatoliy Glagolev
On "what was the actual error": it is deref of an invalid address, not
NULL. Attaching crash dump analysis for the reference.

On module reference count: good point. I decided against it at first,
but I can reconsider. "modprobe -r qla2xxx" will fail if there is an
extra reference to the module, and the module_exit function will not
even run, right? Waiting for references to go away would be more
convenient for me. But I can see why the module reference count is a
better approach in general. I can work around and retry "modprobe -r
qla2xxx" multiple times in my scripts.

I think that it is still a SCSI mid-layer job to do the references.
There is no way qla2xxx can reference itself and then dereference at
the right time.

qla2xxx (or any other driver) provides a pointer to its module in
scsi_host_template when it requests Scsi_Host creation. As far as I
can see, no one ever takes a reference on that module. SCSI mid-layer
just relies on the module to be around. Scsi_Host is a device itself;
that is the device that is referenced on open/close from user mode,
and not the bus driver that triggered the Scsi_Host creation.

SCSI mid layer taking a reference on the template's module at
Scsi_Host creation in scsi_host_alloc(..) and dropping it in
scsi_host_dev_release (called when the last reference to Scsi_Host is
gone) will not work. Assuming that the module_exit function does not
run at an attempt to unload a referenced module, qla2xxx's Scsi_Host-s
corresponding to the adapter's ports will stay forever.

Let me think more about it; the idea is to intercept open/close at
Scsi_Host and increment/decrement module reference at that time.

Thanks a lot for the input!


On Wed, Apr 11, 2018 at 1:12 PM, James Bottomley
<james.bottom...@hansenpartnership.com> wrote:
> On Wed, 2018-04-11 at 12:22 -0700, Anatoliy Glagolev wrote:
>> Hannes, James, thanks a lot for taking a look!
>>
>> On the problem the patch is solving: it is in the "Description" part
>> of my initial e-mail. If you agree that a Scsi_Host may be around
>> after a driver has unloaded, the problem applies to any driver
>> creating a new Scsi_Host.
>
> No, I don't agree: as I said, the template is part of the module and
> the module should be reference counted.  Any use after free of the
> template means there's a refcounting bug somewhere.
>
>>  I fixed it in qla2xxx to illustrate the usage of the new function
>> and scsi_host_template's flag; also, qla2xxx is where I actually
>> observe crashes. Other drivers may do the same if they want to
>> address the problem.
>>
>> Here are details on the qla2xxx crash repro, if that is what you were
>> asking about. If I run "qaucli" utility that retrieves some info from
>> the driver via SCSI mid-layer, and unload the driver in parallel, the
>> kernel crashes with the following stack:
>>
>> [16834.636216,07] Call Trace:
>>   ...
>> scsi_proc_hostdir_rm
>> [16834.641944,07]  []
>> scsi_host_dev_release+0x3f/0x130
>> [16834.647740,07]  [] device_release+0x32/0xa0
>> [16834.653423,07]  [] kobject_cleanup+0x77/0x190
>> [16834.659002,07]  [] kobject_put+0x25/0x50
>> [16834.664430,07]  [] put_device+0x17/0x20
>> [16834.669740,07]  []
>> bsg_kref_release_function+0x24/0x30
>> [16834.675007,07]  [] bsg_release+0x166/0x1d0
>> [16834.680148,07]  [] __fput+0xcb/0x1d0
>> [16834.685156,07]  [] fput+0xe/0x10
>> [16834.690017,07]  [] task_work_run+0x86/0xb0
>> [16834.694781,07]  []
>> exit_to_usermode_loop+0x6b/0x9a
>> [16834.699466,07]  []
>> syscall_return_slowpath+0x55/0x60
>> [16834.704110,07]  []
>> int_ret_from_sys_call+0x25/0x9f
>
> This one's a bit baffling: open of the bsg device should have already
> taken the module reference.  What was the actual error: NULL deref?
>
> The thing which is supposed to hold the module is the device open/close
> which does scsi_device_put on sd_release ... unless this is some sort
> of non-scsi device and qlogic forgot how to refcount?
>
>> On refcount for scsi_host_template: valid point, I did consider it.
>> Existing drivers allocate scsi_host_template statically. We cannot
>> change them all at once. So we have to allow 2 ways of allocating
>> scsi_host_template: the dynamic one with refcounts and the static one
>> for legacy driver support. That is kind of ugly, too. In addition,
>> having a refcounted scsi_host_template after driver unload is
>> confusing: the memory of scsi_host_template is OK, but any attempt to
>> call a method from the template still causes a crash.
>
> No, the static template already is part of the module so it should be
>

Re: [PATCH] Waiting for scsi_host_template release

2018-04-11 Thread Anatoliy Glagolev
Hannes, James, thanks a lot for taking a look!

On the problem the patch is solving: it is in the "Description" part
of my initial e-mail. If you agree that a Scsi_Host may be around
after a driver has unloaded, the problem applies to any driver
creating a new Scsi_Host. I fixed it in qla2xxx to illustrate the
usage of the new function and scsi_host_template's flag; also, qla2xxx
is where I actually observe crashes. Other drivers may do the same if
they want to address the problem.

Here are details on the qla2xxx crash repro, if that is what you were
asking about. If I run "qaucli" utility that retrieves some info from
the driver via SCSI mid-layer, and unload the driver in parallel, the
kernel crashes with the following stack:

[16834.636216,07] Call Trace:
  ...
scsi_proc_hostdir_rm
[16834.641944,07]  [] scsi_host_dev_release+0x3f/0x130
[16834.647740,07]  [] device_release+0x32/0xa0
[16834.653423,07]  [] kobject_cleanup+0x77/0x190
[16834.659002,07]  [] kobject_put+0x25/0x50
[16834.664430,07]  [] put_device+0x17/0x20
[16834.669740,07]  [] bsg_kref_release_function+0x24/0x30
[16834.675007,07]  [] bsg_release+0x166/0x1d0
[16834.680148,07]  [] __fput+0xcb/0x1d0
[16834.685156,07]  [] fput+0xe/0x10
[16834.690017,07]  [] task_work_run+0x86/0xb0
[16834.694781,07]  [] exit_to_usermode_loop+0x6b/0x9a
[16834.699466,07]  [] syscall_return_slowpath+0x55/0x60
[16834.704110,07]  [] int_ret_from_sys_call+0x25/0x9f

On refcount for scsi_host_template: valid point, I did consider it.
Existing drivers allocate scsi_host_template statically. We cannot
change them all at once. So we have to allow 2 ways of allocating
scsi_host_template: the dynamic one with refcounts and the static one
for legacy driver support. That is kind of ugly, too. In addition,
having a refcounted scsi_host_template after driver unload is
confusing: the memory of scsi_host_template is OK, but any attempt to
call a method from the template still causes a crash.

The advantage of this patch is low risk and relative simplicity. The
patch does not change the behavior of existing drivers interacting
with SCSI mid-layer unless they set the new flag and call the new
function.

Conceptually nicer, but more risky and more complex change is to fix
scsi_remove_host. SCSI mid-layer must not not touch Scsi_Host's
template after a driver calls scsi_remove_host for that Scsi_Host.


On Wed, Apr 11, 2018 at 7:39 AM, James Bottomley
<james.bottom...@hansenpartnership.com> wrote:
> On Wed, 2018-04-11 at 16:11 +0200, Hannes Reinecke wrote:
>> On Mon, 9 Apr 2018 23:23:51 -0700
>> Anatoliy Glagolev <glago...@gmail.com> wrote:
>>
>> > Description:
>> > SCSI mid-layer may hold references to Scsi_Host structs when
>> > the owning module has already unloaded. Scsi_Host release path
>> > touches scsi_host_template struct that is usually allocated
>> > in the unloaded module's memory. That results in a crash.
>> > To work around the problem, this change implements
>> > scsi_host_template_release API to be called at driver unload
>> > path to make sure all Scsi_Host structs are gone before
>> > releasing scsi_host_template memory.
>> >
>> > ---
>> >  drivers/scsi/hosts.c  |  2 ++
>> >  drivers/scsi/qla2xxx/qla_os.c |  2 ++
>> >  drivers/scsi/scsi_priv.h  |  1 +
>> >  drivers/scsi/scsi_proc.c  | 64
>> > +++
>> > include/scsi/scsi_host.h  | 17  5 files changed, 80
>> > insertions(+), 6 deletions(-)
>> >
>>
>> Whee, that is ugly.
>
> And what's the actual problem it's solving?  It looks to be something
> in qla2xxx module removal?
>
>> Any particular reason why we can't do refcounting here?
>
> We can ... the template is module data and any reference to the dev or
> the host will increment the module reference.  We could even have a
> dummy template reference that only incremented the module refcount.
> However, knowing what to do involves knowing what the problem is and
> how it is triggered.
>
> James
>


[PATCH] Waiting for scsi_host_template release

2018-04-10 Thread Anatoliy Glagolev
Description:
SCSI mid-layer may hold references to Scsi_Host structs when
the owning module has already unloaded. Scsi_Host release path
touches scsi_host_template struct that is usually allocated
in the unloaded module's memory. That results in a crash.
To work around the problem, this change implements
scsi_host_template_release API to be called at driver unload
path to make sure all Scsi_Host structs are gone before
releasing scsi_host_template memory.

---
 drivers/scsi/hosts.c  |  2 ++
 drivers/scsi/qla2xxx/qla_os.c |  2 ++
 drivers/scsi/scsi_priv.h  |  1 +
 drivers/scsi/scsi_proc.c  | 64 +++
 include/scsi/scsi_host.h  | 17 
 5 files changed, 80 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 82ac1cd..51c9e2b 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -353,6 +353,8 @@ static void scsi_host_dev_release(struct device *dev)
  blk_free_tags(shost->bqt);
  }

+ scsi_host_template_release(shost->hostt);
+
  kfree(shost->shost_data);

  if (parent)
diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index fb8beee..e913b8b 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -270,6 +270,7 @@ struct scsi_host_template qla2xxx_driver_template = {

  .supported_mode = MODE_INITIATOR,
  .track_queue_depth = 1,
+ .wait_on_unload = 1,
 };

 static struct scsi_transport_template *qla2xxx_transport_template = NULL;
@@ -6116,6 +6117,7 @@ qla2x00_module_exit(void)
  kmem_cache_destroy(ctx_cachep);
  fc_release_transport(qla2xxx_transport_template);
  fc_release_transport(qla2xxx_transport_vport_template);
+ scsi_host_template_wait(_driver_template);
 }

 module_init(qla2x00_module_init);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 4d01cdb..c76cb6e2 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -99,6 +99,7 @@ extern struct kmem_cache *scsi_sdb_cache;
 #ifdef CONFIG_SCSI_PROC_FS
 extern void scsi_proc_hostdir_add(struct scsi_host_template *);
 extern void scsi_proc_hostdir_rm(struct scsi_host_template *);
+extern void scsi_host_template_release(struct scsi_host_template *);
 extern void scsi_proc_host_add(struct Scsi_Host *);
 extern void scsi_proc_host_rm(struct Scsi_Host *);
 extern int scsi_init_procfs(void);
diff --git a/drivers/scsi/scsi_proc.c b/drivers/scsi/scsi_proc.c
index 251598e..daf2c99 100644
--- a/drivers/scsi/scsi_proc.c
+++ b/drivers/scsi/scsi_proc.c
@@ -99,15 +99,21 @@ static const struct file_operations proc_scsi_fops = {

 void scsi_proc_hostdir_add(struct scsi_host_template *sht)
 {
- if (!sht->show_info)
+ if (!sht->show_info && !sht->wait_on_unload)
  return;

  mutex_lock(_host_template_mutex);
  if (!sht->present++) {
- sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
-if (!sht->proc_dir)
- printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
-__func__, sht->proc_name);
+ if (sht->show_info) {
+ sht->proc_dir = proc_mkdir(sht->proc_name, proc_scsi);
+ if (!sht->proc_dir)
+ printk(KERN_ERR "%s: proc_mkdir failed for %s\n",
+__func__, sht->proc_name);
+ }
+ if (sht->wait_on_unload) {
+ sema_init(>release_sem, 0);
+ sht->release_sem_waiters = 0;
+ }
  }
  mutex_unlock(_host_template_mutex);
 }
@@ -118,7 +124,7 @@ void scsi_proc_hostdir_add(struct scsi_host_template *sht)
  */
 void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
 {
- if (!sht->show_info)
+ if (!sht->show_info && !sht->wait_on_unload)
  return;

  mutex_lock(_host_template_mutex);
@@ -126,9 +132,55 @@ void scsi_proc_hostdir_rm(struct scsi_host_template *sht)
  remove_proc_entry(sht->proc_name, proc_scsi);
  sht->proc_dir = NULL;
  }
+ if (sht->wait_on_unload)
+ ++sht->scsi_host_cleanups;
  mutex_unlock(_host_template_mutex);
 }

+/**
+ * scsi_host_template_release - runs when a user of scsi_host_template
+ * (like Scsi_Host) is gone.
+ * @sht: pointer to scsi_host_template
+*/
+void scsi_host_template_release(struct scsi_host_template *sht)
+{
+ int release_sem_waiters = 0;
+ struct semaphore *release_sem;
+ if (!sht->wait_on_unload)
+ return;
+
+ mutex_lock(_host_template_mutex);
+ if (!--sht->scsi_host_cleanups && !sht->present) {
+ release_sem = >release_sem;
+ release_sem_waiters = sht->release_sem_waiters;
+ sht->release_sem_waiters = 0;
+ }
+ mutex_unlock(_host_template_mutex);
+ for (; release_sem_waiters; --release_sem_waiters)
+ up(release_sem);
+}
+
+/**
+ * scsi_host_template_wait - Waits till all references to
+ * scsi_host_template are gone
+ * @sht: pointer to scsi_host_template
+*/
+void scsi_host_template_wait(struct scsi_host_template *sht)
+{
+ unsigned char present = 0;
+ struct semaphore *release_sem;
+ if (!sht->wait_on_unload)
+ return;
+ mutex_lock(_host_template_mutex);
+ present = sht->present + sht->scsi_host_cleanups;
+ if (present) {
+ ++sht->release_sem_waiters;
+ release_sem = >release_sem;
+ }
+