[PATCH v2 3/3] bfq: Add per-device weight

2019-08-05 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 block/bfq-cgroup.c  | 95 ++---
 block/bfq-iosched.h |  3 ++
 2 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/block/bfq-cgroup.c b/block/bfq-cgroup.c
index 28e5a9241237..de4fd8b725aa 100644
--- a/block/bfq-cgroup.c
+++ b/block/bfq-cgroup.c
@@ -904,7 +904,7 @@ void bfq_end_wr_async(struct bfq_data *bfqd)
bfq_end_wr_async_queues(bfqd, bfqd->root_group);
 }
 
-static int bfq_io_show_weight(struct seq_file *sf, void *v)
+static int bfq_io_show_weight_legacy(struct seq_file *sf, void *v)
 {
struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
@@ -918,8 +918,32 @@ static int bfq_io_show_weight(struct seq_file *sf, void *v)
return 0;
 }
 
-static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight)
+static u64 bfqg_prfill_weight_device(struct seq_file *sf,
+struct blkg_policy_data *pd, int off)
+{
+   struct bfq_group *bfqg = pd_to_bfqg(pd);
+
+   if (!bfqg->entity.dev_weight)
+   return 0;
+   return __blkg_prfill_u64(sf, pd, bfqg->entity.dev_weight);
+}
+
+static int bfq_io_show_weight(struct seq_file *sf, void *v)
+{
+   struct blkcg *blkcg = css_to_blkcg(seq_css(sf));
+   struct bfq_group_data *bfqgd = blkcg_to_bfqgd(blkcg);
+
+   seq_printf(sf, "default %u\n", bfqgd->weight);
+   blkcg_print_blkgs(sf, blkcg, bfqg_prfill_weight_device,
+ _policy_bfq, 0, false);
+   return 0;
+}
+
+static void bfq_group_set_weight(struct bfq_group *bfqg, u64 weight, u64 
dev_weight)
 {
+   weight = dev_weight ?: weight;
+
+   bfqg->entity.dev_weight = dev_weight;
/*
 * Setting the prio_changed flag of the entity
 * to 1 with new_weight == weight would re-set
@@ -967,28 +991,71 @@ static int bfq_io_set_weight_legacy(struct 
cgroup_subsys_state *css,
struct bfq_group *bfqg = blkg_to_bfqg(blkg);
 
if (bfqg)
-   bfq_group_set_weight(bfqg, val);
+   bfq_group_set_weight(bfqg, val, 0);
}
spin_unlock_irq(>lock);
 
return ret;
 }
 
-static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
-char *buf, size_t nbytes,
-loff_t off)
+static ssize_t bfq_io_set_device_weight(struct kernfs_open_file *of,
+   char *buf, size_t nbytes,
+   loff_t off)
 {
-   u64 weight;
-   /* First unsigned long found in the file is used */
-   int ret = kstrtoull(strim(buf), 0, );
+   int ret;
+   struct blkg_conf_ctx ctx;
+   struct blkcg *blkcg = css_to_blkcg(of_css(of));
+   struct bfq_group *bfqg;
+   u64 v;
 
+   ret = blkg_conf_prep(blkcg, _policy_bfq, buf, );
if (ret)
return ret;
 
-   ret = bfq_io_set_weight_legacy(of_css(of), NULL, weight);
+   if (sscanf(ctx.body, "%llu", ) == 1) {
+   /* require "default" on dfl */
+   ret = -ERANGE;
+   if (!v)
+   goto out;
+   } else if (!strcmp(strim(ctx.body), "default")) {
+   v = 0;
+   } else {
+   ret = -EINVAL;
+   goto out;
+   }
+
+   bfqg = blkg_to_bfqg(ctx.blkg);
+
+   ret = -ERANGE;
+   if (!v || (v >= BFQ_MIN_WEIGHT && v <= BFQ_MAX_WEIGHT)) {
+   bfq_group_set_weight(bfqg, bfqg->entity.weight, v);
+   ret = 0;
+   }
+out:
+   blkg_conf_finish();
return ret ?: nbytes;
 }
 
+static ssize_t bfq_io_set_weight(struct kernfs_open_file *of,
+char *buf, size_t nbytes,
+loff_t off)
+{
+   char *endp;
+   int ret;
+   u64 v;
+
+   buf = strim(buf);
+
+   /* "WEIGHT" or "default WEIGHT" sets the default weight */
+   v = simple_strtoull(buf, , 0);
+   if (*endp == '\0' || sscanf(buf, "default %llu", ) == 1) {
+   ret = bfq_io_set_weight_legacy(of_css(of), NULL, v);
+   return ret ?: nbytes;
+   }
+
+   return bfq_io_set_device_weight(of, buf, nbytes, off);
+}
+
 #ifdef CONFIG_BFQ_CGROUP_DEBUG
 static int bfqg_print_stat(struct seq_file *sf, void *v)
 {
@@ -1145,9 +1212,15 @@ struct cftype bfq_blkcg_legacy_files[] = {
{
.name = "bfq.weight",
.flags = CFTYPE_NOT_ON_ROOT,
-   .seq_show = bfq_io_show_weight,
+   .seq_show = bfq_io_show_weight_legacy,
.write_u64 = bfq_io_set_weight_legacy,
},
+   {
+   .name = "bfq.weight_device",
+   .flags = CFTYPE_NOT_ON_ROOT,
+   .seq_sho

[PATCH v2 0/3] Implement BFQ per-device weight interface

2019-08-05 Thread Fam Zheng
(Revision starting from v2 since v1 was used off-list)

Hi Paolo and others,

This adds to BFQ the missing per-device weight interfaces:
blkio.bfq.weight_device on legacy and io.bfq.weight on unified. The
implementation pretty closely resembles what we had in CFQ and the parsing code
is basically reused.

Tests
=

Using two cgroups and three block devices, having weights setup as:

Cgroup  test1   test2

default 100 500
sda 500 100
sdb default default
sdc 200 200

cgroup v1 runs
--

sda.test1.out:   READ: bw=913MiB/s
sda.test2.out:   READ: bw=183MiB/s

sdb.test1.out:   READ: bw=213MiB/s
sdb.test2.out:   READ: bw=1054MiB/s

sdc.test1.out:   READ: bw=650MiB/s
sdc.test2.out:   READ: bw=650MiB/s

cgroup v2 runs
--

sda.test1.out:   READ: bw=915MiB/s
sda.test2.out:   READ: bw=184MiB/s

sdb.test1.out:   READ: bw=216MiB/s
sdb.test2.out:   READ: bw=1069MiB/s

sdc.test1.out:   READ: bw=621MiB/s
sdc.test2.out:   READ: bw=622MiB/s

Fam Zheng (3):
  bfq: Fix the missing barrier in __bfq_entity_update_weight_prio
  bfq: Extract bfq_group_set_weight from bfq_io_set_weight_legacy
  bfq: Add per-device weight

 block/bfq-cgroup.c  | 151 +++-
 block/bfq-iosched.h |   3 ++
 block/bfq-wf2q.c|   2 +
 3 files changed, 119 insertions(+), 37 deletions(-)

-- 
2.11.0



Re: [External Email] Re: [PATCH 1/2] virtio-mmio: Process vrings more proactively

2019-07-21 Thread Fam Zheng



On 7/19/19 11:17 PM, Michael S. Tsirkin wrote:

On Fri, Jul 19, 2019 at 09:31:34PM +0800, Fei Li wrote:

From: Fam Zheng 

This allows the backend to _not_ trap mmio read of the status register
after injecting IRQ in the data path, which can improve the performance
significantly by avoiding a vmexit for each interrupt.

More importantly it also makes it possible for Firecracker to hook up
virtio-mmio with vhost-net, in which case there isn't a way to implement
proper status register handling.

For a complete backend that does set either INT_CONFIG bit or INT_VRING
bit upon generating irq, what happens hasn't changed.

Signed-off-by: Fam Zheng 

This has a side effect of skipping vring callbacks
if they trigger at the same time with a config
interrupt.
I don't see why this is safe.


Good point! I think the block can be moved out from the else block and 
run unconditionally then.


Fam






---
  drivers/virtio/virtio_mmio.c | 4 +---
  1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/virtio/virtio_mmio.c b/drivers/virtio/virtio_mmio.c
index e09edb5c5e06..9b42502b2204 100644
--- a/drivers/virtio/virtio_mmio.c
+++ b/drivers/virtio/virtio_mmio.c
@@ -295,9 +295,7 @@ static irqreturn_t vm_interrupt(int irq, void *opaque)
if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)) {
virtio_config_changed(_dev->vdev);
ret = IRQ_HANDLED;
-   }
-
-   if (likely(status & VIRTIO_MMIO_INT_VRING)) {
+   } else {
spin_lock_irqsave(_dev->lock, flags);
list_for_each_entry(info, _dev->virtqueues, node)
ret |= vring_interrupt(irq, info->vq);
--
2.11.0


Re: [PATCH 4/9] nvme/pci: use the NVME_CTRL_SUSPENDED state

2019-03-19 Thread Fam Zheng
On Tue, 03/19 16:41, Maxim Levitsky wrote:
> When enteriing low power state, the nvme

Typo: "entering".

> driver will now inform the core with the NVME_CTRL_SUSPENDED state
> which will allow mdev driver to act on this information

[snip]

Fam



[PATCH] devpts: Use simple_dentry_operations

2019-02-26 Thread Fam Zheng
With this, the pty dentry is dropped once we d_delete it. This makes
sense since in devpts_pty_new we always create a new dentry with
d_alloc_name().  Previously, without providing a .op_delete, we would
end up leaving garbage in dentry_hashtable.

As a matter of fact, the d_hash of the garbage are likely to collide
because pty numebers are allocated as mall integers. Therefore, a few
chains in the hashtable get polluted more easily and path lookups
hitting them get slowed down.  In a long running system which has an
application that frequently creates and destroys ptys, those chains can
get very long (thousands or millions of unused dentry), which seriously
hurts d_alloc_parallel() in which there is a seq based retry logic.
Specifically, we have seen a system that has accumulated 7 million pty
dentries (for /dev/pts/5) in a single chain, in dentry_hashtable. Some
other system monitoring progresses keep scanning /proc to get process
information, and occasionally a certain /proc/$pid path lookup made by
tem hits that long chain, making d_alloc_parallel stalled. In this case,
soft lockups are observed.

To emulate such a workload, do this:

while echo > /dev/ptmx; true; done

In a few minutes the system will have a very unbalanced
dentry_hashtable.

This patch fixes the above issue.

Signed-off-by: Fam Zheng 
---
 fs/devpts/inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index c53814539070..ce43301f1219 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -456,6 +456,7 @@ devpts_fill_super(struct super_block *s, void *data, int 
silent)
s->s_magic = DEVPTS_SUPER_MAGIC;
s->s_op = _sops;
s->s_time_gran = 1;
+   s->s_d_op = _dentry_operations;
 
error = -ENOMEM;
s->s_fs_info = new_pts_fs_info(s);
-- 
2.11.0



Re: [PATCH] virtio_scsi: always read VPD pages for multiqueue too

2017-07-05 Thread Fam Zheng
On Wed, 07/05 10:30, Paolo Bonzini wrote:
> Multi-queue virtio-scsi uses a different scsi_host_template struct.
> Add the .device_alloc field there, too.
> 
> Fixes: 25d1d50e23275e141e3a3fe06c25a99f4c4bf4e0
> Cc: sta...@vger.kernel.org
> Cc: David Gibson <da...@gibson.dropbear.id.au>
> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
> ---
>  drivers/scsi/virtio_scsi.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index f8dbfeee6c63..ad1e7f1aba4c 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -826,6 +826,7 @@ static int virtscsi_map_queues(struct Scsi_Host *shost)
>   .change_queue_depth = virtscsi_change_queue_depth,
>   .eh_abort_handler = virtscsi_abort,
>   .eh_device_reset_handler = virtscsi_device_reset,
> + .slave_alloc = virtscsi_device_alloc,
>  
>   .can_queue = 1024,
>   .dma_boundary = UINT_MAX,
> -- 
> 1.8.3.1
> 

Reviewed-by: Fam Zheng <f...@redhat.com>


Re: [PATCH] virtio_scsi: always read VPD pages for multiqueue too

2017-07-05 Thread Fam Zheng
On Wed, 07/05 10:30, Paolo Bonzini wrote:
> Multi-queue virtio-scsi uses a different scsi_host_template struct.
> Add the .device_alloc field there, too.
> 
> Fixes: 25d1d50e23275e141e3a3fe06c25a99f4c4bf4e0
> Cc: sta...@vger.kernel.org
> Cc: David Gibson 
> Signed-off-by: Paolo Bonzini 
> ---
>  drivers/scsi/virtio_scsi.c | 1 +
>  1 file changed, 1 insertion(+)
> 
> diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
> index f8dbfeee6c63..ad1e7f1aba4c 100644
> --- a/drivers/scsi/virtio_scsi.c
> +++ b/drivers/scsi/virtio_scsi.c
> @@ -826,6 +826,7 @@ static int virtscsi_map_queues(struct Scsi_Host *shost)
>   .change_queue_depth = virtscsi_change_queue_depth,
>   .eh_abort_handler = virtscsi_abort,
>   .eh_device_reset_handler = virtscsi_device_reset,
> + .slave_alloc = virtscsi_device_alloc,
>  
>   .can_queue = 1024,
>   .dma_boundary = UINT_MAX,
> -- 
> 1.8.3.1
> 

Reviewed-by: Fam Zheng 


[PATCH] sd: Explicit type cast to fix calculating rw_max

2017-04-06 Thread Fam Zheng
Some compilers don't like BLK_DEF_MAX_SECTORS being an enum (int) when
expanding min_not_zero. Cast it to sector_t so it matches the type of
the other operand, logical_to_sectors().

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ab9011a..8d2315a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,7 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
- BLK_DEF_MAX_SECTORS);
+ (sector_t)BLK_DEF_MAX_SECTORS);
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



[PATCH] sd: Explicit type cast to fix calculating rw_max

2017-04-06 Thread Fam Zheng
Some compilers don't like BLK_DEF_MAX_SECTORS being an enum (int) when
expanding min_not_zero. Cast it to sector_t so it matches the type of
the other operand, logical_to_sectors().

Signed-off-by: Fam Zheng 
---
 drivers/scsi/sd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index ab9011a..8d2315a 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,7 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
- BLK_DEF_MAX_SECTORS);
+ (sector_t)BLK_DEF_MAX_SECTORS);
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



Re: linux-next: build warning after merge of the scsi tree

2017-04-06 Thread Fam Zheng
On Thu, 04/06 14:04, Stephen Rothwell wrote:
> Hi James,
> 
> After merging the scsi tree, today's linux-next build (arm
> multi_v7_defconfig) produced this warning:
> 
> In file included from include/linux/list.h:8:0,
>  from include/linux/module.h:9,
>  from drivers/scsi/sd.c:35:
> drivers/scsi/sd.c: In function 'sd_revalidate_disk':
> include/linux/kernel.h:755:16: warning: comparison of distinct pointer types 
> lacks a cast
>   (void) ( == );   \
> ^
> include/linux/kernel.h:758:2: note: in expansion of macro '__min'
>   __min(typeof(x), typeof(y),   \
>   ^
> include/linux/kernel.h:783:39: note: in expansion of macro 'min'
>   __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
>^
> drivers/scsi/sd.c:2959:12: note: in expansion of macro 'min_not_zero'
>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> ^
> 
> Introduced by commit
> 
>   c3e62673ee20 ("scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is 
> unusable")
> 
> logical_to_sectors() is a sector_t and BLK_DEF_MAX_SECTORS is an "enum
> blk_default_limits" (i.e. int).

Hi Stephen, James,

I will send a patch to fix this warning.

Fam


Re: linux-next: build warning after merge of the scsi tree

2017-04-06 Thread Fam Zheng
On Thu, 04/06 14:04, Stephen Rothwell wrote:
> Hi James,
> 
> After merging the scsi tree, today's linux-next build (arm
> multi_v7_defconfig) produced this warning:
> 
> In file included from include/linux/list.h:8:0,
>  from include/linux/module.h:9,
>  from drivers/scsi/sd.c:35:
> drivers/scsi/sd.c: In function 'sd_revalidate_disk':
> include/linux/kernel.h:755:16: warning: comparison of distinct pointer types 
> lacks a cast
>   (void) ( == );   \
> ^
> include/linux/kernel.h:758:2: note: in expansion of macro '__min'
>   __min(typeof(x), typeof(y),   \
>   ^
> include/linux/kernel.h:783:39: note: in expansion of macro 'min'
>   __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
>^
> drivers/scsi/sd.c:2959:12: note: in expansion of macro 'min_not_zero'
>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> ^
> 
> Introduced by commit
> 
>   c3e62673ee20 ("scsi: sd: Consider max_xfer_blocks if opt_xfer_blocks is 
> unusable")
> 
> logical_to_sectors() is a sector_t and BLK_DEF_MAX_SECTORS is an "enum
> blk_default_limits" (i.e. int).

Hi Stephen, James,

I will send a patch to fix this warning.

Fam


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-30 Thread Fam Zheng
On Thu, 03/30 11:30, Martin K. Petersen wrote:
> Fam Zheng <f...@redhat.com> writes:
> 
> >>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> >>   BLK_DEF_MAX_SECTORS);
> >
> > Yes, it is better. Is it okay to make the change when you apply?
> 
> Sure. Applied to 4.11/scsi-fixes.

Cool. Thanks!

Fam


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-30 Thread Fam Zheng
On Thu, 03/30 11:30, Martin K. Petersen wrote:
> Fam Zheng  writes:
> 
> >>rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
> >>   BLK_DEF_MAX_SECTORS);
> >
> > Yes, it is better. Is it okay to make the change when you apply?
> 
> Sure. Applied to 4.11/scsi-fixes.

Cool. Thanks!

Fam


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-29 Thread Fam Zheng
On Wed, 03/29 22:37, Martin K. Petersen wrote:
> Fam Zheng <f...@redhat.com> writes:
> 
> Fam,
> 
> > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> > index fcfeddc..a5c7e67 100644
> > --- a/drivers/scsi/sd.c
> > +++ b/drivers/scsi/sd.c
> > @@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
> > rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
> > } else
> > rw_max = BLK_DEF_MAX_SECTORS;
> > +   rw_max = min_not_zero(rw_max, logical_to_sectors(sdp, dev_max));
> >  
> > /* Combine with controller limits */
> > q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
> 
> Instead of updating rw_max twice, how about:
> 
> } else
>   rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
>   BLK_DEF_MAX_SECTORS);

Yes, it is better. Is it okay to make the change when you apply?

Fam


Re: [PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-29 Thread Fam Zheng
On Wed, 03/29 22:37, Martin K. Petersen wrote:
> Fam Zheng  writes:
> 
> Fam,
> 
> > diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
> > index fcfeddc..a5c7e67 100644
> > --- a/drivers/scsi/sd.c
> > +++ b/drivers/scsi/sd.c
> > @@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
> > rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
> > } else
> > rw_max = BLK_DEF_MAX_SECTORS;
> > +   rw_max = min_not_zero(rw_max, logical_to_sectors(sdp, dev_max));
> >  
> > /* Combine with controller limits */
> > q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
> 
> Instead of updating rw_max twice, how about:
> 
> } else
>   rw_max = min_not_zero(logical_to_sectors(sdp, dev_max),
>   BLK_DEF_MAX_SECTORS);

Yes, it is better. Is it okay to make the change when you apply?

Fam


[PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-27 Thread Fam Zheng
If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng <f...@redhat.com>

---

v2: Fix granularity mismatch. [Martin]
---
 drivers/scsi/sd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc..a5c7e67 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = BLK_DEF_MAX_SECTORS;
+   rw_max = min_not_zero(rw_max, logical_to_sectors(sdp, dev_max));
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



[PATCH v2] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-27 Thread Fam Zheng
If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng 

---

v2: Fix granularity mismatch. [Martin]
---
 drivers/scsi/sd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc..a5c7e67 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = BLK_DEF_MAX_SECTORS;
+   rw_max = min_not_zero(rw_max, logical_to_sectors(sdp, dev_max));
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



[PATCH] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-27 Thread Fam Zheng
If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/scsi/sd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc..e2e21ab 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = BLK_DEF_MAX_SECTORS;
+   rw_max = min_not_zero(rw_max, dev_max);
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



[PATCH] sd: Consider max_xfer_blocks if opt_xfer_blocks is unusable

2017-03-27 Thread Fam Zheng
If device reports a small max_xfer_blocks and a zero opt_xfer_blocks, we
end up using BLK_DEF_MAX_SECTORS, which is wrong and r/w of that size
may get error.

Fixes: ca369d51b3e ("block/sd: Fix device-imposed transfer length limits")
Signed-off-by: Fam Zheng 
---
 drivers/scsi/sd.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index fcfeddc..e2e21ab 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2957,6 +2957,7 @@ static int sd_revalidate_disk(struct gendisk *disk)
rw_max = logical_to_sectors(sdp, sdkp->opt_xfer_blocks);
} else
rw_max = BLK_DEF_MAX_SECTORS;
+   rw_max = min_not_zero(rw_max, dev_max);
 
/* Combine with controller limits */
q->limits.max_sectors = min(rw_max, queue_max_hw_sectors(q));
-- 
2.9.3



[PATCH v2 1/2] virtio_scsi: Add fc_host definitions

2017-01-25 Thread Fam Zheng
Signed-off-by: Fam Zheng <f...@redhat.com>
---
 include/uapi/linux/virtio_scsi.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index cc18ef8..a26fb31 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -113,6 +113,11 @@ struct virtio_scsi_config {
__u16 max_channel;
__u16 max_target;
__u32 max_lun;
+   __u8  primary_wwpn[8];
+   __u8  primary_wwnn[8];
+   __u8  secondary_wwpn[8];
+   __u8  secondary_wwnn[8];
+   __u8  primary_active;
 } __attribute__((packed));
 
 /* Feature Bits */
@@ -120,6 +125,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_HOTPLUG  1
 #define VIRTIO_SCSI_F_CHANGE   2
 #define VIRTIO_SCSI_F_T10_PI   3
+#define VIRTIO_SCSI_F_FC_HOST  4
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK   0
-- 
2.9.3



[PATCH v2 1/2] virtio_scsi: Add fc_host definitions

2017-01-25 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 include/uapi/linux/virtio_scsi.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index cc18ef8..a26fb31 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -113,6 +113,11 @@ struct virtio_scsi_config {
__u16 max_channel;
__u16 max_target;
__u32 max_lun;
+   __u8  primary_wwpn[8];
+   __u8  primary_wwnn[8];
+   __u8  secondary_wwpn[8];
+   __u8  secondary_wwnn[8];
+   __u8  primary_active;
 } __attribute__((packed));
 
 /* Feature Bits */
@@ -120,6 +125,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_HOTPLUG  1
 #define VIRTIO_SCSI_F_CHANGE   2
 #define VIRTIO_SCSI_F_T10_PI   3
+#define VIRTIO_SCSI_F_FC_HOST  4
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK   0
-- 
2.9.3



[PATCH v2 2/2] virtio_scsi: Implement fc_host

2017-01-25 Thread Fam Zheng
This implements the VIRTIO_SCSI_F_FC_HOST feature by reading the config
fields and presenting them as sysfs fc_host attributes. The config
change handler is added here because primary_active will toggle during
migration.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 60 +-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index ec91bd0..1bb330c 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
@@ -795,6 +796,15 @@ static struct scsi_host_template 
virtscsi_host_template_multi = {
.track_queue_depth = 1,
 };
 
+static struct fc_function_template virtscsi_fc_template = {
+   .show_host_node_name = 1,
+   .show_host_port_name = 1,
+   .show_host_port_type = 1,
+   .show_host_port_state = 1,
+};
+
+static struct scsi_transport_template *virtscsi_fc_transport_template;
+
 #define virtscsi_config_get(vdev, fld) \
({ \
typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -956,15 +966,42 @@ static int virtscsi_init(struct virtio_device *vdev,
return err;
 }
 
+static void virtscsi_update_fc_host_attrs(struct virtio_device *vdev)
+{
+   struct Scsi_Host *shost = vdev->priv;
+   u8 node_name[8], port_name[8];
+
+   if (virtscsi_config_get(vdev, primary_active)) {
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwnn),
+   _name, 8);
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwpn),
+   _name, 8);
+   } else {
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwnn),
+   _name, 8);
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwpn),
+   _name, 8);
+   }
+   fc_host_node_name(shost) = wwn_to_u64(node_name);
+   fc_host_port_name(shost) = wwn_to_u64(port_name);
+   fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+   fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+}
+
 static int virtscsi_probe(struct virtio_device *vdev)
 {
-   struct Scsi_Host *shost;
+   struct Scsi_Host *shost = NULL;
struct virtio_scsi *vscsi;
int err;
u32 sg_elems, num_targets;
u32 cmd_per_lun;
u32 num_queues;
struct scsi_host_template *hostt;
+   bool fc_host_enabled;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -987,6 +1024,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (!shost)
return -ENOMEM;
 
+   fc_host_enabled = virtio_has_feature(vdev, VIRTIO_SCSI_F_FC_HOST);
+   if (fc_host_enabled)
+   shost->transportt = virtscsi_fc_transport_template;
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
shost->sg_tablesize = sg_elems;
vscsi = shost_priv(shost);
@@ -1032,6 +1072,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto scsi_add_host_failed;
 
+   if (fc_host_enabled)
+   virtscsi_update_fc_host_attrs(vdev);
+
virtio_device_ready(vdev);
 
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1098,6 +1141,11 @@ static int virtscsi_restore(struct virtio_device *vdev)
 }
 #endif
 
+static void virtscsi_config_changed(struct virtio_device *vdev)
+{
+   virtscsi_update_fc_host_attrs(vdev);
+}
+
 static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -1109,6 +1157,7 @@ static unsigned int features[] = {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
VIRTIO_SCSI_F_T10_PI,
 #endif
+   VIRTIO_SCSI_F_FC_HOST,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
@@ -1123,12 +1172,20 @@ static struct virtio_driver virtio_scsi_driver = {
.restore = virtscsi_restore,
 #endif
.remove = virtscsi_remove,
+   .config_changed = virtscsi_config_changed,
 };
 
 static int __init init(void)
 {
int ret = -ENOMEM;
 
+   virtscsi_fc_transport_template =
+   fc_attach_transport(_fc_template);
+   if (!virtscsi_fc_transport_template) {
+   pr_err("fc_attach_transport() failed\n");
+   goto error;
+   }
+
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
if (!virtscsi_cmd_cache) {
pr_err("kmem_cache_create() for virtscsi_cmd_cache failed\n");
@@ -1176,6 +1233,7 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
+   fc_release_transport(virtscsi

[PATCH v2 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-25 Thread Fam Zheng
v2: Fix endianness of WWNN/WWPN. [Paolo]

This series implements the proposed fc_host feature of virtio-scsi.

The first patch updates the data structure changes according to the spec
proposal; the second patch actually implements the operations.

Fam Zheng (2):
  virtio_scsi: Add fc_host definitions
  virtio_scsi: Implement fc_host

 drivers/scsi/virtio_scsi.c   | 60 +++-
 include/uapi/linux/virtio_scsi.h |  6 
 2 files changed, 65 insertions(+), 1 deletion(-)

-- 
2.9.3



[PATCH v2 2/2] virtio_scsi: Implement fc_host

2017-01-25 Thread Fam Zheng
This implements the VIRTIO_SCSI_F_FC_HOST feature by reading the config
fields and presenting them as sysfs fc_host attributes. The config
change handler is added here because primary_active will toggle during
migration.

Signed-off-by: Fam Zheng 
---
 drivers/scsi/virtio_scsi.c | 60 +-
 1 file changed, 59 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index ec91bd0..1bb330c 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
@@ -795,6 +796,15 @@ static struct scsi_host_template 
virtscsi_host_template_multi = {
.track_queue_depth = 1,
 };
 
+static struct fc_function_template virtscsi_fc_template = {
+   .show_host_node_name = 1,
+   .show_host_port_name = 1,
+   .show_host_port_type = 1,
+   .show_host_port_state = 1,
+};
+
+static struct scsi_transport_template *virtscsi_fc_transport_template;
+
 #define virtscsi_config_get(vdev, fld) \
({ \
typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -956,15 +966,42 @@ static int virtscsi_init(struct virtio_device *vdev,
return err;
 }
 
+static void virtscsi_update_fc_host_attrs(struct virtio_device *vdev)
+{
+   struct Scsi_Host *shost = vdev->priv;
+   u8 node_name[8], port_name[8];
+
+   if (virtscsi_config_get(vdev, primary_active)) {
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwnn),
+   _name, 8);
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwpn),
+   _name, 8);
+   } else {
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwnn),
+   _name, 8);
+   virtio_cread_bytes(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwpn),
+   _name, 8);
+   }
+   fc_host_node_name(shost) = wwn_to_u64(node_name);
+   fc_host_port_name(shost) = wwn_to_u64(port_name);
+   fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+   fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+}
+
 static int virtscsi_probe(struct virtio_device *vdev)
 {
-   struct Scsi_Host *shost;
+   struct Scsi_Host *shost = NULL;
struct virtio_scsi *vscsi;
int err;
u32 sg_elems, num_targets;
u32 cmd_per_lun;
u32 num_queues;
struct scsi_host_template *hostt;
+   bool fc_host_enabled;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -987,6 +1024,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (!shost)
return -ENOMEM;
 
+   fc_host_enabled = virtio_has_feature(vdev, VIRTIO_SCSI_F_FC_HOST);
+   if (fc_host_enabled)
+   shost->transportt = virtscsi_fc_transport_template;
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
shost->sg_tablesize = sg_elems;
vscsi = shost_priv(shost);
@@ -1032,6 +1072,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto scsi_add_host_failed;
 
+   if (fc_host_enabled)
+   virtscsi_update_fc_host_attrs(vdev);
+
virtio_device_ready(vdev);
 
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1098,6 +1141,11 @@ static int virtscsi_restore(struct virtio_device *vdev)
 }
 #endif
 
+static void virtscsi_config_changed(struct virtio_device *vdev)
+{
+   virtscsi_update_fc_host_attrs(vdev);
+}
+
 static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -1109,6 +1157,7 @@ static unsigned int features[] = {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
VIRTIO_SCSI_F_T10_PI,
 #endif
+   VIRTIO_SCSI_F_FC_HOST,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
@@ -1123,12 +1172,20 @@ static struct virtio_driver virtio_scsi_driver = {
.restore = virtscsi_restore,
 #endif
.remove = virtscsi_remove,
+   .config_changed = virtscsi_config_changed,
 };
 
 static int __init init(void)
 {
int ret = -ENOMEM;
 
+   virtscsi_fc_transport_template =
+   fc_attach_transport(_fc_template);
+   if (!virtscsi_fc_transport_template) {
+   pr_err("fc_attach_transport() failed\n");
+   goto error;
+   }
+
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
if (!virtscsi_cmd_cache) {
pr_err("kmem_cache_create() for virtscsi_cmd_cache failed\n");
@@ -1176,6 +1233,7 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
+   fc_release_transport(virtscsi_fc_transport_templ

[PATCH v2 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-25 Thread Fam Zheng
v2: Fix endianness of WWNN/WWPN. [Paolo]

This series implements the proposed fc_host feature of virtio-scsi.

The first patch updates the data structure changes according to the spec
proposal; the second patch actually implements the operations.

Fam Zheng (2):
  virtio_scsi: Add fc_host definitions
  virtio_scsi: Implement fc_host

 drivers/scsi/virtio_scsi.c   | 60 +++-
 include/uapi/linux/virtio_scsi.h |  6 
 2 files changed, 65 insertions(+), 1 deletion(-)

-- 
2.9.3



Re: [PATCH 2/2] scsi: storvsc: Add support for FC lightweight host.

2017-01-22 Thread Fam Zheng
On Wed, 01/18 15:28, Cathy Avery wrote:
> Enable FC lightweight host option so that the luns exposed by
> the driver may be manually scanned.

Hi Cathy, out of curiosity: how does this relate to issue_lip operation? And how
to trigger manual scan with this patch?

Fam


Re: [PATCH 2/2] scsi: storvsc: Add support for FC lightweight host.

2017-01-22 Thread Fam Zheng
On Wed, 01/18 15:28, Cathy Avery wrote:
> Enable FC lightweight host option so that the luns exposed by
> the driver may be manually scanned.

Hi Cathy, out of curiosity: how does this relate to issue_lip operation? And how
to trigger manual scan with this patch?

Fam


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-17 Thread Fam Zheng
On Tue, 01/17 14:17, Paolo Bonzini wrote:
> 
> 
> On 16/01/2017 18:26, Fam Zheng wrote:
> >> Is the endianness correct for big-endian host here?
> >
> > I think so. The fc_host sysfs uses u64 to represent port_name and node_name,
> > this patch does the same, so using virtio_* helpers for these fields should
> > handle the endianness correctly.
> 
> I was suspicious about it because they are defined as "u8 x[8]" in the
> virtio_scsi_config struct.  So you would need to read with
> virtio_cread_bytes and pass the result to wwn_to_u64.
> 
> For example, if you have 0x500123456789abcd this would be
> 
>   0x50 0x01 0x23 0x45 0x67 0x89 0xab 0cd
> 
> in virtio_scsi_config, and then virtio_cread64 would read it as a
> little-endian u64, 0xcdab896745230150.  Maybe your QEMU patch is also
> writing things as little-endian 64-bit integers, rather than 8-element
> arrays of bytes?

Yes, they all used 64-bit integers in a "less surprising" endian. I think there
is an endianness conecpt to WWN, as in 0x500123456789abcd; and there is an
native endianness to virtio, which is little-endian. If we use a "u8 x[8]" type
in the spec and want the WWN's MSB, namely the 0x50 stuff, to be the first byte,
is it worth to explicitly document that to avoid confusion?

Fam


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-17 Thread Fam Zheng
On Tue, 01/17 14:17, Paolo Bonzini wrote:
> 
> 
> On 16/01/2017 18:26, Fam Zheng wrote:
> >> Is the endianness correct for big-endian host here?
> >
> > I think so. The fc_host sysfs uses u64 to represent port_name and node_name,
> > this patch does the same, so using virtio_* helpers for these fields should
> > handle the endianness correctly.
> 
> I was suspicious about it because they are defined as "u8 x[8]" in the
> virtio_scsi_config struct.  So you would need to read with
> virtio_cread_bytes and pass the result to wwn_to_u64.
> 
> For example, if you have 0x500123456789abcd this would be
> 
>   0x50 0x01 0x23 0x45 0x67 0x89 0xab 0cd
> 
> in virtio_scsi_config, and then virtio_cread64 would read it as a
> little-endian u64, 0xcdab896745230150.  Maybe your QEMU patch is also
> writing things as little-endian 64-bit integers, rather than 8-element
> arrays of bytes?

Yes, they all used 64-bit integers in a "less surprising" endian. I think there
is an endianness conecpt to WWN, as in 0x500123456789abcd; and there is an
native endianness to virtio, which is little-endian. If we use a "u8 x[8]" type
in the spec and want the WWN's MSB, namely the 0x50 stuff, to be the first byte,
is it worth to explicitly document that to avoid confusion?

Fam


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
On Tue, 01/17 03:02, kbuild test robot wrote:
> Hi Fam,
> 
> [auto build test ERROR on linus/master]
> [also build test ERROR on v4.10-rc4 next-20170116]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Fam-Zheng/virtio-scsi-Implement-FC_HOST-feature/20170117-011950
> config: i386-randconfig-r0-201703 (attached as .config)
> compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386 
> 
> All errors (new ones prefixed by >>):
> 
>drivers/built-in.o: In function `init':
> >> virtio_scsi.c:(.init.text+0x10d52): undefined reference to 
> >> `fc_attach_transport'
>drivers/built-in.o: In function `fini':
> >> virtio_scsi.c:(.exit.text+0x14a4): undefined reference to 
> >> `fc_release_transport'

This is good catch.

Looks like virtio-scsi will now have to depend on fc, or we wrap the fc code
with #ifdef's.

Fam


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
On Tue, 01/17 03:02, kbuild test robot wrote:
> Hi Fam,
> 
> [auto build test ERROR on linus/master]
> [also build test ERROR on v4.10-rc4 next-20170116]
> [if your patch is applied to the wrong git tree, please drop us a note to 
> help improve the system]
> 
> url:
> https://github.com/0day-ci/linux/commits/Fam-Zheng/virtio-scsi-Implement-FC_HOST-feature/20170117-011950
> config: i386-randconfig-r0-201703 (attached as .config)
> compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
> reproduce:
> # save the attached .config to linux build tree
> make ARCH=i386 
> 
> All errors (new ones prefixed by >>):
> 
>drivers/built-in.o: In function `init':
> >> virtio_scsi.c:(.init.text+0x10d52): undefined reference to 
> >> `fc_attach_transport'
>drivers/built-in.o: In function `fini':
> >> virtio_scsi.c:(.exit.text+0x14a4): undefined reference to 
> >> `fc_release_transport'

This is good catch.

Looks like virtio-scsi will now have to depend on fc, or we wrap the fc code
with #ifdef's.

Fam


Re: [PATCH 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-16 Thread Fam Zheng
On Mon, 01/16 09:34, Christoph Hellwig wrote:
> On Tue, Jan 17, 2017 at 12:04:28AM +0800, Fam Zheng wrote:
> > This series implements the proposed fc_host feature of virtio-scsi.
> 
> Yikes.  Why?

Hi Christoph, this is mostly to "passthrough" a host NPIV vport (leaving libvirt
doing the heavy lifting to watch and attach all visible LUNs to guest).

Fam

> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-16 Thread Fam Zheng
On Mon, 01/16 09:34, Christoph Hellwig wrote:
> On Tue, Jan 17, 2017 at 12:04:28AM +0800, Fam Zheng wrote:
> > This series implements the proposed fc_host feature of virtio-scsi.
> 
> Yikes.  Why?

Hi Christoph, this is mostly to "passthrough" a host NPIV vport (leaving libvirt
doing the heavy lifting to watch and attach all visible LUNs to guest).

Fam

> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
On Mon, 01/16 17:45, Paolo Bonzini wrote:
> 
> 
> On 16/01/2017 17:04, Fam Zheng wrote:
> > +   node_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, primary_wwnn));
> > +   port_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, primary_wwpn));
> > +   } else {
> > +   node_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, secondary_wwnn));
> > +   port_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, secondary_wwpn));
> 
> Is the endianness correct for big-endian host here?

I think so. The fc_host sysfs uses u64 to represent port_name and node_name,
this patch does the same, so using virtio_* helpers for these fields should
handle the endianness correctly.

Maybe we should use u64 in struct virtio_scsi_config as well?

Fam


Re: [PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
On Mon, 01/16 17:45, Paolo Bonzini wrote:
> 
> 
> On 16/01/2017 17:04, Fam Zheng wrote:
> > +   node_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, primary_wwnn));
> > +   port_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, primary_wwpn));
> > +   } else {
> > +   node_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, secondary_wwnn));
> > +   port_name = virtio_cread64(vdev,
> > +   offsetof(struct virtio_scsi_config, secondary_wwpn));
> 
> Is the endianness correct for big-endian host here?

I think so. The fc_host sysfs uses u64 to represent port_name and node_name,
this patch does the same, so using virtio_* helpers for these fields should
handle the endianness correctly.

Maybe we should use u64 in struct virtio_scsi_config as well?

Fam


[PATCH 1/2] virtio_scsi: Add fc_host definitions

2017-01-16 Thread Fam Zheng
Signed-off-by: Fam Zheng <f...@redhat.com>
---
 include/uapi/linux/virtio_scsi.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index cc18ef8..a26fb31 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -113,6 +113,11 @@ struct virtio_scsi_config {
__u16 max_channel;
__u16 max_target;
__u32 max_lun;
+   __u8  primary_wwpn[8];
+   __u8  primary_wwnn[8];
+   __u8  secondary_wwpn[8];
+   __u8  secondary_wwnn[8];
+   __u8  primary_active;
 } __attribute__((packed));
 
 /* Feature Bits */
@@ -120,6 +125,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_HOTPLUG  1
 #define VIRTIO_SCSI_F_CHANGE   2
 #define VIRTIO_SCSI_F_T10_PI   3
+#define VIRTIO_SCSI_F_FC_HOST  4
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK   0
-- 
2.9.3



[PATCH 1/2] virtio_scsi: Add fc_host definitions

2017-01-16 Thread Fam Zheng
Signed-off-by: Fam Zheng 
---
 include/uapi/linux/virtio_scsi.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/include/uapi/linux/virtio_scsi.h b/include/uapi/linux/virtio_scsi.h
index cc18ef8..a26fb31 100644
--- a/include/uapi/linux/virtio_scsi.h
+++ b/include/uapi/linux/virtio_scsi.h
@@ -113,6 +113,11 @@ struct virtio_scsi_config {
__u16 max_channel;
__u16 max_target;
__u32 max_lun;
+   __u8  primary_wwpn[8];
+   __u8  primary_wwnn[8];
+   __u8  secondary_wwpn[8];
+   __u8  secondary_wwnn[8];
+   __u8  primary_active;
 } __attribute__((packed));
 
 /* Feature Bits */
@@ -120,6 +125,7 @@ struct virtio_scsi_config {
 #define VIRTIO_SCSI_F_HOTPLUG  1
 #define VIRTIO_SCSI_F_CHANGE   2
 #define VIRTIO_SCSI_F_T10_PI   3
+#define VIRTIO_SCSI_F_FC_HOST  4
 
 /* Response codes */
 #define VIRTIO_SCSI_S_OK   0
-- 
2.9.3



[PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
This implements the VIRTIO_SCSI_F_FC_HOST feature by reading the config
fields and presenting them as sysfs fc_host attributes. The config
change handler is added here because primary_active will toggle during
migration.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/scsi/virtio_scsi.c | 55 +-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index ec91bd0..9e92db9 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
@@ -795,6 +796,15 @@ static struct scsi_host_template 
virtscsi_host_template_multi = {
.track_queue_depth = 1,
 };
 
+static struct fc_function_template virtscsi_fc_template = {
+   .show_host_node_name = 1,
+   .show_host_port_name = 1,
+   .show_host_port_type = 1,
+   .show_host_port_state = 1,
+};
+
+static struct scsi_transport_template *virtscsi_fc_transport_template;
+
 #define virtscsi_config_get(vdev, fld) \
({ \
typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -956,15 +966,38 @@ static int virtscsi_init(struct virtio_device *vdev,
return err;
 }
 
+static void virtscsi_update_fc_host_attrs(struct virtio_device *vdev)
+{
+   struct Scsi_Host *shost = vdev->priv;
+   u64 node_name, port_name;
+
+   if (virtscsi_config_get(vdev, primary_active)) {
+   node_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwnn));
+   port_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwpn));
+   } else {
+   node_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwnn));
+   port_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwpn));
+   }
+   fc_host_node_name(shost) = node_name;
+   fc_host_port_name(shost) = port_name;
+   fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+   fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+}
+
 static int virtscsi_probe(struct virtio_device *vdev)
 {
-   struct Scsi_Host *shost;
+   struct Scsi_Host *shost = NULL;
struct virtio_scsi *vscsi;
int err;
u32 sg_elems, num_targets;
u32 cmd_per_lun;
u32 num_queues;
struct scsi_host_template *hostt;
+   bool fc_host_enabled;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -987,6 +1020,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (!shost)
return -ENOMEM;
 
+   fc_host_enabled = virtio_has_feature(vdev, VIRTIO_SCSI_F_FC_HOST);
+   if (fc_host_enabled)
+   shost->transportt = virtscsi_fc_transport_template;
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
shost->sg_tablesize = sg_elems;
vscsi = shost_priv(shost);
@@ -1032,6 +1068,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto scsi_add_host_failed;
 
+   if (fc_host_enabled)
+   virtscsi_update_fc_host_attrs(vdev);
+
virtio_device_ready(vdev);
 
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1098,6 +1137,11 @@ static int virtscsi_restore(struct virtio_device *vdev)
 }
 #endif
 
+static void virtscsi_config_changed(struct virtio_device *vdev)
+{
+   virtscsi_update_fc_host_attrs(vdev);
+}
+
 static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -1109,6 +1153,7 @@ static unsigned int features[] = {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
VIRTIO_SCSI_F_T10_PI,
 #endif
+   VIRTIO_SCSI_F_FC_HOST,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
@@ -1123,12 +1168,19 @@ static struct virtio_driver virtio_scsi_driver = {
.restore = virtscsi_restore,
 #endif
.remove = virtscsi_remove,
+   .config_changed = virtscsi_config_changed,
 };
 
 static int __init init(void)
 {
int ret = -ENOMEM;
 
+   virtscsi_fc_transport_template = 
fc_attach_transport(_fc_template);
+   if (!virtscsi_fc_transport_template) {
+   pr_err("fc_attach_transport() failed\n");
+   goto error;
+   }
+
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
if (!virtscsi_cmd_cache) {
pr_err("kmem_cache_create() for virtscsi_cmd_cache failed\n");
@@ -1176,6 +1228,7 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
+   fc_release_transport(virtscsi_fc_transport_template);
unregister_virtio_driver(_scsi_driver);
cpuhp_remove_multi_state(virtioscsi_online);
cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
-- 
2.9.3



[PATCH 2/2] virtio_scsi: Implement fc_host

2017-01-16 Thread Fam Zheng
This implements the VIRTIO_SCSI_F_FC_HOST feature by reading the config
fields and presenting them as sysfs fc_host attributes. The config
change handler is added here because primary_active will toggle during
migration.

Signed-off-by: Fam Zheng 
---
 drivers/scsi/virtio_scsi.c | 55 +-
 1 file changed, 54 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/virtio_scsi.c b/drivers/scsi/virtio_scsi.c
index ec91bd0..9e92db9 100644
--- a/drivers/scsi/virtio_scsi.c
+++ b/drivers/scsi/virtio_scsi.c
@@ -28,6 +28,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #define VIRTIO_SCSI_MEMPOOL_SZ 64
@@ -795,6 +796,15 @@ static struct scsi_host_template 
virtscsi_host_template_multi = {
.track_queue_depth = 1,
 };
 
+static struct fc_function_template virtscsi_fc_template = {
+   .show_host_node_name = 1,
+   .show_host_port_name = 1,
+   .show_host_port_type = 1,
+   .show_host_port_state = 1,
+};
+
+static struct scsi_transport_template *virtscsi_fc_transport_template;
+
 #define virtscsi_config_get(vdev, fld) \
({ \
typeof(((struct virtio_scsi_config *)0)->fld) __val; \
@@ -956,15 +966,38 @@ static int virtscsi_init(struct virtio_device *vdev,
return err;
 }
 
+static void virtscsi_update_fc_host_attrs(struct virtio_device *vdev)
+{
+   struct Scsi_Host *shost = vdev->priv;
+   u64 node_name, port_name;
+
+   if (virtscsi_config_get(vdev, primary_active)) {
+   node_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwnn));
+   port_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, primary_wwpn));
+   } else {
+   node_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwnn));
+   port_name = virtio_cread64(vdev,
+   offsetof(struct virtio_scsi_config, secondary_wwpn));
+   }
+   fc_host_node_name(shost) = node_name;
+   fc_host_port_name(shost) = port_name;
+   fc_host_port_type(shost) = FC_PORTTYPE_NPORT;
+   fc_host_port_state(shost) = FC_PORTSTATE_ONLINE;
+}
+
 static int virtscsi_probe(struct virtio_device *vdev)
 {
-   struct Scsi_Host *shost;
+   struct Scsi_Host *shost = NULL;
struct virtio_scsi *vscsi;
int err;
u32 sg_elems, num_targets;
u32 cmd_per_lun;
u32 num_queues;
struct scsi_host_template *hostt;
+   bool fc_host_enabled;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -987,6 +1020,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (!shost)
return -ENOMEM;
 
+   fc_host_enabled = virtio_has_feature(vdev, VIRTIO_SCSI_F_FC_HOST);
+   if (fc_host_enabled)
+   shost->transportt = virtscsi_fc_transport_template;
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
shost->sg_tablesize = sg_elems;
vscsi = shost_priv(shost);
@@ -1032,6 +1068,9 @@ static int virtscsi_probe(struct virtio_device *vdev)
if (err)
goto scsi_add_host_failed;
 
+   if (fc_host_enabled)
+   virtscsi_update_fc_host_attrs(vdev);
+
virtio_device_ready(vdev);
 
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
@@ -1098,6 +1137,11 @@ static int virtscsi_restore(struct virtio_device *vdev)
 }
 #endif
 
+static void virtscsi_config_changed(struct virtio_device *vdev)
+{
+   virtscsi_update_fc_host_attrs(vdev);
+}
+
 static struct virtio_device_id id_table[] = {
{ VIRTIO_ID_SCSI, VIRTIO_DEV_ANY_ID },
{ 0 },
@@ -1109,6 +1153,7 @@ static unsigned int features[] = {
 #ifdef CONFIG_BLK_DEV_INTEGRITY
VIRTIO_SCSI_F_T10_PI,
 #endif
+   VIRTIO_SCSI_F_FC_HOST,
 };
 
 static struct virtio_driver virtio_scsi_driver = {
@@ -1123,12 +1168,19 @@ static struct virtio_driver virtio_scsi_driver = {
.restore = virtscsi_restore,
 #endif
.remove = virtscsi_remove,
+   .config_changed = virtscsi_config_changed,
 };
 
 static int __init init(void)
 {
int ret = -ENOMEM;
 
+   virtscsi_fc_transport_template = 
fc_attach_transport(_fc_template);
+   if (!virtscsi_fc_transport_template) {
+   pr_err("fc_attach_transport() failed\n");
+   goto error;
+   }
+
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
if (!virtscsi_cmd_cache) {
pr_err("kmem_cache_create() for virtscsi_cmd_cache failed\n");
@@ -1176,6 +1228,7 @@ static int __init init(void)
 
 static void __exit fini(void)
 {
+   fc_release_transport(virtscsi_fc_transport_template);
unregister_virtio_driver(_scsi_driver);
cpuhp_remove_multi_state(virtioscsi_online);
cpuhp_remove_multi_state(CPUHP_VIRT_SCSI_DEAD);
-- 
2.9.3



[PATCH 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-16 Thread Fam Zheng
This series implements the proposed fc_host feature of virtio-scsi.

The first patch updates the data structure changes according to the spec
proposal; the second patch actually implements the operations.

Fam Zheng (2):
  virtio_scsi: Add fc_host definitions
  virtio_scsi: Implement fc_host

 drivers/scsi/virtio_scsi.c   | 55 +++-
 include/uapi/linux/virtio_scsi.h |  6 +
 2 files changed, 60 insertions(+), 1 deletion(-)

-- 
2.9.3



[PATCH 0/2] virtio-scsi: Implement FC_HOST feature

2017-01-16 Thread Fam Zheng
This series implements the proposed fc_host feature of virtio-scsi.

The first patch updates the data structure changes according to the spec
proposal; the second patch actually implements the operations.

Fam Zheng (2):
  virtio_scsi: Add fc_host definitions
  virtio_scsi: Implement fc_host

 drivers/scsi/virtio_scsi.c   | 55 +++-
 include/uapi/linux/virtio_scsi.h |  6 +
 2 files changed, 60 insertions(+), 1 deletion(-)

-- 
2.9.3



[PATCH] libfc: Fix variable name in fc_set_wwpn

2017-01-12 Thread Fam Zheng
The parameter name should be wwpn instead of wwnn.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 include/scsi/libfc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 96dd0b3..da5033d 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -809,11 +809,11 @@ static inline void fc_set_wwnn(struct fc_lport *lport, 
u64 wwnn)
 /**
  * fc_set_wwpn() - Set the World Wide Port Name of a local port
  * @lport: The local port whose WWPN is to be set
- * @wwnn:  The new WWPN
+ * @wwpn:  The new WWPN
  */
-static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwnn)
+static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwpn)
 {
-   lport->wwpn = wwnn;
+   lport->wwpn = wwpn;
 }
 
 /**
-- 
2.9.3



[PATCH] libfc: Fix variable name in fc_set_wwpn

2017-01-12 Thread Fam Zheng
The parameter name should be wwpn instead of wwnn.

Signed-off-by: Fam Zheng 
---
 include/scsi/libfc.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 96dd0b3..da5033d 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -809,11 +809,11 @@ static inline void fc_set_wwnn(struct fc_lport *lport, 
u64 wwnn)
 /**
  * fc_set_wwpn() - Set the World Wide Port Name of a local port
  * @lport: The local port whose WWPN is to be set
- * @wwnn:  The new WWPN
+ * @wwpn:  The new WWPN
  */
-static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwnn)
+static inline void fc_set_wwpn(struct fc_lport *lport, u64 wwpn)
 {
-   lport->wwpn = wwnn;
+   lport->wwpn = wwpn;
 }
 
 /**
-- 
2.9.3



Re: [PATCH] virtio-blk: Generate uevent after attribute available

2016-09-03 Thread Fam Zheng
On Sat, 09/03 01:56, Michael S. Tsirkin wrote:
> On Wed, Jun 29, 2016 at 09:24:15AM +0800, Fam Zheng wrote:
> > On Tue, 06/28 04:45, Christoph Hellwig wrote:
> > > On Tue, Jun 28, 2016 at 10:39:15AM +0800, Fam Zheng wrote:
> > > > Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
> > > > point we haven't created the serial attribute file, therefore depending
> > > > on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
> > > > created.
> > > > 
> > > > This race condition can be easily reproduced by hot plugging a number of
> > > > virtio-blk disks.
> > > > 
> > > > Also in systemd, there used to be a related workaround in udev rules
> > > > called 'WAIT_FOR="serial"', but it is removed in later versions.
> > > > 
> > > > Now let's generate a KOBJ_CHANGE event after the attributes are ready.
> > > 
> > > The same race is present in other drivers as well, e.g. nvme.  Please
> > > find a way to make this work properly without needing to hack every
> > > driver to send events manually.
> > 
> > OK, I'll take a look today!
> > 
> > Fam
> 
> Was this fixed in the generic code?

A proposed fix is:

https://lkml.org/lkml/2016/8/17/81

Fam


Re: [PATCH] virtio-blk: Generate uevent after attribute available

2016-09-03 Thread Fam Zheng
On Sat, 09/03 01:56, Michael S. Tsirkin wrote:
> On Wed, Jun 29, 2016 at 09:24:15AM +0800, Fam Zheng wrote:
> > On Tue, 06/28 04:45, Christoph Hellwig wrote:
> > > On Tue, Jun 28, 2016 at 10:39:15AM +0800, Fam Zheng wrote:
> > > > Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
> > > > point we haven't created the serial attribute file, therefore depending
> > > > on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
> > > > created.
> > > > 
> > > > This race condition can be easily reproduced by hot plugging a number of
> > > > virtio-blk disks.
> > > > 
> > > > Also in systemd, there used to be a related workaround in udev rules
> > > > called 'WAIT_FOR="serial"', but it is removed in later versions.
> > > > 
> > > > Now let's generate a KOBJ_CHANGE event after the attributes are ready.
> > > 
> > > The same race is present in other drivers as well, e.g. nvme.  Please
> > > find a way to make this work properly without needing to hack every
> > > driver to send events manually.
> > 
> > OK, I'll take a look today!
> > 
> > Fam
> 
> Was this fixed in the generic code?

A proposed fix is:

https://lkml.org/lkml/2016/8/17/81

Fam


Re: [PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
On Wed, 08/17 11:06, Cornelia Huck wrote:
> On Wed, 17 Aug 2016 16:48:23 +0800
> Fam Zheng <f...@redhat.com> wrote:
> 
> > On Wed, 08/17 10:49, Cornelia Huck wrote:
> > > On Wed, 17 Aug 2016 15:15:06 +0800
> > > Fam Zheng <f...@redhat.com> wrote:
> > > 
> > > > @@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct 
> > > > gendisk *disk)
> > > > disk->flags |= GENHD_FL_UP;
> > > > 
> > > > retval = blk_alloc_devt(>part0, );
> > > > -   if (retval) {
> > > > -   WARN_ON(1);
> > > > -   return;
> > > > -   }
> > > > +   if (retval)
> > > > +   goto fail;
> > > > disk_to_dev(disk)->devt = devt;
> > > > 
> > > > /* ->major and ->first_minor aren't supposed to be
> > > > @@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, 
> > > > struct gendisk *disk)
> > > > disk->major = MAJOR(devt);
> > > > disk->first_minor = MINOR(devt);
> > > > 
> > > > -   disk_alloc_events(disk);
> > > > +   retval = disk_alloc_events(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > /* Register BDI before referencing it from bdev */
> > > > bdi = >queue->backing_dev_info;
> > > > -   bdi_register_owner(bdi, disk_to_dev(disk));
> > > > +   retval = bdi_register_owner(bdi, disk_to_dev(disk));
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > -   blk_register_region(disk_devt(disk), disk->minors, NULL,
> > > > -   exact_match, exact_lock, disk);
> > > > -   register_disk(parent, disk);
> > > > -   blk_register_queue(disk);
> > > > +   retval = blk_register_region(disk_devt(disk), disk->minors, 
> > > > NULL,
> > > > +exact_match, exact_lock, disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   retval = register_disk(parent, disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   retval = blk_register_queue(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > /*
> > > >  * Take an extra ref on queue which will be put on 
> > > > disk_release()
> > > > @@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, 
> > > > struct gendisk *disk)
> > > > 
> > > > retval = sysfs_create_link(_to_dev(disk)->kobj, 
> > > > >dev->kobj,
> > > >"bdi");
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +
> > > > +   retval = disk_add_events(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +
> > > > +   retval = blk_integrity_add(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   return 0;
> > > > +fail:
> > > > WARN_ON(retval);
> > > > -
> > > > -   disk_add_events(disk);
> > > > -   blk_integrity_add(disk);
> > > > +   return retval;
> > > >  }
> > > 
> > > Noticed this when trying to figure out whether the error handling in
> > > virtio_blk was correct:
> > > 
> > > Shouldn't you try to cleanup/rewind so that any structures are in a
> > > sane state after failure? The caller doesn't know where device_add_disk
> > > failed, and calling del_gendisk unconditionally like virtio_blk does is
> > > probably not the right thing to do (at the very least, I don't think
> > > unregistering a device that has not been registered is likely to work).
> > > 
> > 
> > Yes, I think all callers need to be reviewed before device_add_disk do the
> > clean up on error. For this patchset I wanted to keep the change small.
> 
> But do the callers even have a chance to do this correctly right now?
> They will either clean up too much, or too little. ('Too little' is
> probably the more common case, given that you just added error
> propagation...)

Right, which is pre-exising.

> 
> Can you make del_gendisk handle devices partially setup via
> device_add_disk in all cases? Then you could mandate pairing
> device_add_disk with del_gendisk in all cases, error or not, and you
> should have a better chance on avoiding introducing new errors.
> 

Of course, the plan is to write patches on top. I'm not cleaning up anything
here because I'm concerned callers may double free (and I didn't look hard into
that).

Fam


Re: [PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
On Wed, 08/17 11:06, Cornelia Huck wrote:
> On Wed, 17 Aug 2016 16:48:23 +0800
> Fam Zheng  wrote:
> 
> > On Wed, 08/17 10:49, Cornelia Huck wrote:
> > > On Wed, 17 Aug 2016 15:15:06 +0800
> > > Fam Zheng  wrote:
> > > 
> > > > @@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct 
> > > > gendisk *disk)
> > > > disk->flags |= GENHD_FL_UP;
> > > > 
> > > > retval = blk_alloc_devt(>part0, );
> > > > -   if (retval) {
> > > > -   WARN_ON(1);
> > > > -   return;
> > > > -   }
> > > > +   if (retval)
> > > > +   goto fail;
> > > > disk_to_dev(disk)->devt = devt;
> > > > 
> > > > /* ->major and ->first_minor aren't supposed to be
> > > > @@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, 
> > > > struct gendisk *disk)
> > > > disk->major = MAJOR(devt);
> > > > disk->first_minor = MINOR(devt);
> > > > 
> > > > -   disk_alloc_events(disk);
> > > > +   retval = disk_alloc_events(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > /* Register BDI before referencing it from bdev */
> > > > bdi = >queue->backing_dev_info;
> > > > -   bdi_register_owner(bdi, disk_to_dev(disk));
> > > > +   retval = bdi_register_owner(bdi, disk_to_dev(disk));
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > -   blk_register_region(disk_devt(disk), disk->minors, NULL,
> > > > -   exact_match, exact_lock, disk);
> > > > -   register_disk(parent, disk);
> > > > -   blk_register_queue(disk);
> > > > +   retval = blk_register_region(disk_devt(disk), disk->minors, 
> > > > NULL,
> > > > +exact_match, exact_lock, disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   retval = register_disk(parent, disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   retval = blk_register_queue(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > 
> > > > /*
> > > >  * Take an extra ref on queue which will be put on 
> > > > disk_release()
> > > > @@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, 
> > > > struct gendisk *disk)
> > > > 
> > > > retval = sysfs_create_link(_to_dev(disk)->kobj, 
> > > > >dev->kobj,
> > > >"bdi");
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +
> > > > +   retval = disk_add_events(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +
> > > > +   retval = blk_integrity_add(disk);
> > > > +   if (retval)
> > > > +   goto fail;
> > > > +   return 0;
> > > > +fail:
> > > > WARN_ON(retval);
> > > > -
> > > > -   disk_add_events(disk);
> > > > -   blk_integrity_add(disk);
> > > > +   return retval;
> > > >  }
> > > 
> > > Noticed this when trying to figure out whether the error handling in
> > > virtio_blk was correct:
> > > 
> > > Shouldn't you try to cleanup/rewind so that any structures are in a
> > > sane state after failure? The caller doesn't know where device_add_disk
> > > failed, and calling del_gendisk unconditionally like virtio_blk does is
> > > probably not the right thing to do (at the very least, I don't think
> > > unregistering a device that has not been registered is likely to work).
> > > 
> > 
> > Yes, I think all callers need to be reviewed before device_add_disk do the
> > clean up on error. For this patchset I wanted to keep the change small.
> 
> But do the callers even have a chance to do this correctly right now?
> They will either clean up too much, or too little. ('Too little' is
> probably the more common case, given that you just added error
> propagation...)

Right, which is pre-exising.

> 
> Can you make del_gendisk handle devices partially setup via
> device_add_disk in all cases? Then you could mandate pairing
> device_add_disk with del_gendisk in all cases, error or not, and you
> should have a better chance on avoiding introducing new errors.
> 

Of course, the plan is to write patches on top. I'm not cleaning up anything
here because I'm concerned callers may double free (and I didn't look hard into
that).

Fam


Re: [PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
On Wed, 08/17 10:49, Cornelia Huck wrote:
> On Wed, 17 Aug 2016 15:15:06 +0800
> Fam Zheng <f...@redhat.com> wrote:
> 
> > @@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > disk->flags |= GENHD_FL_UP;
> > 
> > retval = blk_alloc_devt(>part0, );
> > -   if (retval) {
> > -   WARN_ON(1);
> > -   return;
> > -   }
> > +   if (retval)
> > +   goto fail;
> > disk_to_dev(disk)->devt = devt;
> > 
> > /* ->major and ->first_minor aren't supposed to be
> > @@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > disk->major = MAJOR(devt);
> > disk->first_minor = MINOR(devt);
> > 
> > -   disk_alloc_events(disk);
> > +   retval = disk_alloc_events(disk);
> > +   if (retval)
> > +   goto fail;
> > 
> > /* Register BDI before referencing it from bdev */
> > bdi = >queue->backing_dev_info;
> > -   bdi_register_owner(bdi, disk_to_dev(disk));
> > +   retval = bdi_register_owner(bdi, disk_to_dev(disk));
> > +   if (retval)
> > +   goto fail;
> > 
> > -   blk_register_region(disk_devt(disk), disk->minors, NULL,
> > -   exact_match, exact_lock, disk);
> > -   register_disk(parent, disk);
> > -   blk_register_queue(disk);
> > +   retval = blk_register_region(disk_devt(disk), disk->minors, NULL,
> > +exact_match, exact_lock, disk);
> > +   if (retval)
> > +   goto fail;
> > +   retval = register_disk(parent, disk);
> > +   if (retval)
> > +   goto fail;
> > +   retval = blk_register_queue(disk);
> > +   if (retval)
> > +   goto fail;
> > 
> > /*
> >  * Take an extra ref on queue which will be put on disk_release()
> > @@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > 
> > retval = sysfs_create_link(_to_dev(disk)->kobj, >dev->kobj,
> >"bdi");
> > +   if (retval)
> > +   goto fail;
> > +
> > +   retval = disk_add_events(disk);
> > +   if (retval)
> > +   goto fail;
> > +
> > +   retval = blk_integrity_add(disk);
> > +   if (retval)
> > +   goto fail;
> > +   return 0;
> > +fail:
> > WARN_ON(retval);
> > -
> > -   disk_add_events(disk);
> > -   blk_integrity_add(disk);
> > +   return retval;
> >  }
> 
> Noticed this when trying to figure out whether the error handling in
> virtio_blk was correct:
> 
> Shouldn't you try to cleanup/rewind so that any structures are in a
> sane state after failure? The caller doesn't know where device_add_disk
> failed, and calling del_gendisk unconditionally like virtio_blk does is
> probably not the right thing to do (at the very least, I don't think
> unregistering a device that has not been registered is likely to work).
> 

Yes, I think all callers need to be reviewed before device_add_disk do the
clean up on error. For this patchset I wanted to keep the change small.

Fam


Re: [PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
On Wed, 08/17 10:49, Cornelia Huck wrote:
> On Wed, 17 Aug 2016 15:15:06 +0800
> Fam Zheng  wrote:
> 
> > @@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > disk->flags |= GENHD_FL_UP;
> > 
> > retval = blk_alloc_devt(>part0, );
> > -   if (retval) {
> > -   WARN_ON(1);
> > -   return;
> > -   }
> > +   if (retval)
> > +   goto fail;
> > disk_to_dev(disk)->devt = devt;
> > 
> > /* ->major and ->first_minor aren't supposed to be
> > @@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > disk->major = MAJOR(devt);
> > disk->first_minor = MINOR(devt);
> > 
> > -   disk_alloc_events(disk);
> > +   retval = disk_alloc_events(disk);
> > +   if (retval)
> > +   goto fail;
> > 
> > /* Register BDI before referencing it from bdev */
> > bdi = >queue->backing_dev_info;
> > -   bdi_register_owner(bdi, disk_to_dev(disk));
> > +   retval = bdi_register_owner(bdi, disk_to_dev(disk));
> > +   if (retval)
> > +   goto fail;
> > 
> > -   blk_register_region(disk_devt(disk), disk->minors, NULL,
> > -   exact_match, exact_lock, disk);
> > -   register_disk(parent, disk);
> > -   blk_register_queue(disk);
> > +   retval = blk_register_region(disk_devt(disk), disk->minors, NULL,
> > +exact_match, exact_lock, disk);
> > +   if (retval)
> > +   goto fail;
> > +   retval = register_disk(parent, disk);
> > +   if (retval)
> > +   goto fail;
> > +   retval = blk_register_queue(disk);
> > +   if (retval)
> > +   goto fail;
> > 
> > /*
> >  * Take an extra ref on queue which will be put on disk_release()
> > @@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, struct 
> > gendisk *disk)
> > 
> > retval = sysfs_create_link(_to_dev(disk)->kobj, >dev->kobj,
> >"bdi");
> > +   if (retval)
> > +   goto fail;
> > +
> > +   retval = disk_add_events(disk);
> > +   if (retval)
> > +   goto fail;
> > +
> > +   retval = blk_integrity_add(disk);
> > +   if (retval)
> > +   goto fail;
> > +   return 0;
> > +fail:
> > WARN_ON(retval);
> > -
> > -   disk_add_events(disk);
> > -   blk_integrity_add(disk);
> > +   return retval;
> >  }
> 
> Noticed this when trying to figure out whether the error handling in
> virtio_blk was correct:
> 
> Shouldn't you try to cleanup/rewind so that any structures are in a
> sane state after failure? The caller doesn't know where device_add_disk
> failed, and calling del_gendisk unconditionally like virtio_blk does is
> probably not the right thing to do (at the very least, I don't think
> unregistering a device that has not been registered is likely to work).
> 

Yes, I think all callers need to be reviewed before device_add_disk do the
clean up on error. For this patchset I wanted to keep the change small.

Fam


[PATCH 09/15] virtio-blk: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Meanwhile, handle error of device_add_disk.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/virtio_blk.c | 38 +++---
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4564df5..ff60d82 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -522,10 +522,10 @@ virtblk_cache_type_show(struct device *dev, struct 
device_attribute *attr,
return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
 }
 
-static const struct device_attribute dev_attr_cache_type_ro =
+static struct device_attribute dev_attr_cache_type_ro =
__ATTR(cache_type, S_IRUGO,
   virtblk_cache_type_show, NULL);
-static const struct device_attribute dev_attr_cache_type_rw =
+static struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR,
   virtblk_cache_type_show, virtblk_cache_type_store);
 
@@ -550,6 +550,26 @@ static struct blk_mq_ops virtio_mq_ops = {
 static unsigned int virtblk_queue_depth;
 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
 
+static struct attribute *virtblk_attrs_ro[] = {
+   _attr_serial.attr,
+   _attr_cache_type_ro.attr,
+   NULL
+};
+
+static struct attribute *virtblk_attrs_rw[] = {
+   _attr_serial.attr,
+   _attr_cache_type_rw.attr,
+   NULL
+};
+
+static struct attribute_group virtblk_attr_group_ro = {
+   .attrs  = virtblk_attrs_ro,
+};
+
+static struct attribute_group virtblk_attr_group_rw = {
+   .attrs  = virtblk_attrs_rw,
+};
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
struct virtio_blk *vblk;
@@ -560,6 +580,7 @@ static int virtblk_probe(struct virtio_device *vdev)
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
+   struct attribute_group *attr_group;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -719,19 +740,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
virtio_device_ready(vdev);
 
-   device_add_disk(>dev, vblk->disk, NULL);
-   err = device_create_file(disk_to_dev(vblk->disk), _attr_serial);
-   if (err)
-   goto out_del_disk;
-
if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
-   err = device_create_file(disk_to_dev(vblk->disk),
-_attr_cache_type_rw);
+   attr_group = _attr_group_rw;
else
-   err = device_create_file(disk_to_dev(vblk->disk),
-_attr_cache_type_ro);
+   attr_group = _attr_group_ro;
+   err = device_add_disk(>dev, vblk->disk, attr_group);
if (err)
goto out_del_disk;
+
return 0;
 
 out_del_disk:
-- 
2.7.4



[PATCH 09/15] virtio-blk: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Meanwhile, handle error of device_add_disk.

Signed-off-by: Fam Zheng 
---
 drivers/block/virtio_blk.c | 38 +++---
 1 file changed, 27 insertions(+), 11 deletions(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 4564df5..ff60d82 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -522,10 +522,10 @@ virtblk_cache_type_show(struct device *dev, struct 
device_attribute *attr,
return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]);
 }
 
-static const struct device_attribute dev_attr_cache_type_ro =
+static struct device_attribute dev_attr_cache_type_ro =
__ATTR(cache_type, S_IRUGO,
   virtblk_cache_type_show, NULL);
-static const struct device_attribute dev_attr_cache_type_rw =
+static struct device_attribute dev_attr_cache_type_rw =
__ATTR(cache_type, S_IRUGO|S_IWUSR,
   virtblk_cache_type_show, virtblk_cache_type_store);
 
@@ -550,6 +550,26 @@ static struct blk_mq_ops virtio_mq_ops = {
 static unsigned int virtblk_queue_depth;
 module_param_named(queue_depth, virtblk_queue_depth, uint, 0444);
 
+static struct attribute *virtblk_attrs_ro[] = {
+   _attr_serial.attr,
+   _attr_cache_type_ro.attr,
+   NULL
+};
+
+static struct attribute *virtblk_attrs_rw[] = {
+   _attr_serial.attr,
+   _attr_cache_type_rw.attr,
+   NULL
+};
+
+static struct attribute_group virtblk_attr_group_ro = {
+   .attrs  = virtblk_attrs_ro,
+};
+
+static struct attribute_group virtblk_attr_group_rw = {
+   .attrs  = virtblk_attrs_rw,
+};
+
 static int virtblk_probe(struct virtio_device *vdev)
 {
struct virtio_blk *vblk;
@@ -560,6 +580,7 @@ static int virtblk_probe(struct virtio_device *vdev)
u32 v, blk_size, sg_elems, opt_io_size;
u16 min_io_size;
u8 physical_block_exp, alignment_offset;
+   struct attribute_group *attr_group;
 
if (!vdev->config->get) {
dev_err(>dev, "%s failure: config access disabled\n",
@@ -719,19 +740,14 @@ static int virtblk_probe(struct virtio_device *vdev)
 
virtio_device_ready(vdev);
 
-   device_add_disk(>dev, vblk->disk, NULL);
-   err = device_create_file(disk_to_dev(vblk->disk), _attr_serial);
-   if (err)
-   goto out_del_disk;
-
if (virtio_has_feature(vdev, VIRTIO_BLK_F_CONFIG_WCE))
-   err = device_create_file(disk_to_dev(vblk->disk),
-_attr_cache_type_rw);
+   attr_group = _attr_group_rw;
else
-   err = device_create_file(disk_to_dev(vblk->disk),
-_attr_cache_type_ro);
+   attr_group = _attr_group_ro;
+   err = device_add_disk(>dev, vblk->disk, attr_group);
if (err)
goto out_del_disk;
+
return 0;
 
 out_del_disk:
-- 
2.7.4



[PATCH 08/15] nvme: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/nvme/host/core.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 23a795f..1921cb2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1686,11 +1686,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, 
unsigned nsid)
if (ns->type == NVME_NS_LIGHTNVM)
return;
 
-   device_add_disk(ctrl->device, ns->disk, NULL);
-   if (sysfs_create_group(_to_dev(ns->disk)->kobj,
-   _ns_attr_group))
-   pr_warn("%s: failed to create sysfs group for identification\n",
-   ns->disk->disk_name);
+   device_add_disk(ctrl->device, ns->disk, _ns_attr_group);
return;
  out_free_disk:
kfree(disk);
-- 
2.7.4



[PATCH 07/15] genhd: Add attribute group parameter to device_add_disk

2016-08-17 Thread Fam Zheng
The added parameter attr_group, if not NULL, be added to the new disk.

The callers are converted with coccinelle:

  @@
  expression e1, e2;
  @@
  - device_add_disk(e1, e2);
  + device_add_disk(e1, e2, NULL);

So there is not behavior change yet.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 arch/m68k/emu/nfblock.c |  2 +-
 arch/powerpc/sysdev/axonram.c   |  2 +-
 arch/um/drivers/ubd_kern.c  |  2 +-
 arch/xtensa/platforms/iss/simdisk.c |  2 +-
 block/genhd.c   | 10 +-
 drivers/block/DAC960.c  |  2 +-
 drivers/block/amiflop.c |  2 +-
 drivers/block/aoe/aoeblk.c  |  2 +-
 drivers/block/ataflop.c |  2 +-
 drivers/block/brd.c |  4 ++--
 drivers/block/cciss.c   |  2 +-
 drivers/block/drbd/drbd_main.c  |  2 +-
 drivers/block/floppy.c  |  2 +-
 drivers/block/hd.c  |  2 +-
 drivers/block/loop.c|  2 +-
 drivers/block/mg_disk.c |  2 +-
 drivers/block/mtip32xx/mtip32xx.c   |  2 +-
 drivers/block/nbd.c |  2 +-
 drivers/block/null_blk.c|  2 +-
 drivers/block/osdblk.c  |  2 +-
 drivers/block/paride/pcd.c  |  2 +-
 drivers/block/paride/pd.c   |  2 +-
 drivers/block/paride/pf.c   |  2 +-
 drivers/block/pktcdvd.c |  2 +-
 drivers/block/ps3disk.c |  2 +-
 drivers/block/ps3vram.c |  2 +-
 drivers/block/rbd.c |  2 +-
 drivers/block/rsxx/dev.c|  2 +-
 drivers/block/skd_main.c|  2 +-
 drivers/block/sunvdc.c  |  2 +-
 drivers/block/swim.c|  2 +-
 drivers/block/swim3.c   |  2 +-
 drivers/block/sx8.c |  2 +-
 drivers/block/umem.c|  2 +-
 drivers/block/virtio_blk.c  |  2 +-
 drivers/block/xen-blkfront.c|  2 +-
 drivers/block/xsysace.c |  2 +-
 drivers/block/z2ram.c   |  2 +-
 drivers/block/zram/zram_drv.c   |  2 +-
 drivers/cdrom/gdrom.c   |  2 +-
 drivers/ide/ide-cd.c|  2 +-
 drivers/ide/ide-gd.c|  2 +-
 drivers/lightnvm/gennvm.c   |  2 +-
 drivers/md/bcache/super.c   |  4 ++--
 drivers/md/dm.c |  2 +-
 drivers/md/md.c |  2 +-
 drivers/memstick/core/ms_block.c|  2 +-
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/mmc/card/block.c|  2 +-
 drivers/mtd/mtd_blkdevs.c   |  2 +-
 drivers/mtd/ubi/block.c |  2 +-
 drivers/nvdimm/blk.c|  2 +-
 drivers/nvdimm/btt.c|  2 +-
 drivers/nvdimm/pmem.c   |  2 +-
 drivers/nvme/host/core.c|  2 +-
 drivers/s390/block/dasd_genhd.c |  2 +-
 drivers/s390/block/dcssblk.c|  2 +-
 drivers/s390/block/scm_blk.c|  2 +-
 drivers/s390/block/xpram.c  |  2 +-
 drivers/sbus/char/jsflash.c |  2 +-
 drivers/scsi/sd.c   |  2 +-
 drivers/scsi/sr.c   |  2 +-
 include/linux/genhd.h   |  3 ++-
 63 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 573f76d..29dfdd6 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
-   device_add_disk(NULL, dev->disk);
+   device_add_disk(NULL, dev->disk, NULL);
 
list_add_tail(>list, _list);
 
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9144204..6aef6c2 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -237,7 +237,7 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   device_add_disk(>dev, bank->disk);
+   device_add_disk(>dev, bank->disk, NULL);
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f354027..45bbca5 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -829,7 +829,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
disk->private_data = _devs[unit];
disk->queue = ubd_devs[unit].queue;
-   device_add_disk(parent, disk);
+   device_add_disk(parent, disk, NULL);
 
*disk_out = disk;
return 0;
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index da000

[PATCH 08/15] nvme: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng 
---
 drivers/nvme/host/core.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 23a795f..1921cb2 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -1686,11 +1686,7 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, 
unsigned nsid)
if (ns->type == NVME_NS_LIGHTNVM)
return;
 
-   device_add_disk(ctrl->device, ns->disk, NULL);
-   if (sysfs_create_group(_to_dev(ns->disk)->kobj,
-   _ns_attr_group))
-   pr_warn("%s: failed to create sysfs group for identification\n",
-   ns->disk->disk_name);
+   device_add_disk(ctrl->device, ns->disk, _ns_attr_group);
return;
  out_free_disk:
kfree(disk);
-- 
2.7.4



[PATCH 07/15] genhd: Add attribute group parameter to device_add_disk

2016-08-17 Thread Fam Zheng
The added parameter attr_group, if not NULL, be added to the new disk.

The callers are converted with coccinelle:

  @@
  expression e1, e2;
  @@
  - device_add_disk(e1, e2);
  + device_add_disk(e1, e2, NULL);

So there is not behavior change yet.

Signed-off-by: Fam Zheng 
---
 arch/m68k/emu/nfblock.c |  2 +-
 arch/powerpc/sysdev/axonram.c   |  2 +-
 arch/um/drivers/ubd_kern.c  |  2 +-
 arch/xtensa/platforms/iss/simdisk.c |  2 +-
 block/genhd.c   | 10 +-
 drivers/block/DAC960.c  |  2 +-
 drivers/block/amiflop.c |  2 +-
 drivers/block/aoe/aoeblk.c  |  2 +-
 drivers/block/ataflop.c |  2 +-
 drivers/block/brd.c |  4 ++--
 drivers/block/cciss.c   |  2 +-
 drivers/block/drbd/drbd_main.c  |  2 +-
 drivers/block/floppy.c  |  2 +-
 drivers/block/hd.c  |  2 +-
 drivers/block/loop.c|  2 +-
 drivers/block/mg_disk.c |  2 +-
 drivers/block/mtip32xx/mtip32xx.c   |  2 +-
 drivers/block/nbd.c |  2 +-
 drivers/block/null_blk.c|  2 +-
 drivers/block/osdblk.c  |  2 +-
 drivers/block/paride/pcd.c  |  2 +-
 drivers/block/paride/pd.c   |  2 +-
 drivers/block/paride/pf.c   |  2 +-
 drivers/block/pktcdvd.c |  2 +-
 drivers/block/ps3disk.c |  2 +-
 drivers/block/ps3vram.c |  2 +-
 drivers/block/rbd.c |  2 +-
 drivers/block/rsxx/dev.c|  2 +-
 drivers/block/skd_main.c|  2 +-
 drivers/block/sunvdc.c  |  2 +-
 drivers/block/swim.c|  2 +-
 drivers/block/swim3.c   |  2 +-
 drivers/block/sx8.c |  2 +-
 drivers/block/umem.c|  2 +-
 drivers/block/virtio_blk.c  |  2 +-
 drivers/block/xen-blkfront.c|  2 +-
 drivers/block/xsysace.c |  2 +-
 drivers/block/z2ram.c   |  2 +-
 drivers/block/zram/zram_drv.c   |  2 +-
 drivers/cdrom/gdrom.c   |  2 +-
 drivers/ide/ide-cd.c|  2 +-
 drivers/ide/ide-gd.c|  2 +-
 drivers/lightnvm/gennvm.c   |  2 +-
 drivers/md/bcache/super.c   |  4 ++--
 drivers/md/dm.c |  2 +-
 drivers/md/md.c |  2 +-
 drivers/memstick/core/ms_block.c|  2 +-
 drivers/memstick/core/mspro_block.c |  2 +-
 drivers/mmc/card/block.c|  2 +-
 drivers/mtd/mtd_blkdevs.c   |  2 +-
 drivers/mtd/ubi/block.c |  2 +-
 drivers/nvdimm/blk.c|  2 +-
 drivers/nvdimm/btt.c|  2 +-
 drivers/nvdimm/pmem.c   |  2 +-
 drivers/nvme/host/core.c|  2 +-
 drivers/s390/block/dasd_genhd.c |  2 +-
 drivers/s390/block/dcssblk.c|  2 +-
 drivers/s390/block/scm_blk.c|  2 +-
 drivers/s390/block/xpram.c  |  2 +-
 drivers/sbus/char/jsflash.c |  2 +-
 drivers/scsi/sd.c   |  2 +-
 drivers/scsi/sr.c   |  2 +-
 include/linux/genhd.h   |  3 ++-
 63 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 573f76d..29dfdd6 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
-   device_add_disk(NULL, dev->disk);
+   device_add_disk(NULL, dev->disk, NULL);
 
list_add_tail(>list, _list);
 
diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 9144204..6aef6c2 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -237,7 +237,7 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   device_add_disk(>dev, bank->disk);
+   device_add_disk(>dev, bank->disk, NULL);
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index f354027..45bbca5 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -829,7 +829,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
disk->private_data = _devs[unit];
disk->queue = ubd_devs[unit].queue;
-   device_add_disk(parent, disk);
+   device_add_disk(parent, disk, NULL);
 
*disk_out = disk;
return 0;
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index da000c1..6a3d8c3 100644
-

[PATCH 11/15] zram: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, update the error check code and message.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/zram/zram_drv.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 20920a2..2331788 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1298,13 +1298,10 @@ static int zram_add(void)
zram->disk->queue->limits.discard_zeroes_data = 0;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
-   device_add_disk(NULL, zram->disk, NULL);
+   ret = device_add_disk(NULL, zram->disk, _disk_attr_group);
 
-   ret = sysfs_create_group(_to_dev(zram->disk)->kobj,
-   _disk_attr_group);
if (ret < 0) {
-   pr_err("Error creating sysfs group for device %d\n",
-   device_id);
+   pr_err("Error creating disk %d\n", device_id);
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
-- 
2.7.4



[PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
There are a number of places in device_add_disk that can fail, and even
more to come as we extend it.

Switch the return type of the function, and return the error code when
error happens.

The WARN_ON is kept because callers are not updated to check the error
yet.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/genhd.c | 49 ++---
 include/linux/genhd.h |  2 +-
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 4316d2d..ffb3929 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -595,9 +595,10 @@ exit:
  * This function registers the partitioning information in @disk
  * with the kernel.
  *
- * FIXME: error handling
+ * RETURNS:
+ * 0 on success, -errno on failure.
  */
-void device_add_disk(struct device *parent, struct gendisk *disk)
+int device_add_disk(struct device *parent, struct gendisk *disk)
 {
struct backing_dev_info *bdi;
dev_t devt;
@@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct gendisk 
*disk)
disk->flags |= GENHD_FL_UP;
 
retval = blk_alloc_devt(>part0, );
-   if (retval) {
-   WARN_ON(1);
-   return;
-   }
+   if (retval)
+   goto fail;
disk_to_dev(disk)->devt = devt;
 
/* ->major and ->first_minor aren't supposed to be
@@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, struct 
gendisk *disk)
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
 
-   disk_alloc_events(disk);
+   retval = disk_alloc_events(disk);
+   if (retval)
+   goto fail;
 
/* Register BDI before referencing it from bdev */
bdi = >queue->backing_dev_info;
-   bdi_register_owner(bdi, disk_to_dev(disk));
+   retval = bdi_register_owner(bdi, disk_to_dev(disk));
+   if (retval)
+   goto fail;
 
-   blk_register_region(disk_devt(disk), disk->minors, NULL,
-   exact_match, exact_lock, disk);
-   register_disk(parent, disk);
-   blk_register_queue(disk);
+   retval = blk_register_region(disk_devt(disk), disk->minors, NULL,
+exact_match, exact_lock, disk);
+   if (retval)
+   goto fail;
+   retval = register_disk(parent, disk);
+   if (retval)
+   goto fail;
+   retval = blk_register_queue(disk);
+   if (retval)
+   goto fail;
 
/*
 * Take an extra ref on queue which will be put on disk_release()
@@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, struct 
gendisk *disk)
 
retval = sysfs_create_link(_to_dev(disk)->kobj, >dev->kobj,
   "bdi");
+   if (retval)
+   goto fail;
+
+   retval = disk_add_events(disk);
+   if (retval)
+   goto fail;
+
+   retval = blk_integrity_add(disk);
+   if (retval)
+   goto fail;
+   return 0;
+fail:
WARN_ON(retval);
-
-   disk_add_events(disk);
-   blk_integrity_add(disk);
+   return retval;
 }
 EXPORT_SYMBOL(device_add_disk);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 85ce560..991b5ff 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -413,7 +413,7 @@ static inline void free_part_info(struct hd_struct *part)
 extern void part_round_stats(int cpu, struct hd_struct *part);
 
 /* block/genhd.c */
-extern void device_add_disk(struct device *parent, struct gendisk *disk);
+extern int device_add_disk(struct device *parent, struct gendisk *disk);
 
 extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
-- 
2.7.4



[PATCH 11/15] zram: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, update the error check code and message.

Signed-off-by: Fam Zheng 
---
 drivers/block/zram/zram_drv.c | 7 ++-
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index 20920a2..2331788 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1298,13 +1298,10 @@ static int zram_add(void)
zram->disk->queue->limits.discard_zeroes_data = 0;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
-   device_add_disk(NULL, zram->disk, NULL);
+   ret = device_add_disk(NULL, zram->disk, _disk_attr_group);
 
-   ret = sysfs_create_group(_to_dev(zram->disk)->kobj,
-   _disk_attr_group);
if (ret < 0) {
-   pr_err("Error creating sysfs group for device %d\n",
-   device_id);
+   pr_err("Error creating disk %d\n", device_id);
goto out_free_disk;
}
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
-- 
2.7.4



[PATCH 06/15] genhd: Add return code to device_add_disk

2016-08-17 Thread Fam Zheng
There are a number of places in device_add_disk that can fail, and even
more to come as we extend it.

Switch the return type of the function, and return the error code when
error happens.

The WARN_ON is kept because callers are not updated to check the error
yet.

Signed-off-by: Fam Zheng 
---
 block/genhd.c | 49 ++---
 include/linux/genhd.h |  2 +-
 2 files changed, 35 insertions(+), 16 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 4316d2d..ffb3929 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -595,9 +595,10 @@ exit:
  * This function registers the partitioning information in @disk
  * with the kernel.
  *
- * FIXME: error handling
+ * RETURNS:
+ * 0 on success, -errno on failure.
  */
-void device_add_disk(struct device *parent, struct gendisk *disk)
+int device_add_disk(struct device *parent, struct gendisk *disk)
 {
struct backing_dev_info *bdi;
dev_t devt;
@@ -613,10 +614,8 @@ void device_add_disk(struct device *parent, struct gendisk 
*disk)
disk->flags |= GENHD_FL_UP;
 
retval = blk_alloc_devt(>part0, );
-   if (retval) {
-   WARN_ON(1);
-   return;
-   }
+   if (retval)
+   goto fail;
disk_to_dev(disk)->devt = devt;
 
/* ->major and ->first_minor aren't supposed to be
@@ -625,16 +624,26 @@ void device_add_disk(struct device *parent, struct 
gendisk *disk)
disk->major = MAJOR(devt);
disk->first_minor = MINOR(devt);
 
-   disk_alloc_events(disk);
+   retval = disk_alloc_events(disk);
+   if (retval)
+   goto fail;
 
/* Register BDI before referencing it from bdev */
bdi = >queue->backing_dev_info;
-   bdi_register_owner(bdi, disk_to_dev(disk));
+   retval = bdi_register_owner(bdi, disk_to_dev(disk));
+   if (retval)
+   goto fail;
 
-   blk_register_region(disk_devt(disk), disk->minors, NULL,
-   exact_match, exact_lock, disk);
-   register_disk(parent, disk);
-   blk_register_queue(disk);
+   retval = blk_register_region(disk_devt(disk), disk->minors, NULL,
+exact_match, exact_lock, disk);
+   if (retval)
+   goto fail;
+   retval = register_disk(parent, disk);
+   if (retval)
+   goto fail;
+   retval = blk_register_queue(disk);
+   if (retval)
+   goto fail;
 
/*
 * Take an extra ref on queue which will be put on disk_release()
@@ -644,10 +653,20 @@ void device_add_disk(struct device *parent, struct 
gendisk *disk)
 
retval = sysfs_create_link(_to_dev(disk)->kobj, >dev->kobj,
   "bdi");
+   if (retval)
+   goto fail;
+
+   retval = disk_add_events(disk);
+   if (retval)
+   goto fail;
+
+   retval = blk_integrity_add(disk);
+   if (retval)
+   goto fail;
+   return 0;
+fail:
WARN_ON(retval);
-
-   disk_add_events(disk);
-   blk_integrity_add(disk);
+   return retval;
 }
 EXPORT_SYMBOL(device_add_disk);
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 85ce560..991b5ff 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -413,7 +413,7 @@ static inline void free_part_info(struct hd_struct *part)
 extern void part_round_stats(int cpu, struct hd_struct *part);
 
 /* block/genhd.c */
-extern void device_add_disk(struct device *parent, struct gendisk *disk);
+extern int device_add_disk(struct device *parent, struct gendisk *disk);
 
 extern void del_gendisk(struct gendisk *gp);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
-- 
2.7.4



[PATCH 05/15] genhd: Return error from disk_{add,alloc}_events

2016-08-17 Thread Fam Zheng
disk_alloc_events and disk_add_events can fail, return the error code so
the caller can handle it.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/genhd.c | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8c7510d..4316d2d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -40,8 +40,8 @@ static struct device_type disk_type;
 
 static void disk_check_events(struct disk_events *ev,
  unsigned int *clearing_ptr);
-static void disk_alloc_events(struct gendisk *disk);
-static void disk_add_events(struct gendisk *disk);
+static int disk_alloc_events(struct gendisk *disk);
+static int disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);
 
@@ -1823,17 +1823,17 @@ module_param_cb(events_dfl_poll_msecs, 
_events_dfl_poll_msecs_param_ops,
 /*
  * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
  */
-static void disk_alloc_events(struct gendisk *disk)
+static int disk_alloc_events(struct gendisk *disk)
 {
struct disk_events *ev;
 
if (!disk->fops->check_events)
-   return;
+   return 0;
 
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev) {
pr_warn("%s: failed to initialize events\n", disk->disk_name);
-   return;
+   return -ENOMEM;
}
 
INIT_LIST_HEAD(>node);
@@ -1845,17 +1845,22 @@ static void disk_alloc_events(struct gendisk *disk)
INIT_DELAYED_WORK(>dwork, disk_events_workfn);
 
disk->ev = ev;
+   return 0;
 }
 
-static void disk_add_events(struct gendisk *disk)
+static int disk_add_events(struct gendisk *disk)
 {
+   int rc;
+
if (!disk->ev)
-   return;
+   return 0;
 
-   /* FIXME: error handling */
-   if (sysfs_create_files(_to_dev(disk)->kobj, disk_events_attrs) < 0)
+   rc = sysfs_create_files(_to_dev(disk)->kobj, disk_events_attrs);
+   if (rc) {
pr_warn("%s: failed to create sysfs files for events\n",
disk->disk_name);
+   return rc;
+   }
 
mutex_lock(_events_mutex);
list_add_tail(>ev->node, _events);
@@ -1866,6 +1871,7 @@ static void disk_add_events(struct gendisk *disk)
 * unblock kicks it into action.
 */
__disk_unblock_events(disk, true);
+   return 0;
 }
 
 static void disk_del_events(struct gendisk *disk)
-- 
2.7.4



[PATCH 05/15] genhd: Return error from disk_{add,alloc}_events

2016-08-17 Thread Fam Zheng
disk_alloc_events and disk_add_events can fail, return the error code so
the caller can handle it.

Signed-off-by: Fam Zheng 
---
 block/genhd.c | 24 +++-
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8c7510d..4316d2d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -40,8 +40,8 @@ static struct device_type disk_type;
 
 static void disk_check_events(struct disk_events *ev,
  unsigned int *clearing_ptr);
-static void disk_alloc_events(struct gendisk *disk);
-static void disk_add_events(struct gendisk *disk);
+static int disk_alloc_events(struct gendisk *disk);
+static int disk_add_events(struct gendisk *disk);
 static void disk_del_events(struct gendisk *disk);
 static void disk_release_events(struct gendisk *disk);
 
@@ -1823,17 +1823,17 @@ module_param_cb(events_dfl_poll_msecs, 
_events_dfl_poll_msecs_param_ops,
 /*
  * disk_{alloc|add|del|release}_events - initialize and destroy disk_events.
  */
-static void disk_alloc_events(struct gendisk *disk)
+static int disk_alloc_events(struct gendisk *disk)
 {
struct disk_events *ev;
 
if (!disk->fops->check_events)
-   return;
+   return 0;
 
ev = kzalloc(sizeof(*ev), GFP_KERNEL);
if (!ev) {
pr_warn("%s: failed to initialize events\n", disk->disk_name);
-   return;
+   return -ENOMEM;
}
 
INIT_LIST_HEAD(>node);
@@ -1845,17 +1845,22 @@ static void disk_alloc_events(struct gendisk *disk)
INIT_DELAYED_WORK(>dwork, disk_events_workfn);
 
disk->ev = ev;
+   return 0;
 }
 
-static void disk_add_events(struct gendisk *disk)
+static int disk_add_events(struct gendisk *disk)
 {
+   int rc;
+
if (!disk->ev)
-   return;
+   return 0;
 
-   /* FIXME: error handling */
-   if (sysfs_create_files(_to_dev(disk)->kobj, disk_events_attrs) < 0)
+   rc = sysfs_create_files(_to_dev(disk)->kobj, disk_events_attrs);
+   if (rc) {
pr_warn("%s: failed to create sysfs files for events\n",
disk->disk_name);
+   return rc;
+   }
 
mutex_lock(_events_mutex);
list_add_tail(>ev->node, _events);
@@ -1866,6 +1871,7 @@ static void disk_add_events(struct gendisk *disk)
 * unblock kicks it into action.
 */
__disk_unblock_events(disk, true);
+   return 0;
 }
 
 static void disk_del_events(struct gendisk *disk)
-- 
2.7.4



[PATCH 10/15] mtd: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/mtd/mtd_blkdevs.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 4ff9381..fc9265d 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -434,13 +434,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
if (new->readonly)
set_disk_ro(gd, 1);
 
-   device_add_disk(>mtd->dev, gd, NULL);
-
-   if (new->disk_attributes) {
-   ret = sysfs_create_group(_to_dev(gd)->kobj,
-   new->disk_attributes);
-   WARN_ON(ret);
-   }
+   device_add_disk(>mtd->dev, gd, new->disk_attributes);
return 0;
 error4:
blk_cleanup_queue(new->rq);
-- 
2.7.4



[PATCH 10/15] mtd: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng 
---
 drivers/mtd/mtd_blkdevs.c | 8 +---
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/mtd/mtd_blkdevs.c b/drivers/mtd/mtd_blkdevs.c
index 4ff9381..fc9265d 100644
--- a/drivers/mtd/mtd_blkdevs.c
+++ b/drivers/mtd/mtd_blkdevs.c
@@ -434,13 +434,7 @@ int add_mtd_blktrans_dev(struct mtd_blktrans_dev *new)
if (new->readonly)
set_disk_ro(gd, 1);
 
-   device_add_disk(>mtd->dev, gd, NULL);
-
-   if (new->disk_attributes) {
-   ret = sysfs_create_group(_to_dev(gd)->kobj,
-   new->disk_attributes);
-   WARN_ON(ret);
-   }
+   device_add_disk(>mtd->dev, gd, new->disk_attributes);
return 0;
 error4:
blk_cleanup_queue(new->rq);
-- 
2.7.4



[PATCH 13/15] aoeblk: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/aoe/aoeblk.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 4edfff2..d1b7541 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -177,7 +177,7 @@ static struct attribute *aoe_attrs[] = {
NULL,
 };
 
-static const struct attribute_group attr_group = {
+static struct attribute_group attr_group = {
.attrs = aoe_attrs,
 };
 
@@ -219,11 +219,6 @@ aoedisk_rm_debugfs(struct aoedev *d)
d->debugfs = NULL;
 }
 
-static int
-aoedisk_add_sysfs(struct aoedev *d)
-{
-   return sysfs_create_group(_to_dev(d->gd)->kobj, _group);
-}
 void
 aoedisk_rm_sysfs(struct aoedev *d)
 {
@@ -417,8 +412,7 @@ aoeblk_gdalloc(void *vp)
 
spin_unlock_irqrestore(>lock, flags);
 
-   device_add_disk(NULL, gd, NULL);
-   aoedisk_add_sysfs(d);
+   device_add_disk(NULL, gd, _group);
aoedisk_add_debugfs(d);
 
spin_lock_irqsave(>lock, flags);
-- 
2.7.4



[PATCH 12/15] mtip: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, update the error check code and message.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/mtip32xx/mtip32xx.c | 38 --
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c 
b/drivers/block/mtip32xx/mtip32xx.c
index 4048a70..504c549 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2702,26 +2702,14 @@ static const struct file_operations mtip_flags_fops = {
.llseek = no_llseek,
 };
 
-/*
- * Create the sysfs related attributes.
- *
- * @dd   Pointer to the driver data structure.
- * @kobj Pointer to the kobj for the block device.
- *
- * return value
- * 0   Operation completed successfully.
- * -EINVAL Invalid parameter.
- */
-static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
-{
-   if (!kobj || !dd)
-   return -EINVAL;
+static struct attribute *mtip_dev_attrs[] = {
+   _attr_status.attr,
+   NULL
+};
 
-   if (sysfs_create_file(kobj, _attr_status.attr))
-   dev_warn(>pdev->dev,
-   "Error creating 'status' sysfs entry\n");
-   return 0;
-}
+static struct attribute_group mtip_attr_group = {
+   .attrs = mtip_dev_attrs,
+};
 
 /*
  * Remove the sysfs related attributes.
@@ -3918,7 +3906,6 @@ static int mtip_block_initialize(struct driver_data *dd)
int rv = 0, wait_for_rebuild = 0;
sector_t capacity;
unsigned int index = 0;
-   struct kobject *kobj;
 
if (dd->disk)
goto skip_create_disk; /* hw init done, before rebuild */
@@ -4041,18 +4028,9 @@ skip_create_disk:
set_capacity(dd->disk, capacity);
 
/* Enable the block device and add it to /dev */
-   device_add_disk(>pdev->dev, dd->disk, NULL);
+   device_add_disk(>pdev->dev, dd->disk, _attr_group);
 
dd->bdev = bdget_disk(dd->disk, 0);
-   /*
-* Now that the disk is active, initialize any sysfs attributes
-* managed by the protocol layer.
-*/
-   kobj = kobject_get(_to_dev(dd->disk)->kobj);
-   if (kobj) {
-   mtip_hw_sysfs_init(dd, kobj);
-   kobject_put(kobj);
-   }
 
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, >dd_flag);
-- 
2.7.4



[PATCH 13/15] aoeblk: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.

Signed-off-by: Fam Zheng 
---
 drivers/block/aoe/aoeblk.c | 10 ++
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index 4edfff2..d1b7541 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -177,7 +177,7 @@ static struct attribute *aoe_attrs[] = {
NULL,
 };
 
-static const struct attribute_group attr_group = {
+static struct attribute_group attr_group = {
.attrs = aoe_attrs,
 };
 
@@ -219,11 +219,6 @@ aoedisk_rm_debugfs(struct aoedev *d)
d->debugfs = NULL;
 }
 
-static int
-aoedisk_add_sysfs(struct aoedev *d)
-{
-   return sysfs_create_group(_to_dev(d->gd)->kobj, _group);
-}
 void
 aoedisk_rm_sysfs(struct aoedev *d)
 {
@@ -417,8 +412,7 @@ aoeblk_gdalloc(void *vp)
 
spin_unlock_irqrestore(>lock, flags);
 
-   device_add_disk(NULL, gd, NULL);
-   aoedisk_add_sysfs(d);
+   device_add_disk(NULL, gd, _group);
aoedisk_add_debugfs(d);
 
spin_lock_irqsave(>lock, flags);
-- 
2.7.4



[PATCH 12/15] mtip: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, update the error check code and message.

Signed-off-by: Fam Zheng 
---
 drivers/block/mtip32xx/mtip32xx.c | 38 --
 1 file changed, 8 insertions(+), 30 deletions(-)

diff --git a/drivers/block/mtip32xx/mtip32xx.c 
b/drivers/block/mtip32xx/mtip32xx.c
index 4048a70..504c549 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -2702,26 +2702,14 @@ static const struct file_operations mtip_flags_fops = {
.llseek = no_llseek,
 };
 
-/*
- * Create the sysfs related attributes.
- *
- * @dd   Pointer to the driver data structure.
- * @kobj Pointer to the kobj for the block device.
- *
- * return value
- * 0   Operation completed successfully.
- * -EINVAL Invalid parameter.
- */
-static int mtip_hw_sysfs_init(struct driver_data *dd, struct kobject *kobj)
-{
-   if (!kobj || !dd)
-   return -EINVAL;
+static struct attribute *mtip_dev_attrs[] = {
+   _attr_status.attr,
+   NULL
+};
 
-   if (sysfs_create_file(kobj, _attr_status.attr))
-   dev_warn(>pdev->dev,
-   "Error creating 'status' sysfs entry\n");
-   return 0;
-}
+static struct attribute_group mtip_attr_group = {
+   .attrs = mtip_dev_attrs,
+};
 
 /*
  * Remove the sysfs related attributes.
@@ -3918,7 +3906,6 @@ static int mtip_block_initialize(struct driver_data *dd)
int rv = 0, wait_for_rebuild = 0;
sector_t capacity;
unsigned int index = 0;
-   struct kobject *kobj;
 
if (dd->disk)
goto skip_create_disk; /* hw init done, before rebuild */
@@ -4041,18 +4028,9 @@ skip_create_disk:
set_capacity(dd->disk, capacity);
 
/* Enable the block device and add it to /dev */
-   device_add_disk(>pdev->dev, dd->disk, NULL);
+   device_add_disk(>pdev->dev, dd->disk, _attr_group);
 
dd->bdev = bdget_disk(dd->disk, 0);
-   /*
-* Now that the disk is active, initialize any sysfs attributes
-* managed by the protocol layer.
-*/
-   kobj = kobject_get(_to_dev(dd->disk)->kobj);
-   if (kobj) {
-   mtip_hw_sysfs_init(dd, kobj);
-   kobject_put(kobj);
-   }
 
if (dd->mtip_svc_handler) {
set_bit(MTIP_DDF_INIT_DONE_BIT, >dd_flag);
-- 
2.7.4



[PATCH 15/15] block: Add FIXME comment to handle device_add_disk error

2016-08-17 Thread Fam Zheng
Done with coccinelle:

  @@
  expression e1, e2, e3;
  identifier rc;
  @@
  (
rc = device_add_disk(e1, e2, e3);
  |
  + /* FIXME: handle error. */
device_add_disk(e1, e2, e3);
  )

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 arch/m68k/emu/nfblock.c | 1 +
 arch/um/drivers/ubd_kern.c  | 1 +
 arch/xtensa/platforms/iss/simdisk.c | 1 +
 drivers/block/DAC960.c  | 1 +
 drivers/block/amiflop.c | 1 +
 drivers/block/aoe/aoeblk.c  | 1 +
 drivers/block/ataflop.c | 1 +
 drivers/block/brd.c | 2 ++
 drivers/block/cciss.c   | 1 +
 drivers/block/drbd/drbd_main.c  | 1 +
 drivers/block/floppy.c  | 1 +
 drivers/block/hd.c  | 1 +
 drivers/block/loop.c| 1 +
 drivers/block/mg_disk.c | 1 +
 drivers/block/mtip32xx/mtip32xx.c   | 1 +
 drivers/block/nbd.c | 1 +
 drivers/block/null_blk.c| 1 +
 drivers/block/osdblk.c  | 1 +
 drivers/block/paride/pcd.c  | 1 +
 drivers/block/paride/pd.c   | 1 +
 drivers/block/paride/pf.c   | 1 +
 drivers/block/pktcdvd.c | 1 +
 drivers/block/ps3disk.c | 1 +
 drivers/block/ps3vram.c | 1 +
 drivers/block/rbd.c | 1 +
 drivers/block/rsxx/dev.c| 1 +
 drivers/block/skd_main.c| 1 +
 drivers/block/sunvdc.c  | 1 +
 drivers/block/swim.c| 1 +
 drivers/block/swim3.c   | 1 +
 drivers/block/sx8.c | 1 +
 drivers/block/umem.c| 1 +
 drivers/block/xen-blkfront.c| 1 +
 drivers/block/xsysace.c | 1 +
 drivers/block/z2ram.c   | 1 +
 drivers/cdrom/gdrom.c   | 1 +
 drivers/ide/ide-cd.c| 1 +
 drivers/ide/ide-gd.c| 1 +
 drivers/lightnvm/gennvm.c   | 1 +
 drivers/md/bcache/super.c   | 2 ++
 drivers/md/dm.c | 1 +
 drivers/md/md.c | 1 +
 drivers/memstick/core/ms_block.c| 1 +
 drivers/memstick/core/mspro_block.c | 1 +
 drivers/mmc/card/block.c| 1 +
 drivers/mtd/mtd_blkdevs.c   | 1 +
 drivers/mtd/ubi/block.c | 1 +
 drivers/nvdimm/blk.c| 1 +
 drivers/nvdimm/btt.c| 1 +
 drivers/nvdimm/pmem.c   | 1 +
 drivers/nvme/host/core.c| 1 +
 drivers/s390/block/dasd_genhd.c | 1 +
 drivers/s390/block/dcssblk.c| 1 +
 drivers/s390/block/scm_blk.c| 1 +
 drivers/s390/block/xpram.c  | 1 +
 drivers/sbus/char/jsflash.c | 1 +
 drivers/scsi/sd.c   | 1 +
 drivers/scsi/sr.c   | 1 +
 58 files changed, 60 insertions(+)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 29dfdd6..ec45cb6 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,6 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
+   /* FIXME: handle error. */
device_add_disk(NULL, dev->disk, NULL);
 
list_add_tail(>list, _list);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 45bbca5..3725af0 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -829,6 +829,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
disk->private_data = _devs[unit];
disk->queue = ubd_devs[unit].queue;
+   /* FIXME: handle error. */
device_add_disk(parent, disk, NULL);
 
*disk_out = disk;
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index 6a3d8c3..b9a1a37 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -288,6 +288,7 @@ static int __init simdisk_setup(struct simdisk *dev, int 
which,
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
set_capacity(dev->gd, 0);
+   /* FIXME: handle error. */
device_add_disk(NULL, dev->gd, NULL);
 
dev->procfile = proc_create_data(tmp, 0644, procdir, , dev);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index abaab30..f6121d3 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3175,6 +3175,7 @@ DAC960_Probe(struct pci_dev *dev, const struct 
pci_device_id *entry)
 
   for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
 set_capacity(Controller->disks[disk], disk_size(Controller, disk));
+/* FIXME: handle error. */
 device_add_disk(NULL, Controller->disks[disk], NULL);
   }
   DAC960_CreateProcEntries(Controller);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index f00d3b4..48de00b 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/

[PATCH 15/15] block: Add FIXME comment to handle device_add_disk error

2016-08-17 Thread Fam Zheng
Done with coccinelle:

  @@
  expression e1, e2, e3;
  identifier rc;
  @@
  (
rc = device_add_disk(e1, e2, e3);
  |
  + /* FIXME: handle error. */
device_add_disk(e1, e2, e3);
  )

Signed-off-by: Fam Zheng 
---
 arch/m68k/emu/nfblock.c | 1 +
 arch/um/drivers/ubd_kern.c  | 1 +
 arch/xtensa/platforms/iss/simdisk.c | 1 +
 drivers/block/DAC960.c  | 1 +
 drivers/block/amiflop.c | 1 +
 drivers/block/aoe/aoeblk.c  | 1 +
 drivers/block/ataflop.c | 1 +
 drivers/block/brd.c | 2 ++
 drivers/block/cciss.c   | 1 +
 drivers/block/drbd/drbd_main.c  | 1 +
 drivers/block/floppy.c  | 1 +
 drivers/block/hd.c  | 1 +
 drivers/block/loop.c| 1 +
 drivers/block/mg_disk.c | 1 +
 drivers/block/mtip32xx/mtip32xx.c   | 1 +
 drivers/block/nbd.c | 1 +
 drivers/block/null_blk.c| 1 +
 drivers/block/osdblk.c  | 1 +
 drivers/block/paride/pcd.c  | 1 +
 drivers/block/paride/pd.c   | 1 +
 drivers/block/paride/pf.c   | 1 +
 drivers/block/pktcdvd.c | 1 +
 drivers/block/ps3disk.c | 1 +
 drivers/block/ps3vram.c | 1 +
 drivers/block/rbd.c | 1 +
 drivers/block/rsxx/dev.c| 1 +
 drivers/block/skd_main.c| 1 +
 drivers/block/sunvdc.c  | 1 +
 drivers/block/swim.c| 1 +
 drivers/block/swim3.c   | 1 +
 drivers/block/sx8.c | 1 +
 drivers/block/umem.c| 1 +
 drivers/block/xen-blkfront.c| 1 +
 drivers/block/xsysace.c | 1 +
 drivers/block/z2ram.c   | 1 +
 drivers/cdrom/gdrom.c   | 1 +
 drivers/ide/ide-cd.c| 1 +
 drivers/ide/ide-gd.c| 1 +
 drivers/lightnvm/gennvm.c   | 1 +
 drivers/md/bcache/super.c   | 2 ++
 drivers/md/dm.c | 1 +
 drivers/md/md.c | 1 +
 drivers/memstick/core/ms_block.c| 1 +
 drivers/memstick/core/mspro_block.c | 1 +
 drivers/mmc/card/block.c| 1 +
 drivers/mtd/mtd_blkdevs.c   | 1 +
 drivers/mtd/ubi/block.c | 1 +
 drivers/nvdimm/blk.c| 1 +
 drivers/nvdimm/btt.c| 1 +
 drivers/nvdimm/pmem.c   | 1 +
 drivers/nvme/host/core.c| 1 +
 drivers/s390/block/dasd_genhd.c | 1 +
 drivers/s390/block/dcssblk.c| 1 +
 drivers/s390/block/scm_blk.c| 1 +
 drivers/s390/block/xpram.c  | 1 +
 drivers/sbus/char/jsflash.c | 1 +
 drivers/scsi/sd.c   | 1 +
 drivers/scsi/sr.c   | 1 +
 58 files changed, 60 insertions(+)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index 29dfdd6..ec45cb6 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,6 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
+   /* FIXME: handle error. */
device_add_disk(NULL, dev->disk, NULL);
 
list_add_tail(>list, _list);
diff --git a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
index 45bbca5..3725af0 100644
--- a/arch/um/drivers/ubd_kern.c
+++ b/arch/um/drivers/ubd_kern.c
@@ -829,6 +829,7 @@ static int ubd_disk_register(int major, u64 size, int unit,
 
disk->private_data = _devs[unit];
disk->queue = ubd_devs[unit].queue;
+   /* FIXME: handle error. */
device_add_disk(parent, disk, NULL);
 
*disk_out = disk;
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index 6a3d8c3..b9a1a37 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -288,6 +288,7 @@ static int __init simdisk_setup(struct simdisk *dev, int 
which,
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
set_capacity(dev->gd, 0);
+   /* FIXME: handle error. */
device_add_disk(NULL, dev->gd, NULL);
 
dev->procfile = proc_create_data(tmp, 0644, procdir, , dev);
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index abaab30..f6121d3 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3175,6 +3175,7 @@ DAC960_Probe(struct pci_dev *dev, const struct 
pci_device_id *entry)
 
   for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
 set_capacity(Controller->disks[disk], disk_size(Controller, disk));
+/* FIXME: handle error. */
 device_add_disk(NULL, Controller->disks[disk], NULL);
   }
   DAC960_CreateProcEntries(Controller);
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index f00d3b4..48de00b 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1738,6 +1738,7 @@

[PATCH 14/15] axonram: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, handle the error of device_add_disk.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 arch/powerpc/sysdev/axonram.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 6aef6c2..23109e3 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -158,6 +158,15 @@ static const struct block_device_operations 
axon_ram_devops = {
.direct_access  = axon_ram_direct_access
 };
 
+static struct attribute *axon_attrs[] = {
+   _attr_ecc.attr,
+   NULL,
+};
+
+static struct attribute_group axon_attr_group = {
+   .attrs = axon_attrs,
+};
+
 /**
  * axon_ram_probe - probe() method for platform driver
  * @device: see platform_driver method
@@ -237,7 +246,12 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   device_add_disk(>dev, bank->disk, NULL);
+   rc = device_add_disk(>dev, bank->disk, _attr_group);
+   if (rc != 0) {
+   dev_err(>dev, "Cannot create disk\n");
+   rc = -EFAULT;
+   goto failed;
+   }
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
@@ -255,13 +269,6 @@ static int axon_ram_probe(struct platform_device *device)
goto failed;
}
 
-   rc = device_create_file(>dev, _attr_ecc);
-   if (rc != 0) {
-   dev_err(>dev, "Cannot create sysfs file\n");
-   rc = -EFAULT;
-   goto failed;
-   }
-
azfs_minor += bank->disk->minors;
 
return 0;
-- 
2.7.4



[PATCH 14/15] axonram: Pass attribute group to device_add_disk

2016-08-17 Thread Fam Zheng
Previously after device_add_disk returns, the KOBJ_ADD uevent is already
emitted. Adding attributes after that is a poor usage of kobject, and
in practice may result in race conditions with userspace, for
example udev checks availability of certain attributes and initializes
/dev entries conditionally.

device_add_disk can handle adding attribute group better, so use it.
Meanwhile, handle the error of device_add_disk.

Signed-off-by: Fam Zheng 
---
 arch/powerpc/sysdev/axonram.c | 23 +++
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 6aef6c2..23109e3 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -158,6 +158,15 @@ static const struct block_device_operations 
axon_ram_devops = {
.direct_access  = axon_ram_direct_access
 };
 
+static struct attribute *axon_attrs[] = {
+   _attr_ecc.attr,
+   NULL,
+};
+
+static struct attribute_group axon_attr_group = {
+   .attrs = axon_attrs,
+};
+
 /**
  * axon_ram_probe - probe() method for platform driver
  * @device: see platform_driver method
@@ -237,7 +246,12 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   device_add_disk(>dev, bank->disk, NULL);
+   rc = device_add_disk(>dev, bank->disk, _attr_group);
+   if (rc != 0) {
+   dev_err(>dev, "Cannot create disk\n");
+   rc = -EFAULT;
+   goto failed;
+   }
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
@@ -255,13 +269,6 @@ static int axon_ram_probe(struct platform_device *device)
goto failed;
}
 
-   rc = device_create_file(>dev, _attr_ecc);
-   if (rc != 0) {
-   dev_err(>dev, "Cannot create sysfs file\n");
-   rc = -EFAULT;
-   goto failed;
-   }
-
azfs_minor += bank->disk->minors;
 
return 0;
-- 
2.7.4



[PATCH 03/15] genhd: Return error from blk_register_region

2016-08-17 Thread Fam Zheng
blk_register_region can fail (-ENOMEM), return the error to the caller.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/genhd.c |  4 ++--
 include/linux/genhd.h | 10 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 3dcecaa..8c7510d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -474,11 +474,11 @@ static char *bdevt_str(dev_t devt, char *buf)
  * range must be nonzero
  * The hash chain is sorted on range, so that subranges can override.
  */
-void blk_register_region(dev_t devt, unsigned long range, struct module 
*module,
+int blk_register_region(dev_t devt, unsigned long range, struct module *module,
 struct kobject *(*probe)(dev_t, int *, void *),
 int (*lock)(dev_t, void *), void *data)
 {
-   kobj_map(bdev_map, devt, range, module, probe, lock, data);
+   return kobj_map(bdev_map, devt, range, module, probe, lock, data);
 }
 
 EXPORT_SYMBOL(blk_register_region);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 889b1bb..b6fd666 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -617,11 +617,11 @@ extern struct gendisk *alloc_disk_node(int minors, int 
node_id);
 extern struct gendisk *alloc_disk(int minors);
 extern struct kobject *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
-extern void blk_register_region(dev_t devt, unsigned long range,
-   struct module *module,
-   struct kobject *(*probe)(dev_t, int *, void *),
-   int (*lock)(dev_t, void *),
-   void *data);
+extern int blk_register_region(dev_t devt, unsigned long range,
+  struct module *module,
+  struct kobject *(*probe)(dev_t, int *, void *),
+  int (*lock)(dev_t, void *),
+  void *data);
 extern void blk_unregister_region(dev_t devt, unsigned long range);
 
 extern ssize_t part_size_show(struct device *dev,
-- 
2.7.4



[PATCH 03/15] genhd: Return error from blk_register_region

2016-08-17 Thread Fam Zheng
blk_register_region can fail (-ENOMEM), return the error to the caller.

Signed-off-by: Fam Zheng 
---
 block/genhd.c |  4 ++--
 include/linux/genhd.h | 10 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 3dcecaa..8c7510d 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -474,11 +474,11 @@ static char *bdevt_str(dev_t devt, char *buf)
  * range must be nonzero
  * The hash chain is sorted on range, so that subranges can override.
  */
-void blk_register_region(dev_t devt, unsigned long range, struct module 
*module,
+int blk_register_region(dev_t devt, unsigned long range, struct module *module,
 struct kobject *(*probe)(dev_t, int *, void *),
 int (*lock)(dev_t, void *), void *data)
 {
-   kobj_map(bdev_map, devt, range, module, probe, lock, data);
+   return kobj_map(bdev_map, devt, range, module, probe, lock, data);
 }
 
 EXPORT_SYMBOL(blk_register_region);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 889b1bb..b6fd666 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -617,11 +617,11 @@ extern struct gendisk *alloc_disk_node(int minors, int 
node_id);
 extern struct gendisk *alloc_disk(int minors);
 extern struct kobject *get_disk(struct gendisk *disk);
 extern void put_disk(struct gendisk *disk);
-extern void blk_register_region(dev_t devt, unsigned long range,
-   struct module *module,
-   struct kobject *(*probe)(dev_t, int *, void *),
-   int (*lock)(dev_t, void *),
-   void *data);
+extern int blk_register_region(dev_t devt, unsigned long range,
+  struct module *module,
+  struct kobject *(*probe)(dev_t, int *, void *),
+  int (*lock)(dev_t, void *),
+  void *data);
 extern void blk_unregister_region(dev_t devt, unsigned long range);
 
 extern ssize_t part_size_show(struct device *dev,
-- 
2.7.4



[PATCH 04/15] block: Return error from blk_integrity_add

2016-08-17 Thread Fam Zheng
The kobject_init_and_add call in blk_integrity_add can fail, return the
error code in this case, so that it can be handled in the caller.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/blk-integrity.c | 12 
 include/linux/genhd.h |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index d69c5c7..437e09a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -450,13 +450,17 @@ void blk_integrity_revalidate(struct gendisk *disk)
~BDI_CAP_STABLE_WRITES;
 }
 
-void blk_integrity_add(struct gendisk *disk)
+int blk_integrity_add(struct gendisk *disk)
 {
-   if (kobject_init_and_add(>integrity_kobj, _ktype,
-_to_dev(disk)->kobj, "%s", "integrity"))
-   return;
+   int rc;
+
+   rc = kobject_init_and_add(>integrity_kobj, _ktype,
+ _to_dev(disk)->kobj, "%s", "integrity");
+   if (rc)
+   return rc;
 
kobject_uevent(>integrity_kobj, KOBJ_ADD);
+   return 0;
 }
 
 void blk_integrity_del(struct gendisk *disk)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index b6fd666..85ce560 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -725,11 +725,11 @@ static inline void part_nr_sects_write(struct hd_struct 
*part, sector_t size)
 }
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-extern void blk_integrity_add(struct gendisk *);
+extern int blk_integrity_add(struct gendisk *);
 extern void blk_integrity_del(struct gendisk *);
 extern void blk_integrity_revalidate(struct gendisk *);
 #else  /* CONFIG_BLK_DEV_INTEGRITY */
-static inline void blk_integrity_add(struct gendisk *disk) { }
+static inline int blk_integrity_add(struct gendisk *disk) { return 0; }
 static inline void blk_integrity_del(struct gendisk *disk) { }
 static inline void blk_integrity_revalidate(struct gendisk *disk) { }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
-- 
2.7.4



[PATCH 02/15] genhd: Return error from register_disk()

2016-08-17 Thread Fam Zheng
Several operations in register_disk can fail, but the caller currently
cannot check for error due to missing return code. Change the function
return type and return -errno if any error happens.

Also add some documentation.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/genhd.c | 27 +--
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index fcd6d4f..3dcecaa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -506,7 +506,15 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
-static void register_disk(struct device *parent, struct gendisk *disk)
+/**
+ * register_disk - register a gendisk to a parent device
+ * @parent: parent device for the disk
+ * @disk: per-device partitioning information
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int register_disk(struct device *parent, struct gendisk *disk)
 {
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
@@ -521,14 +529,16 @@ static void register_disk(struct device *parent, struct 
gendisk *disk)
/* delay uevents, until we scanned partition table */
dev_set_uevent_suppress(ddev, 1);
 
-   if (device_add(ddev))
-   return;
+   err = device_add(ddev);
+   if (err)
+   return err;
+
if (!sysfs_deprecated) {
err = sysfs_create_link(block_depr, >kobj,
kobject_name(>kobj));
if (err) {
device_del(ddev);
-   return;
+   return err;
}
}
 
@@ -547,12 +557,16 @@ static void register_disk(struct device *parent, struct 
gendisk *disk)
goto exit;
 
/* No such device (e.g., media were just removed) */
-   if (!get_capacity(disk))
+   if (!get_capacity(disk)) {
+   err = -ENOMEDIUM;
goto exit;
+   }
 
bdev = bdget_disk(disk, 0);
-   if (!bdev)
+   if (!bdev) {
+   err = -ENOMEDIUM;
goto exit;
+   }
 
bdev->bd_invalidated = 1;
err = blkdev_get(bdev, FMODE_READ, NULL);
@@ -570,6 +584,7 @@ exit:
while ((part = disk_part_iter_next()))
kobject_uevent(_to_dev(part)->kobj, KOBJ_ADD);
disk_part_iter_exit();
+   return err;
 }
 
 /**
-- 
2.7.4



[PATCH 04/15] block: Return error from blk_integrity_add

2016-08-17 Thread Fam Zheng
The kobject_init_and_add call in blk_integrity_add can fail, return the
error code in this case, so that it can be handled in the caller.

Signed-off-by: Fam Zheng 
---
 block/blk-integrity.c | 12 
 include/linux/genhd.h |  4 ++--
 2 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/block/blk-integrity.c b/block/blk-integrity.c
index d69c5c7..437e09a 100644
--- a/block/blk-integrity.c
+++ b/block/blk-integrity.c
@@ -450,13 +450,17 @@ void blk_integrity_revalidate(struct gendisk *disk)
~BDI_CAP_STABLE_WRITES;
 }
 
-void blk_integrity_add(struct gendisk *disk)
+int blk_integrity_add(struct gendisk *disk)
 {
-   if (kobject_init_and_add(>integrity_kobj, _ktype,
-_to_dev(disk)->kobj, "%s", "integrity"))
-   return;
+   int rc;
+
+   rc = kobject_init_and_add(>integrity_kobj, _ktype,
+ _to_dev(disk)->kobj, "%s", "integrity");
+   if (rc)
+   return rc;
 
kobject_uevent(>integrity_kobj, KOBJ_ADD);
+   return 0;
 }
 
 void blk_integrity_del(struct gendisk *disk)
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index b6fd666..85ce560 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -725,11 +725,11 @@ static inline void part_nr_sects_write(struct hd_struct 
*part, sector_t size)
 }
 
 #if defined(CONFIG_BLK_DEV_INTEGRITY)
-extern void blk_integrity_add(struct gendisk *);
+extern int blk_integrity_add(struct gendisk *);
 extern void blk_integrity_del(struct gendisk *);
 extern void blk_integrity_revalidate(struct gendisk *);
 #else  /* CONFIG_BLK_DEV_INTEGRITY */
-static inline void blk_integrity_add(struct gendisk *disk) { }
+static inline int blk_integrity_add(struct gendisk *disk) { return 0; }
 static inline void blk_integrity_del(struct gendisk *disk) { }
 static inline void blk_integrity_revalidate(struct gendisk *disk) { }
 #endif /* CONFIG_BLK_DEV_INTEGRITY */
-- 
2.7.4



[PATCH 02/15] genhd: Return error from register_disk()

2016-08-17 Thread Fam Zheng
Several operations in register_disk can fail, but the caller currently
cannot check for error due to missing return code. Change the function
return type and return -errno if any error happens.

Also add some documentation.

Signed-off-by: Fam Zheng 
---
 block/genhd.c | 27 +--
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index fcd6d4f..3dcecaa 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -506,7 +506,15 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
-static void register_disk(struct device *parent, struct gendisk *disk)
+/**
+ * register_disk - register a gendisk to a parent device
+ * @parent: parent device for the disk
+ * @disk: per-device partitioning information
+ *
+ * RETURNS:
+ * 0 on success, -errno on failure.
+ */
+static int register_disk(struct device *parent, struct gendisk *disk)
 {
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
@@ -521,14 +529,16 @@ static void register_disk(struct device *parent, struct 
gendisk *disk)
/* delay uevents, until we scanned partition table */
dev_set_uevent_suppress(ddev, 1);
 
-   if (device_add(ddev))
-   return;
+   err = device_add(ddev);
+   if (err)
+   return err;
+
if (!sysfs_deprecated) {
err = sysfs_create_link(block_depr, >kobj,
kobject_name(>kobj));
if (err) {
device_del(ddev);
-   return;
+   return err;
}
}
 
@@ -547,12 +557,16 @@ static void register_disk(struct device *parent, struct 
gendisk *disk)
goto exit;
 
/* No such device (e.g., media were just removed) */
-   if (!get_capacity(disk))
+   if (!get_capacity(disk)) {
+   err = -ENOMEDIUM;
goto exit;
+   }
 
bdev = bdget_disk(disk, 0);
-   if (!bdev)
+   if (!bdev) {
+   err = -ENOMEDIUM;
goto exit;
+   }
 
bdev->bd_invalidated = 1;
err = blkdev_get(bdev, FMODE_READ, NULL);
@@ -570,6 +584,7 @@ exit:
while ((part = disk_part_iter_next()))
kobject_uevent(_to_dev(part)->kobj, KOBJ_ADD);
disk_part_iter_exit();
+   return err;
 }
 
 /**
-- 
2.7.4



[PATCH 00/15] Fix issue with KOBJ_ADD uevent versus disk attributes

2016-08-17 Thread Fam Zheng
This is an attempt to fix the issue that some disks' sysfs attributes are not
ready at the time its KOBJ_ADD event is sent.

The symptom is during device hotplug, udev may fail to find certain attributes,
such as serial or wwn, of the disk. As a result the /dev/disk/by-id entries are
not created.

The cause is device_add_disk emits the uevent before returning, and the callers
have to create sysfs entries after that.

The fix here is to pass attr_groups from callers to device_add_disk, so it can
be added before KOBJ_ADD.

Also add basic error handling around device_add_disk code, (or add FIXME
comment where work is left).

Fam Zheng (15):
  disk: Drop add_disk in favor of device_add_disk
  genhd: Return error from register_disk()
  genhd: Return error from blk_register_region
  block: Return error from blk_integrity_add
  genhd: Return error from disk_{add,alloc}_events
  genhd: Add return code to device_add_disk
  genhd: Add attribute group parameter to device_add_disk
  nvme: Pass attribute group to device_add_disk
  virtio-blk: Pass attribute group to device_add_disk
  mtd: Pass attribute group to device_add_disk
  zram: Pass attribute group to device_add_disk
  mtip: Pass attribute group to device_add_disk
  aoeblk: Pass attribute group to device_add_disk
  axonram: Pass attribute group to device_add_disk
  block: Add FIXME comment to handle device_add_disk error

 arch/m68k/emu/nfblock.c |   3 +-
 arch/powerpc/sysdev/axonram.c   |  23 +---
 arch/um/drivers/ubd_kern.c  |   3 +-
 arch/xtensa/platforms/iss/simdisk.c |   3 +-
 block/blk-integrity.c   |  12 ++--
 block/genhd.c   | 112 +---
 drivers/block/DAC960.c  |   3 +-
 drivers/block/amiflop.c |   3 +-
 drivers/block/aoe/aoeblk.c  |  13 ++---
 drivers/block/ataflop.c |   3 +-
 drivers/block/brd.c |   6 +-
 drivers/block/cciss.c   |   3 +-
 drivers/block/drbd/drbd_main.c  |   3 +-
 drivers/block/floppy.c  |   5 +-
 drivers/block/hd.c  |   3 +-
 drivers/block/loop.c|   3 +-
 drivers/block/mg_disk.c |   3 +-
 drivers/block/mtip32xx/mtip32xx.c   |  39 +++--
 drivers/block/nbd.c |   3 +-
 drivers/block/null_blk.c|   3 +-
 drivers/block/osdblk.c  |   3 +-
 drivers/block/paride/pcd.c  |   3 +-
 drivers/block/paride/pd.c   |   3 +-
 drivers/block/paride/pf.c   |   3 +-
 drivers/block/pktcdvd.c |   3 +-
 drivers/block/ps3disk.c |   3 +-
 drivers/block/ps3vram.c |   3 +-
 drivers/block/rbd.c |   3 +-
 drivers/block/rsxx/dev.c|   3 +-
 drivers/block/skd_main.c|   5 +-
 drivers/block/sunvdc.c  |   3 +-
 drivers/block/swim.c|   3 +-
 drivers/block/swim3.c   |   3 +-
 drivers/block/sx8.c |   3 +-
 drivers/block/umem.c|   3 +-
 drivers/block/virtio_blk.c  |  38 
 drivers/block/xen-blkfront.c|   3 +-
 drivers/block/xsysace.c |   3 +-
 drivers/block/z2ram.c   |   3 +-
 drivers/block/zram/zram_drv.c   |   7 +--
 drivers/cdrom/gdrom.c   |   3 +-
 drivers/ide/ide-cd.c|   3 +-
 drivers/ide/ide-gd.c|   3 +-
 drivers/lightnvm/gennvm.c   |   3 +-
 drivers/md/bcache/super.c   |   6 +-
 drivers/md/dm.c |   3 +-
 drivers/md/md.c |   5 +-
 drivers/memstick/core/ms_block.c|   3 +-
 drivers/memstick/core/mspro_block.c |   3 +-
 drivers/mmc/card/block.c|   3 +-
 drivers/mtd/mtd_blkdevs.c   |   9 +--
 drivers/mtd/ubi/block.c |   3 +-
 drivers/nvdimm/blk.c|   3 +-
 drivers/nvdimm/btt.c|   3 +-
 drivers/nvdimm/pmem.c   |   3 +-
 drivers/nvme/host/core.c|   7 +--
 drivers/s390/block/dasd_genhd.c |   3 +-
 drivers/s390/block/dcssblk.c|   3 +-
 drivers/s390/block/scm_blk.c|   3 +-
 drivers/s390/block/xpram.c  |   3 +-
 drivers/sbus/char/jsflash.c |   3 +-
 drivers/scsi/sd.c   |   3 +-
 drivers/scsi/sr.c   |   5 +-
 drivers/scsi/st.c   |   4 +-
 fs/block_dev.c  |   2 +-
 include/linux/genhd.h   |  21 +++
 66 files changed, 277 insertions(+), 186 deletions(-)

-- 
2.7.4



[PATCH 00/15] Fix issue with KOBJ_ADD uevent versus disk attributes

2016-08-17 Thread Fam Zheng
This is an attempt to fix the issue that some disks' sysfs attributes are not
ready at the time its KOBJ_ADD event is sent.

The symptom is during device hotplug, udev may fail to find certain attributes,
such as serial or wwn, of the disk. As a result the /dev/disk/by-id entries are
not created.

The cause is device_add_disk emits the uevent before returning, and the callers
have to create sysfs entries after that.

The fix here is to pass attr_groups from callers to device_add_disk, so it can
be added before KOBJ_ADD.

Also add basic error handling around device_add_disk code, (or add FIXME
comment where work is left).

Fam Zheng (15):
  disk: Drop add_disk in favor of device_add_disk
  genhd: Return error from register_disk()
  genhd: Return error from blk_register_region
  block: Return error from blk_integrity_add
  genhd: Return error from disk_{add,alloc}_events
  genhd: Add return code to device_add_disk
  genhd: Add attribute group parameter to device_add_disk
  nvme: Pass attribute group to device_add_disk
  virtio-blk: Pass attribute group to device_add_disk
  mtd: Pass attribute group to device_add_disk
  zram: Pass attribute group to device_add_disk
  mtip: Pass attribute group to device_add_disk
  aoeblk: Pass attribute group to device_add_disk
  axonram: Pass attribute group to device_add_disk
  block: Add FIXME comment to handle device_add_disk error

 arch/m68k/emu/nfblock.c |   3 +-
 arch/powerpc/sysdev/axonram.c   |  23 +---
 arch/um/drivers/ubd_kern.c  |   3 +-
 arch/xtensa/platforms/iss/simdisk.c |   3 +-
 block/blk-integrity.c   |  12 ++--
 block/genhd.c   | 112 +---
 drivers/block/DAC960.c  |   3 +-
 drivers/block/amiflop.c |   3 +-
 drivers/block/aoe/aoeblk.c  |  13 ++---
 drivers/block/ataflop.c |   3 +-
 drivers/block/brd.c |   6 +-
 drivers/block/cciss.c   |   3 +-
 drivers/block/drbd/drbd_main.c  |   3 +-
 drivers/block/floppy.c  |   5 +-
 drivers/block/hd.c  |   3 +-
 drivers/block/loop.c|   3 +-
 drivers/block/mg_disk.c |   3 +-
 drivers/block/mtip32xx/mtip32xx.c   |  39 +++--
 drivers/block/nbd.c |   3 +-
 drivers/block/null_blk.c|   3 +-
 drivers/block/osdblk.c  |   3 +-
 drivers/block/paride/pcd.c  |   3 +-
 drivers/block/paride/pd.c   |   3 +-
 drivers/block/paride/pf.c   |   3 +-
 drivers/block/pktcdvd.c |   3 +-
 drivers/block/ps3disk.c |   3 +-
 drivers/block/ps3vram.c |   3 +-
 drivers/block/rbd.c |   3 +-
 drivers/block/rsxx/dev.c|   3 +-
 drivers/block/skd_main.c|   5 +-
 drivers/block/sunvdc.c  |   3 +-
 drivers/block/swim.c|   3 +-
 drivers/block/swim3.c   |   3 +-
 drivers/block/sx8.c |   3 +-
 drivers/block/umem.c|   3 +-
 drivers/block/virtio_blk.c  |  38 
 drivers/block/xen-blkfront.c|   3 +-
 drivers/block/xsysace.c |   3 +-
 drivers/block/z2ram.c   |   3 +-
 drivers/block/zram/zram_drv.c   |   7 +--
 drivers/cdrom/gdrom.c   |   3 +-
 drivers/ide/ide-cd.c|   3 +-
 drivers/ide/ide-gd.c|   3 +-
 drivers/lightnvm/gennvm.c   |   3 +-
 drivers/md/bcache/super.c   |   6 +-
 drivers/md/dm.c |   3 +-
 drivers/md/md.c |   5 +-
 drivers/memstick/core/ms_block.c|   3 +-
 drivers/memstick/core/mspro_block.c |   3 +-
 drivers/mmc/card/block.c|   3 +-
 drivers/mtd/mtd_blkdevs.c   |   9 +--
 drivers/mtd/ubi/block.c |   3 +-
 drivers/nvdimm/blk.c|   3 +-
 drivers/nvdimm/btt.c|   3 +-
 drivers/nvdimm/pmem.c   |   3 +-
 drivers/nvme/host/core.c|   7 +--
 drivers/s390/block/dasd_genhd.c |   3 +-
 drivers/s390/block/dcssblk.c|   3 +-
 drivers/s390/block/scm_blk.c|   3 +-
 drivers/s390/block/xpram.c  |   3 +-
 drivers/sbus/char/jsflash.c |   3 +-
 drivers/scsi/sd.c   |   3 +-
 drivers/scsi/sr.c   |   5 +-
 drivers/scsi/st.c   |   4 +-
 fs/block_dev.c  |   2 +-
 include/linux/genhd.h   |  21 +++
 66 files changed, 277 insertions(+), 186 deletions(-)

-- 
2.7.4



[PATCH 01/15] disk: Drop add_disk in favor of device_add_disk

2016-08-17 Thread Fam Zheng
add_disk is now a "convenience" wrapper of device_add_disk. Unwrap it so
that callers can be later converted to handle error and attribute group
more easily.

Callers are converted with coccinelle:

  @@
  expression x;
  @@
  - add_disk(x)
  + device_add_disk(NULL, x)

Removal of add_disk and update of related comments are done manually.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 arch/m68k/emu/nfblock.c | 2 +-
 arch/xtensa/platforms/iss/simdisk.c | 2 +-
 drivers/block/DAC960.c  | 2 +-
 drivers/block/amiflop.c | 2 +-
 drivers/block/aoe/aoeblk.c  | 4 ++--
 drivers/block/ataflop.c | 2 +-
 drivers/block/brd.c | 4 ++--
 drivers/block/drbd/drbd_main.c  | 2 +-
 drivers/block/floppy.c  | 2 +-
 drivers/block/hd.c  | 2 +-
 drivers/block/loop.c| 2 +-
 drivers/block/mg_disk.c | 2 +-
 drivers/block/nbd.c | 2 +-
 drivers/block/null_blk.c| 2 +-
 drivers/block/osdblk.c  | 2 +-
 drivers/block/paride/pcd.c  | 2 +-
 drivers/block/paride/pd.c   | 2 +-
 drivers/block/paride/pf.c   | 2 +-
 drivers/block/pktcdvd.c | 2 +-
 drivers/block/rbd.c | 2 +-
 drivers/block/skd_main.c| 2 +-
 drivers/block/swim.c| 2 +-
 drivers/block/swim3.c   | 2 +-
 drivers/block/sx8.c | 2 +-
 drivers/block/umem.c| 2 +-
 drivers/block/xsysace.c | 2 +-
 drivers/block/z2ram.c   | 2 +-
 drivers/block/zram/zram_drv.c   | 2 +-
 drivers/cdrom/gdrom.c   | 2 +-
 drivers/lightnvm/gennvm.c   | 2 +-
 drivers/md/bcache/super.c   | 4 ++--
 drivers/md/dm.c | 2 +-
 drivers/md/md.c | 4 ++--
 drivers/mtd/ubi/block.c | 2 +-
 drivers/s390/block/xpram.c  | 2 +-
 drivers/sbus/char/jsflash.c | 2 +-
 drivers/scsi/sr.c   | 2 +-
 drivers/scsi/st.c   | 4 ++--
 fs/block_dev.c  | 2 +-
 include/linux/genhd.h   | 4 
 40 files changed, 44 insertions(+), 48 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index e9110b9..573f76d 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
-   add_disk(dev->disk);
+   device_add_disk(NULL, dev->disk);
 
list_add_tail(>list, _list);
 
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index f58a4e6..da000c1 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -288,7 +288,7 @@ static int __init simdisk_setup(struct simdisk *dev, int 
which,
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
set_capacity(dev->gd, 0);
-   add_disk(dev->gd);
+   device_add_disk(NULL, dev->gd);
 
dev->procfile = proc_create_data(tmp, 0644, procdir, , dev);
return 0;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 811e11c..9f22a17 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3175,7 +3175,7 @@ DAC960_Probe(struct pci_dev *dev, const struct 
pci_device_id *entry)
 
   for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
 set_capacity(Controller->disks[disk], disk_size(Controller, disk));
-add_disk(Controller->disks[disk]);
+device_add_disk(NULL, Controller->disks[disk]);
   }
   DAC960_CreateProcEntries(Controller);
   return 0;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 5fd50a2..398a30c 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1738,7 +1738,7 @@ static int __init fd_probe_drives(void)
sprintf(disk->disk_name, "fd%d", drive);
disk->private_data = [drive];
set_capacity(disk, 880*2);
-   add_disk(disk);
+   device_add_disk(NULL, disk);
}
if ((drives > 0) || (nomem == 0)) {
if (drives == 0)
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ec9d861..24f246b 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -345,7 +345,7 @@ static const struct block_device_operations aoe_bdops = {
.owner = THIS_MODULE,
 };
 
-/* alloc_disk and add_disk can sleep */
+/* alloc_disk and device_add_disk can sleep */
 void
 aoeblk_gdalloc(void *vp)
 {
@@ -417,7 +417,7 @@ aoeblk_gdalloc(void *vp)
 
spin_unlock_irqrestore(>lock, flags);
 
-   add_disk(gd);
+   device_add_disk(NULL, gd);
aoedisk_add_s

[PATCH 01/15] disk: Drop add_disk in favor of device_add_disk

2016-08-17 Thread Fam Zheng
add_disk is now a "convenience" wrapper of device_add_disk. Unwrap it so
that callers can be later converted to handle error and attribute group
more easily.

Callers are converted with coccinelle:

  @@
  expression x;
  @@
  - add_disk(x)
  + device_add_disk(NULL, x)

Removal of add_disk and update of related comments are done manually.

Signed-off-by: Fam Zheng 
---
 arch/m68k/emu/nfblock.c | 2 +-
 arch/xtensa/platforms/iss/simdisk.c | 2 +-
 drivers/block/DAC960.c  | 2 +-
 drivers/block/amiflop.c | 2 +-
 drivers/block/aoe/aoeblk.c  | 4 ++--
 drivers/block/ataflop.c | 2 +-
 drivers/block/brd.c | 4 ++--
 drivers/block/drbd/drbd_main.c  | 2 +-
 drivers/block/floppy.c  | 2 +-
 drivers/block/hd.c  | 2 +-
 drivers/block/loop.c| 2 +-
 drivers/block/mg_disk.c | 2 +-
 drivers/block/nbd.c | 2 +-
 drivers/block/null_blk.c| 2 +-
 drivers/block/osdblk.c  | 2 +-
 drivers/block/paride/pcd.c  | 2 +-
 drivers/block/paride/pd.c   | 2 +-
 drivers/block/paride/pf.c   | 2 +-
 drivers/block/pktcdvd.c | 2 +-
 drivers/block/rbd.c | 2 +-
 drivers/block/skd_main.c| 2 +-
 drivers/block/swim.c| 2 +-
 drivers/block/swim3.c   | 2 +-
 drivers/block/sx8.c | 2 +-
 drivers/block/umem.c| 2 +-
 drivers/block/xsysace.c | 2 +-
 drivers/block/z2ram.c   | 2 +-
 drivers/block/zram/zram_drv.c   | 2 +-
 drivers/cdrom/gdrom.c   | 2 +-
 drivers/lightnvm/gennvm.c   | 2 +-
 drivers/md/bcache/super.c   | 4 ++--
 drivers/md/dm.c | 2 +-
 drivers/md/md.c | 4 ++--
 drivers/mtd/ubi/block.c | 2 +-
 drivers/s390/block/xpram.c  | 2 +-
 drivers/sbus/char/jsflash.c | 2 +-
 drivers/scsi/sr.c   | 2 +-
 drivers/scsi/st.c   | 4 ++--
 fs/block_dev.c  | 2 +-
 include/linux/genhd.h   | 4 
 40 files changed, 44 insertions(+), 48 deletions(-)

diff --git a/arch/m68k/emu/nfblock.c b/arch/m68k/emu/nfblock.c
index e9110b9..573f76d 100644
--- a/arch/m68k/emu/nfblock.c
+++ b/arch/m68k/emu/nfblock.c
@@ -138,7 +138,7 @@ static int __init nfhd_init_one(int id, u32 blocks, u32 
bsize)
set_capacity(dev->disk, (sector_t)blocks * (bsize / 512));
dev->disk->queue = dev->queue;
 
-   add_disk(dev->disk);
+   device_add_disk(NULL, dev->disk);
 
list_add_tail(>list, _list);
 
diff --git a/arch/xtensa/platforms/iss/simdisk.c 
b/arch/xtensa/platforms/iss/simdisk.c
index f58a4e6..da000c1 100644
--- a/arch/xtensa/platforms/iss/simdisk.c
+++ b/arch/xtensa/platforms/iss/simdisk.c
@@ -288,7 +288,7 @@ static int __init simdisk_setup(struct simdisk *dev, int 
which,
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, 32, "simdisk%d", which);
set_capacity(dev->gd, 0);
-   add_disk(dev->gd);
+   device_add_disk(NULL, dev->gd);
 
dev->procfile = proc_create_data(tmp, 0644, procdir, , dev);
return 0;
diff --git a/drivers/block/DAC960.c b/drivers/block/DAC960.c
index 811e11c..9f22a17 100644
--- a/drivers/block/DAC960.c
+++ b/drivers/block/DAC960.c
@@ -3175,7 +3175,7 @@ DAC960_Probe(struct pci_dev *dev, const struct 
pci_device_id *entry)
 
   for (disk = 0; disk < DAC960_MaxLogicalDrives; disk++) {
 set_capacity(Controller->disks[disk], disk_size(Controller, disk));
-add_disk(Controller->disks[disk]);
+device_add_disk(NULL, Controller->disks[disk]);
   }
   DAC960_CreateProcEntries(Controller);
   return 0;
diff --git a/drivers/block/amiflop.c b/drivers/block/amiflop.c
index 5fd50a2..398a30c 100644
--- a/drivers/block/amiflop.c
+++ b/drivers/block/amiflop.c
@@ -1738,7 +1738,7 @@ static int __init fd_probe_drives(void)
sprintf(disk->disk_name, "fd%d", drive);
disk->private_data = [drive];
set_capacity(disk, 880*2);
-   add_disk(disk);
+   device_add_disk(NULL, disk);
}
if ((drives > 0) || (nomem == 0)) {
if (drives == 0)
diff --git a/drivers/block/aoe/aoeblk.c b/drivers/block/aoe/aoeblk.c
index ec9d861..24f246b 100644
--- a/drivers/block/aoe/aoeblk.c
+++ b/drivers/block/aoe/aoeblk.c
@@ -345,7 +345,7 @@ static const struct block_device_operations aoe_bdops = {
.owner = THIS_MODULE,
 };
 
-/* alloc_disk and add_disk can sleep */
+/* alloc_disk and device_add_disk can sleep */
 void
 aoeblk_gdalloc(void *vp)
 {
@@ -417,7 +417,7 @@ aoeblk_gdalloc(void *vp)
 
spin_unlock_irqrestore(>lock, flags);
 
-   add_disk(gd);
+   device_add_disk(NULL, gd);
aoedisk_add_sysfs(d);
aoe

Re: [PATCH v2 0/5] Add support for EPT execute only for nested hypervisors

2016-07-13 Thread Fam Zheng
On Wed, 07/13 11:20, Paolo Bonzini wrote:
> 
> 
> On 13/07/2016 00:18, Bandan Das wrote:
> > v1 of this series posted at https://lkml.org/lkml/2016/6/28/7
> > 
> > Changes since v1:
> >  - 1/5 : modify is_shadow_present_pte to check against 0x
> >Reasoning provided in commit message.
> >  - 2/5 : Removed 2/5 from v1 since kvm doesn't use execute only.
> >3/5 from v1 is now 2/5. Introduce shadow_present_mask that
> >signifies whether ept execute only is supported. Add/remove some
> >comments as suggested in v1.
> >  - 3/5 : 4/5 from v1 is now 3/5.
> >  - 4/5 : update_permission_bitmask now sets u=1 only if host doesn't
> >support ept execute only.
> >  - 5/5 : No change
> 
> These are the diffs I have after review, do they look okay?
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 190c0559c221..bd2535fdb9eb 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2524,11 +2524,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>   return 0;
>  
>   /*
> -  * In the non-EPT case, execonly is not valid and so
> -  * the following line is equivalent to spte |= PT_PRESENT_MASK.
>* For the EPT case, shadow_present_mask is 0 if hardware
> -  * supports it and we honor whatever way the guest set it.
> -  * See: FNAME(gpte_access) in paging_tmpl.h
> +  * supports exec-only page table entries.  In that case,
> +  * ACC_USER_MASK and shadow_user_mask are used to represent
> +  * read access.  See FNAME(gpte_access) in paging_tmpl.h.
>*/
>   spte |= shadow_present_mask;
>   if (!speculative)
> @@ -3923,9 +3922,6 @@ static void update_permission_bitmask(struct kvm_vcpu 
> *vcpu,
>*   clearer.
>*/
>   smap = cr4_smap && u && !uf && !ff;
> - } else {
> - if (shadow_present_mask)
> - u = 1;
>   }
>  
>   fault = (ff && !x) || (uf && !u) || (wf && !w) ||
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 576c47cda1a3..dfef081e76c0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6120,12 +6120,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
>   gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
>   trace_kvm_page_fault(gpa, exit_qualification);
>  
> - /* It is a write fault? */
> + /* it is a read fault? */
> + error_code = (exit_qualification << 2) & PFERR_USER_MASK;
> + /* it is a write fault? */
>   error_code = exit_qualification & PFERR_WRITE_MASK;

Did you mean s/=/|=/ for this line?

Fam

>   /* It is a fetch fault? */
>   error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
>   /* ept page table is present? */
> - error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
> + error_code |= (exit_qualification & 0x38) != 0;
>  
>   vcpu->arch.exit_qualification = exit_qualification;
>  
> @@ -6474,8 +6476,7 @@ static __init int hardware_setup(void)
>   (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
>   0ull, VMX_EPT_EXECUTABLE_MASK,
>   cpu_has_vmx_ept_execute_only() ?
> -   0ull : PT_PRESENT_MASK);
> - BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
> +   0ull : VMX_EPT_READABLE_MASK);
>   ept_set_mmio_spte_mask();
>   kvm_enable_tdp();
>   } else
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 0/5] Add support for EPT execute only for nested hypervisors

2016-07-13 Thread Fam Zheng
On Wed, 07/13 11:20, Paolo Bonzini wrote:
> 
> 
> On 13/07/2016 00:18, Bandan Das wrote:
> > v1 of this series posted at https://lkml.org/lkml/2016/6/28/7
> > 
> > Changes since v1:
> >  - 1/5 : modify is_shadow_present_pte to check against 0x
> >Reasoning provided in commit message.
> >  - 2/5 : Removed 2/5 from v1 since kvm doesn't use execute only.
> >3/5 from v1 is now 2/5. Introduce shadow_present_mask that
> >signifies whether ept execute only is supported. Add/remove some
> >comments as suggested in v1.
> >  - 3/5 : 4/5 from v1 is now 3/5.
> >  - 4/5 : update_permission_bitmask now sets u=1 only if host doesn't
> >support ept execute only.
> >  - 5/5 : No change
> 
> These are the diffs I have after review, do they look okay?
> 
> diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
> index 190c0559c221..bd2535fdb9eb 100644
> --- a/arch/x86/kvm/mmu.c
> +++ b/arch/x86/kvm/mmu.c
> @@ -2524,11 +2524,10 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
>   return 0;
>  
>   /*
> -  * In the non-EPT case, execonly is not valid and so
> -  * the following line is equivalent to spte |= PT_PRESENT_MASK.
>* For the EPT case, shadow_present_mask is 0 if hardware
> -  * supports it and we honor whatever way the guest set it.
> -  * See: FNAME(gpte_access) in paging_tmpl.h
> +  * supports exec-only page table entries.  In that case,
> +  * ACC_USER_MASK and shadow_user_mask are used to represent
> +  * read access.  See FNAME(gpte_access) in paging_tmpl.h.
>*/
>   spte |= shadow_present_mask;
>   if (!speculative)
> @@ -3923,9 +3922,6 @@ static void update_permission_bitmask(struct kvm_vcpu 
> *vcpu,
>*   clearer.
>*/
>   smap = cr4_smap && u && !uf && !ff;
> - } else {
> - if (shadow_present_mask)
> - u = 1;
>   }
>  
>   fault = (ff && !x) || (uf && !u) || (wf && !w) ||
> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
> index 576c47cda1a3..dfef081e76c0 100644
> --- a/arch/x86/kvm/vmx.c
> +++ b/arch/x86/kvm/vmx.c
> @@ -6120,12 +6120,14 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
>   gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
>   trace_kvm_page_fault(gpa, exit_qualification);
>  
> - /* It is a write fault? */
> + /* it is a read fault? */
> + error_code = (exit_qualification << 2) & PFERR_USER_MASK;
> + /* it is a write fault? */
>   error_code = exit_qualification & PFERR_WRITE_MASK;

Did you mean s/=/|=/ for this line?

Fam

>   /* It is a fetch fault? */
>   error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK;
>   /* ept page table is present? */
> - error_code |= (exit_qualification >> 3) & PFERR_PRESENT_MASK;
> + error_code |= (exit_qualification & 0x38) != 0;
>  
>   vcpu->arch.exit_qualification = exit_qualification;
>  
> @@ -6474,8 +6476,7 @@ static __init int hardware_setup(void)
>   (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull,
>   0ull, VMX_EPT_EXECUTABLE_MASK,
>   cpu_has_vmx_ept_execute_only() ?
> -   0ull : PT_PRESENT_MASK);
> - BUILD_BUG_ON(PT_PRESENT_MASK != VMX_EPT_READABLE_MASK);
> +   0ull : VMX_EPT_READABLE_MASK);
>   ept_set_mmio_spte_mask();
>   kvm_enable_tdp();
>   } else
> 
> --
> To unsubscribe from this list: send the line "unsubscribe kvm" in
> the body of a message to majord...@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 04/12] axonrom: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Thu, 06/30 15:10, Dan Williams wrote:
> On Wed, Jun 29, 2016 at 6:59 PM, Fam Zheng <f...@redhat.com> wrote:
> > It is documented that KOBJ_ADD should be generated after the object's
> > attributes and children are ready.  We can achieve this with the new
> > disk_gen_uevents interface.
> >
> > Signed-off-by: Fam Zheng <f...@redhat.com>
> > ---
> >  arch/powerpc/sysdev/axonram.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
> > index 4efd69b..27e7175 100644
> > --- a/arch/powerpc/sysdev/axonram.c
> > +++ b/arch/powerpc/sysdev/axonram.c
> > @@ -238,7 +238,7 @@ static int axon_ram_probe(struct platform_device 
> > *device)
> > set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
> > blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
> > blk_queue_logical_block_size(bank->disk->queue, 
> > AXON_RAM_SECTOR_SIZE);
> > -   add_disk(bank->disk, true);
> > +   add_disk(bank->disk, false);
> >
> > bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
> > if (bank->irq_id == NO_IRQ) {
> > @@ -262,6 +262,7 @@ static int axon_ram_probe(struct platform_device 
> > *device)
> > rc = -EFAULT;
> > goto failed;
> > }
> > +   disk_gen_uevents(bank->disk);
> 
> I assume you are doing this after:
> 
>rc = device_create_file(>dev, _attr_ecc);
> 
> ...so that userspace gets notified of the new attribute, but this
> attribute is on the parent device, not the disk itself.  Instead I
> think this attribute should simply be registered before the call to
> add_disk().  Then the KOBJ_ADD event for the disk comes after the
> attribute is available.  It's still not a clean fit, because userspace
> should not be expecting a child device uevent to signal new attributes
> available on the parent.

Yes you are right, this patch is a mistake. Moving to before add_disk makes
sense to me.

Thanks for taking a look!

Fam


Re: [PATCH v2 04/12] axonrom: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Thu, 06/30 15:10, Dan Williams wrote:
> On Wed, Jun 29, 2016 at 6:59 PM, Fam Zheng  wrote:
> > It is documented that KOBJ_ADD should be generated after the object's
> > attributes and children are ready.  We can achieve this with the new
> > disk_gen_uevents interface.
> >
> > Signed-off-by: Fam Zheng 
> > ---
> >  arch/powerpc/sysdev/axonram.c | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
> > index 4efd69b..27e7175 100644
> > --- a/arch/powerpc/sysdev/axonram.c
> > +++ b/arch/powerpc/sysdev/axonram.c
> > @@ -238,7 +238,7 @@ static int axon_ram_probe(struct platform_device 
> > *device)
> > set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
> > blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
> > blk_queue_logical_block_size(bank->disk->queue, 
> > AXON_RAM_SECTOR_SIZE);
> > -   add_disk(bank->disk, true);
> > +   add_disk(bank->disk, false);
> >
> > bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
> > if (bank->irq_id == NO_IRQ) {
> > @@ -262,6 +262,7 @@ static int axon_ram_probe(struct platform_device 
> > *device)
> > rc = -EFAULT;
> > goto failed;
> > }
> > +   disk_gen_uevents(bank->disk);
> 
> I assume you are doing this after:
> 
>rc = device_create_file(>dev, _attr_ecc);
> 
> ...so that userspace gets notified of the new attribute, but this
> attribute is on the parent device, not the disk itself.  Instead I
> think this attribute should simply be registered before the call to
> add_disk().  Then the KOBJ_ADD event for the disk comes after the
> attribute is available.  It's still not a clean fit, because userspace
> should not be expecting a child device uevent to signal new attributes
> available on the parent.

Yes you are right, this patch is a mistake. Moving to before add_disk makes
sense to me.

Thanks for taking a look!

Fam


Re: [PATCH v2 00/12] gendisk: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Wed, 06/29 23:38, Christoph Hellwig wrote:
> On Thu, Jun 30, 2016 at 02:35:54PM +0800, Fam Zheng wrote:
> > also more code and less flexible IMO.  For example, we need at least two
> > variants, for attribute_group and device_attribute separately, right?
> 
> Yes, or maybe just a calling convention that just passes both.

OK, I can look into that, but I'm not sure about the error handling. Currently
add_disk returns void, do you have any plan on that too? should I change it in
v3 (to at least return the attribute creation failure)?

Fam


Re: [PATCH v2 00/12] gendisk: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Wed, 06/29 23:38, Christoph Hellwig wrote:
> On Thu, Jun 30, 2016 at 02:35:54PM +0800, Fam Zheng wrote:
> > also more code and less flexible IMO.  For example, we need at least two
> > variants, for attribute_group and device_attribute separately, right?
> 
> Yes, or maybe just a calling convention that just passes both.

OK, I can look into that, but I'm not sure about the error handling. Currently
add_disk returns void, do you have any plan on that too? should I change it in
v3 (to at least return the attribute creation failure)?

Fam


Re: [PATCH v2 00/12] gendisk: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Wed, 06/29 23:24, Christoph Hellwig wrote:
> On Thu, Jun 30, 2016 at 09:59:41AM +0800, Fam Zheng wrote:
> > Documentation/kobject.txt:
> > > Use the KOBJ_ADD action for when the kobject is first added to the kernel.
> > > This should be done only after any attributes or children of the kobject
> > > have been initialized properly, as userspace will instantly start to look
> > > for them when this call happens.
> > 
> > Unfortunately it seems impossible to fix this generally without touching the
> > offending callers.  The approach I'm proposing here is adding a flag to
> > suppress uevent in add_disk(), which is patch 1, then in later patches, 
> > convert
> > any caller to only trigger the uevent when attributes are added.
> 
> We (or rather Dan) is touching most add_disk callers anyway for the
> driverfs_dev removal.  Let's just pass the array of attributes to
> a disk_add variant and solve the issue for real.

I thought about that. Its usage is more compact compared to this series, but is
also more code and less flexible IMO.  For example, we need at least two
variants, for attribute_group and device_attribute separately, right?

Fam


Re: [PATCH v2 00/12] gendisk: Generate uevent after attribute available

2016-06-30 Thread Fam Zheng
On Wed, 06/29 23:24, Christoph Hellwig wrote:
> On Thu, Jun 30, 2016 at 09:59:41AM +0800, Fam Zheng wrote:
> > Documentation/kobject.txt:
> > > Use the KOBJ_ADD action for when the kobject is first added to the kernel.
> > > This should be done only after any attributes or children of the kobject
> > > have been initialized properly, as userspace will instantly start to look
> > > for them when this call happens.
> > 
> > Unfortunately it seems impossible to fix this generally without touching the
> > offending callers.  The approach I'm proposing here is adding a flag to
> > suppress uevent in add_disk(), which is patch 1, then in later patches, 
> > convert
> > any caller to only trigger the uevent when attributes are added.
> 
> We (or rather Dan) is touching most add_disk callers anyway for the
> driverfs_dev removal.  Let's just pass the array of attributes to
> a disk_add variant and solve the issue for real.

I thought about that. Its usage is more compact compared to this series, but is
also more code and less flexible IMO.  For example, we need at least two
variants, for attribute_group and device_attribute separately, right?

Fam


[PATCH v2 02/12] genhd: Honor gen_uevent and add disk_gen_uevents

2016-06-29 Thread Fam Zheng
In add_disk(), don't send uevent to userspace when gen_uevent is true;
also export the refactored function disk_gen_uevents for later use.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/genhd.c | 23 +++
 include/linux/genhd.h |  1 +
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8e1bfa1..9b66953 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -506,12 +506,10 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
-static void register_disk(struct gendisk *disk)
+static void register_disk(struct gendisk *disk, bool gen_uevent)
 {
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
-   struct disk_part_iter piter;
-   struct hd_struct *part;
int err;
 
ddev->parent = disk->driverfs_dev;
@@ -563,6 +561,22 @@ static void register_disk(struct gendisk *disk)
 exit:
/* announce disk after possible partitions are created */
dev_set_uevent_suppress(ddev, 0);
+   if (gen_uevent)
+   disk_gen_uevents(disk);
+}
+
+/**
+ * disk_gen_uevents
+ * @disk - the disk to generate uevent
+ *
+ * Generate KOBJ_ADD uevents on the disk and partitions.
+ */
+void disk_gen_uevents(struct gendisk *disk)
+{
+   struct device *ddev = disk_to_dev(disk);
+   struct disk_part_iter piter;
+   struct hd_struct *part;
+
kobject_uevent(>kobj, KOBJ_ADD);
 
/* announce possible partitions */
@@ -571,6 +585,7 @@ exit:
kobject_uevent(_to_dev(part)->kobj, KOBJ_ADD);
disk_part_iter_exit();
 }
+EXPORT_SYMBOL(disk_gen_uevents);
 
 /**
  * add_disk - add partitioning information to kernel list
@@ -618,7 +633,7 @@ void add_disk(struct gendisk *disk, bool gen_uevent)
 
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
-   register_disk(disk);
+   register_disk(disk, gen_uevent);
blk_register_queue(disk);
 
/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 038be80..87ad9e5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -416,6 +416,7 @@ extern void part_round_stats(int cpu, struct hd_struct 
*part);
 /* block/genhd.c */
 extern void add_disk(struct gendisk *disk, bool gen_uevent);
 extern void del_gendisk(struct gendisk *gp);
+extern void disk_gen_uevents(struct gendisk *disk);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
-- 
2.9.0



[PATCH v2 02/12] genhd: Honor gen_uevent and add disk_gen_uevents

2016-06-29 Thread Fam Zheng
In add_disk(), don't send uevent to userspace when gen_uevent is true;
also export the refactored function disk_gen_uevents for later use.

Signed-off-by: Fam Zheng 
---
 block/genhd.c | 23 +++
 include/linux/genhd.h |  1 +
 2 files changed, 20 insertions(+), 4 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8e1bfa1..9b66953 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -506,12 +506,10 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
-static void register_disk(struct gendisk *disk)
+static void register_disk(struct gendisk *disk, bool gen_uevent)
 {
struct device *ddev = disk_to_dev(disk);
struct block_device *bdev;
-   struct disk_part_iter piter;
-   struct hd_struct *part;
int err;
 
ddev->parent = disk->driverfs_dev;
@@ -563,6 +561,22 @@ static void register_disk(struct gendisk *disk)
 exit:
/* announce disk after possible partitions are created */
dev_set_uevent_suppress(ddev, 0);
+   if (gen_uevent)
+   disk_gen_uevents(disk);
+}
+
+/**
+ * disk_gen_uevents
+ * @disk - the disk to generate uevent
+ *
+ * Generate KOBJ_ADD uevents on the disk and partitions.
+ */
+void disk_gen_uevents(struct gendisk *disk)
+{
+   struct device *ddev = disk_to_dev(disk);
+   struct disk_part_iter piter;
+   struct hd_struct *part;
+
kobject_uevent(>kobj, KOBJ_ADD);
 
/* announce possible partitions */
@@ -571,6 +585,7 @@ exit:
kobject_uevent(_to_dev(part)->kobj, KOBJ_ADD);
disk_part_iter_exit();
 }
+EXPORT_SYMBOL(disk_gen_uevents);
 
 /**
  * add_disk - add partitioning information to kernel list
@@ -618,7 +633,7 @@ void add_disk(struct gendisk *disk, bool gen_uevent)
 
blk_register_region(disk_devt(disk), disk->minors, NULL,
exact_match, exact_lock, disk);
-   register_disk(disk);
+   register_disk(disk, gen_uevent);
blk_register_queue(disk);
 
/*
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 038be80..87ad9e5 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -416,6 +416,7 @@ extern void part_round_stats(int cpu, struct hd_struct 
*part);
 /* block/genhd.c */
 extern void add_disk(struct gendisk *disk, bool gen_uevent);
 extern void del_gendisk(struct gendisk *gp);
+extern void disk_gen_uevents(struct gendisk *disk);
 extern struct gendisk *get_gendisk(dev_t dev, int *partno);
 extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
-- 
2.9.0



[PATCH v2 08/12] zram: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/zram/zram_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d735513..83f10a0 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1287,7 +1287,7 @@ static int zram_add(void)
zram->disk->queue->limits.discard_zeroes_data = 0;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
-   add_disk(zram->disk, true);
+   add_disk(zram->disk, false);
 
ret = sysfs_create_group(_to_dev(zram->disk)->kobj,
_disk_attr_group);
@@ -1296,6 +1296,7 @@ static int zram_add(void)
device_id);
goto out_free_disk;
}
+   disk_gen_uevents(zram->disk);
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
 
-- 
2.9.0



[PATCH v2 08/12] zram: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng 
---
 drivers/block/zram/zram_drv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index d735513..83f10a0 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1287,7 +1287,7 @@ static int zram_add(void)
zram->disk->queue->limits.discard_zeroes_data = 0;
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
 
-   add_disk(zram->disk, true);
+   add_disk(zram->disk, false);
 
ret = sysfs_create_group(_to_dev(zram->disk)->kobj,
_disk_attr_group);
@@ -1296,6 +1296,7 @@ static int zram_add(void)
device_id);
goto out_free_disk;
}
+   disk_gen_uevents(zram->disk);
strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
zram->meta = NULL;
 
-- 
2.9.0



[PATCH v2 07/12] pktcdvd: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/pktcdvd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 00928406..a4e6bb7 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2785,11 +2785,13 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->events = pd->bdev->bd_disk->events;
disk->async_events = pd->bdev->bd_disk->async_events;
 
-   add_disk(disk, true);
+   add_disk(disk, false);
 
pkt_sysfs_dev_new(pd);
pkt_debugfs_dev_new(pd);
 
+   disk_gen_uevents(disk);
+
pkt_devs[idx] = pd;
if (pkt_dev)
*pkt_dev = pd->pkt_dev;
-- 
2.9.0



[PATCH v2 07/12] pktcdvd: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng 
---
 drivers/block/pktcdvd.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/block/pktcdvd.c b/drivers/block/pktcdvd.c
index 00928406..a4e6bb7 100644
--- a/drivers/block/pktcdvd.c
+++ b/drivers/block/pktcdvd.c
@@ -2785,11 +2785,13 @@ static int pkt_setup_dev(dev_t dev, dev_t* pkt_dev)
disk->events = pd->bdev->bd_disk->events;
disk->async_events = pd->bdev->bd_disk->async_events;
 
-   add_disk(disk, true);
+   add_disk(disk, false);
 
pkt_sysfs_dev_new(pd);
pkt_debugfs_dev_new(pd);
 
+   disk_gen_uevents(disk);
+
pkt_devs[idx] = pd;
if (pkt_dev)
*pkt_dev = pd->pkt_dev;
-- 
2.9.0



[PATCH v2 04/12] axonrom: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 arch/powerpc/sysdev/axonram.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 4efd69b..27e7175 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -238,7 +238,7 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   add_disk(bank->disk, true);
+   add_disk(bank->disk, false);
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
@@ -262,6 +262,7 @@ static int axon_ram_probe(struct platform_device *device)
rc = -EFAULT;
goto failed;
}
+   disk_gen_uevents(bank->disk);
 
azfs_minor += bank->disk->minors;
 
-- 
2.9.0



[PATCH v2 03/12] virtio-blk: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
point we haven't created the serial attribute file, therefore depending
on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
created.

This race condition can be easily reproduced by hot plugging a number of
virtio-blk disks.

Also in systemd, there used to be a related workaround in udev rules
called 'WAIT_FOR="serial"', but it is removed in later versions.

Now let's generate a KOBJ_CHANGE event after the attributes are ready.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/block/virtio_blk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f3a59f9..cd9a036 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -733,7 +733,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 
virtio_device_ready(vdev);
 
-   add_disk(vblk->disk, true);
+   add_disk(vblk->disk, false);
err = device_create_file(disk_to_dev(vblk->disk), _attr_serial);
if (err)
goto out_del_disk;
@@ -746,6 +746,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 _attr_cache_type_ro);
if (err)
goto out_del_disk;
+   disk_gen_uevents(vblk->disk);
return 0;
 
 out_del_disk:
-- 
2.9.0



[PATCH v2 04/12] axonrom: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng 
---
 arch/powerpc/sysdev/axonram.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/sysdev/axonram.c b/arch/powerpc/sysdev/axonram.c
index 4efd69b..27e7175 100644
--- a/arch/powerpc/sysdev/axonram.c
+++ b/arch/powerpc/sysdev/axonram.c
@@ -238,7 +238,7 @@ static int axon_ram_probe(struct platform_device *device)
set_capacity(bank->disk, bank->size >> AXON_RAM_SECTOR_SHIFT);
blk_queue_make_request(bank->disk->queue, axon_ram_make_request);
blk_queue_logical_block_size(bank->disk->queue, AXON_RAM_SECTOR_SIZE);
-   add_disk(bank->disk, true);
+   add_disk(bank->disk, false);
 
bank->irq_id = irq_of_parse_and_map(device->dev.of_node, 0);
if (bank->irq_id == NO_IRQ) {
@@ -262,6 +262,7 @@ static int axon_ram_probe(struct platform_device *device)
rc = -EFAULT;
goto failed;
}
+   disk_gen_uevents(bank->disk);
 
azfs_minor += bank->disk->minors;
 
-- 
2.9.0



[PATCH v2 03/12] virtio-blk: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
Userspace listens to the KOBJ_ADD uevent generated in add_disk. At that
point we haven't created the serial attribute file, therefore depending
on how fast udev reacts, the /dev/disk/by-id/ entry doesn't always get
created.

This race condition can be easily reproduced by hot plugging a number of
virtio-blk disks.

Also in systemd, there used to be a related workaround in udev rules
called 'WAIT_FOR="serial"', but it is removed in later versions.

Now let's generate a KOBJ_CHANGE event after the attributes are ready.

Signed-off-by: Fam Zheng 
---
 drivers/block/virtio_blk.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index f3a59f9..cd9a036 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -733,7 +733,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 
virtio_device_ready(vdev);
 
-   add_disk(vblk->disk, true);
+   add_disk(vblk->disk, false);
err = device_create_file(disk_to_dev(vblk->disk), _attr_serial);
if (err)
goto out_del_disk;
@@ -746,6 +746,7 @@ static int virtblk_probe(struct virtio_device *vdev)
 _attr_cache_type_ro);
if (err)
goto out_del_disk;
+   disk_gen_uevents(vblk->disk);
return 0;
 
 out_del_disk:
-- 
2.9.0



[PATCH v2 10/12] mmc: Generate uevent after attribute available

2016-06-29 Thread Fam Zheng
It is documented that KOBJ_ADD should be generated after the object's
attributes and children are ready.  We can achieve this with the new
disk_gen_uevents interface.

Signed-off-by: Fam Zheng <f...@redhat.com>
---
 drivers/mmc/card/block.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index 94cf51e..4007106 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -2457,7 +2457,7 @@ static int mmc_add_disk(struct mmc_blk_data *md)
int ret;
struct mmc_card *card = md->queue.card;
 
-   add_disk(md->disk, true);
+   add_disk(md->disk, false);
md->force_ro.show = force_ro_show;
md->force_ro.store = force_ro_store;
sysfs_attr_init(>force_ro.attr);
@@ -2466,6 +2466,7 @@ static int mmc_add_disk(struct mmc_blk_data *md)
ret = device_create_file(disk_to_dev(md->disk), >force_ro);
if (ret)
goto force_ro_fail;
+   disk_gen_uevents(md->disk);
 
if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
 card->ext_csd.boot_ro_lockable) {
-- 
2.9.0



  1   2   3   4   >