[PATCH v2 2/3] block: Add badblock management for gendisks

2015-12-07 Thread Vishal Verma
NVDIMM devices, which can behave more like DRAM rather than block
devices, may develop bad cache lines, or 'poison'. A block device
exposed by the pmem driver can then consume poison via a read (or
write), and cause a machine check. On platforms without machine
check recovery features, this would mean a crash.

The block device maintaining a runtime list of all known sectors that
have poison can directly avoid this, and also provide a path forward
to enable proper handling/recovery for DAX faults on such a device.

Use the new badblock management interfaces to add a badblocks list to
gendisks.

Signed-off-by: Vishal Verma 
---
 block/genhd.c | 76 +++
 include/linux/genhd.h |  7 +
 2 files changed, 83 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 0c706f3..809e3e2 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -20,6 +20,7 @@
 #include <linux/idr.h>
 #include <linux/log2.h>
 #include <linux/pm_runtime.h>
+#include <linux/badblocks.h>
 
 #include "blk.h"
 
@@ -505,6 +506,16 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
+int disk_alloc_badblocks(struct gendisk *disk)
+{
+   disk->bb = kzalloc(sizeof(*(disk->bb)), GFP_KERNEL);
+   if (!disk->bb)
+   return -ENOMEM;
+
+   return badblocks_init(disk->bb, 1);
+}
+EXPORT_SYMBOL(disk_alloc_badblocks);
+
 static void register_disk(struct gendisk *disk)
 {
struct device *ddev = disk_to_dev(disk);
@@ -657,6 +668,11 @@ void del_gendisk(struct gendisk *disk)
blk_unregister_queue(disk);
blk_unregister_region(disk_devt(disk), disk->minors);
 
+   if (disk->bb) {
+   badblocks_free(disk->bb);
+   kfree(disk->bb);
+   }
+
part_stat_set_all(&disk->part0, 0);
disk->part0.stamp = 0;
 
@@ -670,6 +686,63 @@ void del_gendisk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(del_gendisk);
 
+/*
+ * The gendisk usage of badblocks does not track acknowledgements for
+ * badblocks. We always assume they are acknowledged.
+ */
+int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
+  sector_t *first_bad, int *bad_sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_check(disk->bb, s, sectors, first_bad, bad_sectors);
+}
+EXPORT_SYMBOL(disk_check_badblocks);
+
+int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_set(disk->bb, s, sectors, 1);
+}
+EXPORT_SYMBOL(disk_set_badblocks);
+
+int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_clear(disk->bb, s, sectors);
+}
+EXPORT_SYMBOL(disk_clear_badblocks);
+
+/* sysfs access to bad-blocks list. */
+static ssize_t disk_badblocks_show(struct device *dev,
+   struct device_attribute *attr,
+   char *page)
+{
+   struct gendisk *disk = dev_to_disk(dev);
+
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_show(disk->bb, page, 0);
+}
+
+static ssize_t disk_badblocks_store(struct device *dev,
+   struct device_attribute *attr,
+   const char *page, size_t len)
+{
+   struct gendisk *disk = dev_to_disk(dev);
+
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_store(disk->bb, page, len, 0);
+}
+
 /**
  * get_gendisk - get partitioning information for a given device
  * @devt: device to get partitioning information for
@@ -988,6 +1061,8 @@ static DEVICE_ATTR(discard_alignment, S_IRUGO, 
disk_discard_alignment_show,
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
+   disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -1009,6 +1084,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_capability.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
+   &dev_attr_badblocks.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
 #endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 2adbfa6..985eb94 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -162,6 +162,7 @@ struct disk_part_tbl {
 };
 
 struct disk_events;
+struct badblocks;
 
 struct gendisk {
/* major, first_minor and minors are input parameters only,
@@ -201,6 +202,7 @@ struct gendisk {
struct blk_integrity *integrity;
 #endif
int node_id;
+   struct badblocks *bb;
 };
 
 static inline struct gendisk *part_to_disk(struct hd_struct *part)
@@ -421,6 

Re: [PATCH v2 2/3] block: Add badblock management for gendisks

2015-12-04 Thread James Bottomley
On Wed, 2015-11-25 at 11:43 -0700, Vishal Verma wrote:
> NVDIMM devices, which can behave more like DRAM rather than block
> devices, may develop bad cache lines, or 'poison'. A block device
> exposed by the pmem driver can then consume poison via a read (or
> write), and cause a machine check. On platforms without machine
> check recovery features, this would mean a crash.
> 
> The block device maintaining a runtime list of all known sectors that
> have poison can directly avoid this, and also provide a path forward
> to enable proper handling/recovery for DAX faults on such a device.
> 
> Use the new badblock management interfaces to add a badblocks list to
> gendisks.
> 
> Signed-off-by: Vishal Verma 
> ---
>  block/genhd.c | 81 
> +++
>  include/linux/genhd.h |  6 
>  2 files changed, 87 insertions(+)
> 
> diff --git a/block/genhd.c b/block/genhd.c
> index 0c706f3..84fd65c 100644
> --- a/block/genhd.c
> +++ b/block/genhd.c
> @@ -20,6 +20,7 @@
>  #include <linux/idr.h>
>  #include <linux/log2.h>
>  #include <linux/pm_runtime.h>
> +#include <linux/badblocks.h>
>  
>  #include "blk.h"
>  
> @@ -505,6 +506,20 @@ static int exact_lock(dev_t devt, void *data)
>   return 0;
>  }
>  
> +static void disk_alloc_badblocks(struct gendisk *disk)
> +{
> + disk->bb = kzalloc(sizeof(*(disk->bb)), GFP_KERNEL);
> + if (!disk->bb) {
> + pr_warn("%s: failed to allocate space for badblocks\n",
> + disk->disk_name);
> + return;
> + }
> +
> + if (badblocks_init(disk->bb, 1))
> + pr_warn("%s: failed to initialize badblocks\n",
> + disk->disk_name);
> +}
> +
>  static void register_disk(struct gendisk *disk)
>  {
>   struct device *ddev = disk_to_dev(disk);
> @@ -609,6 +624,7 @@ void add_disk(struct gendisk *disk)
>   disk->first_minor = MINOR(devt);
>  
>   disk_alloc_events(disk);
> + disk_alloc_badblocks(disk);

Why unconditionally do this?  No-one currently uses the interface, but
every disk will now pay the price of an additional structure plus a page
for no benefit.  You should probably either export the initializer for
those who want to use it or, perhaps even better, make it lazily
allocated the first time anyone tries to set a bad block.

If you come up with a really good reason for allocating it
unconditionally, then it should probably be an embedded structure in the
gendisk.

James


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2 2/3] block: Add badblock management for gendisks

2015-12-04 Thread Verma, Vishal L
On Fri, 2015-12-04 at 15:33 -0800, James Bottomley wrote:
[...]
> >  static void register_disk(struct gendisk *disk)
> >  {
> >     struct device *ddev = disk_to_dev(disk);
> > @@ -609,6 +624,7 @@ void add_disk(struct gendisk *disk)
> >     disk->first_minor = MINOR(devt);
> >  
> >     disk_alloc_events(disk);
> > +   disk_alloc_badblocks(disk);
> 
> Why unconditionally do this?  No-one currently uses the interface, but
> every disk will now pay the price of an additional structure plus a
> page
> for no benefit.  You should probably either export the initializer for
> those who want to use it or, perhaps even better, make it lazily
> allocated the first time anyone tries to set a bad block.
> 
> If you come up with a really good reason for allocating it
> unconditionally, then it should probably be an embedded structure in
> the gendisk.
> 
Agreed - I'll fix for v3.

I'm considering an embedded structure in gendisk (same as md) (why is
this preferred to pointer chasing, especially when this wastes more
space?), and a new exported initializer that is used by anyone who wants
to use gendisk's badblocks.


-Vishal

[PATCH v2 2/3] block: Add badblock management for gendisks

2015-11-25 Thread Vishal Verma
NVDIMM devices, which can behave more like DRAM rather than block
devices, may develop bad cache lines, or 'poison'. A block device
exposed by the pmem driver can then consume poison via a read (or
write), and cause a machine check. On platforms without machine
check recovery features, this would mean a crash.

The block device maintaining a runtime list of all known sectors that
have poison can directly avoid this, and also provide a path forward
to enable proper handling/recovery for DAX faults on such a device.

Use the new badblock management interfaces to add a badblocks list to
gendisks.

Signed-off-by: Vishal Verma 
---
 block/genhd.c | 81 +++
 include/linux/genhd.h |  6 
 2 files changed, 87 insertions(+)

diff --git a/block/genhd.c b/block/genhd.c
index 0c706f3..84fd65c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -20,6 +20,7 @@
 #include <linux/idr.h>
 #include <linux/log2.h>
 #include <linux/pm_runtime.h>
+#include <linux/badblocks.h>
 
 #include "blk.h"
 
@@ -505,6 +506,20 @@ static int exact_lock(dev_t devt, void *data)
return 0;
 }
 
+static void disk_alloc_badblocks(struct gendisk *disk)
+{
+   disk->bb = kzalloc(sizeof(*(disk->bb)), GFP_KERNEL);
+   if (!disk->bb) {
+   pr_warn("%s: failed to allocate space for badblocks\n",
+   disk->disk_name);
+   return;
+   }
+
+   if (badblocks_init(disk->bb, 1))
+   pr_warn("%s: failed to initialize badblocks\n",
+   disk->disk_name);
+}
+
 static void register_disk(struct gendisk *disk)
 {
struct device *ddev = disk_to_dev(disk);
@@ -609,6 +624,7 @@ void add_disk(struct gendisk *disk)
disk->first_minor = MINOR(devt);
 
disk_alloc_events(disk);
+   disk_alloc_badblocks(disk);
 
/* Register BDI before referencing it from bdev */
bdi = &disk->queue->backing_dev_info;
@@ -657,6 +673,11 @@ void del_gendisk(struct gendisk *disk)
blk_unregister_queue(disk);
blk_unregister_region(disk_devt(disk), disk->minors);
 
+   if (disk->bb) {
+   badblocks_free(disk->bb);
+   kfree(disk->bb);
+   }
+
part_stat_set_all(&disk->part0, 0);
disk->part0.stamp = 0;
 
@@ -670,6 +691,63 @@ void del_gendisk(struct gendisk *disk)
 }
 EXPORT_SYMBOL(del_gendisk);
 
+/*
+ * The gendisk usage of badblocks does not track acknowledgements for
+ * badblocks. We always assume they are acknowledged.
+ */
+int disk_check_badblocks(struct gendisk *disk, sector_t s, int sectors,
+  sector_t *first_bad, int *bad_sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_check(disk->bb, s, sectors, first_bad, bad_sectors);
+}
+EXPORT_SYMBOL(disk_check_badblocks);
+
+int disk_set_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_set(disk->bb, s, sectors, 1);
+}
+EXPORT_SYMBOL(disk_set_badblocks);
+
+int disk_clear_badblocks(struct gendisk *disk, sector_t s, int sectors)
+{
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_clear(disk->bb, s, sectors);
+}
+EXPORT_SYMBOL(disk_clear_badblocks);
+
+/* sysfs access to bad-blocks list. */
+static ssize_t disk_badblocks_show(struct device *dev,
+   struct device_attribute *attr,
+   char *page)
+{
+   struct gendisk *disk = dev_to_disk(dev);
+
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_show(disk->bb, page, 0);
+}
+
+static ssize_t disk_badblocks_store(struct device *dev,
+   struct device_attribute *attr,
+   const char *page, size_t len)
+{
+   struct gendisk *disk = dev_to_disk(dev);
+
+   if (!disk->bb)
+   return 0;
+
+   return badblocks_store(disk->bb, page, len, 0);
+}
+
 /**
  * get_gendisk - get partitioning information for a given device
  * @devt: device to get partitioning information for
@@ -988,6 +1066,8 @@ static DEVICE_ATTR(discard_alignment, S_IRUGO, 
disk_discard_alignment_show,
 static DEVICE_ATTR(capability, S_IRUGO, disk_capability_show, NULL);
 static DEVICE_ATTR(stat, S_IRUGO, part_stat_show, NULL);
 static DEVICE_ATTR(inflight, S_IRUGO, part_inflight_show, NULL);
+static DEVICE_ATTR(badblocks, S_IRUGO | S_IWUSR, disk_badblocks_show,
+   disk_badblocks_store);
 #ifdef CONFIG_FAIL_MAKE_REQUEST
 static struct device_attribute dev_attr_fail =
__ATTR(make-it-fail, S_IRUGO|S_IWUSR, part_fail_show, part_fail_store);
@@ -1009,6 +1089,7 @@ static struct attribute *disk_attrs[] = {
&dev_attr_capability.attr,
&dev_attr_stat.attr,
&dev_attr_inflight.attr,
+   &dev_attr_badblocks.attr,
 #ifdef CONFIG_FAIL_MAKE_REQUEST
&dev_attr_fail.attr,
 #endif
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 2adbfa6..5563bde