Re: [PATCH v4 1/1] dax: enable dax fault handler to report VM_FAULT_HWPOISON

2023-06-14 Thread Jane Chu

On 6/8/2023 8:16 PM, Dan Williams wrote:
[..]
  
+static inline int dax_mem2blk_err(int err)

+{
+   return (err == -EHWPOISON) ? -EIO : err;
+}


I think it is worth a comment on this function to indicate where this
helper is *not* used. I.e. it's easy to grep for where the error code is
converted for file I/O errors, but the subtlety of when the
dax_direct_acces() return value is not translated for fault handling
deserves a comment when we wonder why this function exists in a few
years time.


Good point!  v5 coming up next.

thanks!
-jane



RE: [PATCH v4 1/1] dax: enable dax fault handler to report VM_FAULT_HWPOISON

2023-06-08 Thread Dan Williams
Jane Chu wrote:
> When multiple processes mmap() a dax file, then at some point,
> a process issues a 'load' and consumes a hwpoison, the process
> receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
> set for the poison scope. Soon after, any other process issues
> a 'load' to the poisoned page (that is unmapped from the kernel
> side by memory_failure), it receives a SIGBUS with
> si_code = BUS_ADRERR and without valid si_lsb.
> 
> This is confusing to user, and is different from page fault due
> to poison in RAM memory, also some helpful information is lost.
> 
> Channel dax backend driver's poison detection to the filesystem
> such that instead of reporting VM_FAULT_SIGBUS, it could report
> VM_FAULT_HWPOISON.
> 
> If user level block IO syscalls fail due to poison, the errno will
> be converted to EIO to maintain block API consistency.
> 
> Signed-off-by: Jane Chu 
> ---
>  drivers/dax/super.c  |  5 -
>  drivers/nvdimm/pmem.c|  2 +-
>  drivers/s390/block/dcssblk.c |  3 ++-
>  fs/dax.c | 11 ++-
>  fs/fuse/virtio_fs.c  |  3 ++-
>  include/linux/dax.h  |  5 +
>  include/linux/mm.h   |  2 ++
>  7 files changed, 22 insertions(+), 9 deletions(-)
> 
> diff --git a/drivers/dax/super.c b/drivers/dax/super.c
> index c4c4728a36e4..0da9232ea175 100644
> --- a/drivers/dax/super.c
> +++ b/drivers/dax/super.c
> @@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, 
> pgoff_t pgoff, void *addr,
>  int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
>   size_t nr_pages)
>  {
> + int ret;
> +
>   if (!dax_alive(dax_dev))
>   return -ENXIO;
>   /*
> @@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, 
> pgoff_t pgoff,
>   if (nr_pages != 1)
>   return -EIO;
>  
> - return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> + ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
> + return dax_mem2blk_err(ret);
>  }
>  EXPORT_SYMBOL_GPL(dax_zero_page_range);
>  
> diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
> index ceea55f621cc..46e094e56159 100644
> --- a/drivers/nvdimm/pmem.c
> +++ b/drivers/nvdimm/pmem.c
> @@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device 
> *pmem, pgoff_t pgoff,
>   long actual_nr;
>  
>   if (mode != DAX_RECOVERY_WRITE)
> - return -EIO;
> + return -EHWPOISON;
>  
>   /*
>* Set the recovery stride is set to kernel page size because
> diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
> index c09f2e053bf8..ee47ac520cd4 100644
> --- a/drivers/s390/block/dcssblk.c
> +++ b/drivers/s390/block/dcssblk.c
> @@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device 
> *dax_dev,
>   rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
>   , NULL);
>   if (rc < 0)
> - return rc;
> + return dax_mem2blk_err(rc);
> +
>   memset(kaddr, 0, nr_pages << PAGE_SHIFT);
>   dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
>   return 0;
> diff --git a/fs/dax.c b/fs/dax.c
> index 2ababb89918d..a26eb5abfdc0 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
> length, size_t align_size,
>   if (!zero_edge) {
>   ret = dax_iomap_direct_access(srcmap, pos, size, , NULL);
>   if (ret)
> - return ret;
> + return dax_mem2blk_err(ret);
>   }
>  
>   if (copy_all) {
> @@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
>  
>  out_unlock:
>   dax_read_unlock(id);
> - return ret;
> + return dax_mem2blk_err(ret);
>  }
>  
>  int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
> @@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t 
> pos, size_t size)
>   ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, ,
>   NULL);
>   if (ret < 0)
> - return ret;
> + return dax_mem2blk_err(ret);
> +
>   memset(kaddr + offset, 0, size);
>   if (iomap->flags & IOMAP_F_SHARED)
>   ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
> @@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
> *iomi,
>  
>   map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
>   DAX_ACCESS, , NULL);
> - if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
> + if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
>   map_len = dax_direct_access(dax_dev, pgoff,
>   PHYS_PFN(size), DAX_RECOVERY_WRITE,
>   , NULL);
> 

Re: [PATCH v4 1/1] dax: enable dax fault handler to report VM_FAULT_HWPOISON

2023-05-30 Thread Jane Chu

Ping... Is there any further concern?

-jane

On 5/8/2023 10:47 PM, Jane Chu wrote:

When multiple processes mmap() a dax file, then at some point,
a process issues a 'load' and consumes a hwpoison, the process
receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
set for the poison scope. Soon after, any other process issues
a 'load' to the poisoned page (that is unmapped from the kernel
side by memory_failure), it receives a SIGBUS with
si_code = BUS_ADRERR and without valid si_lsb.

This is confusing to user, and is different from page fault due
to poison in RAM memory, also some helpful information is lost.

Channel dax backend driver's poison detection to the filesystem
such that instead of reporting VM_FAULT_SIGBUS, it could report
VM_FAULT_HWPOISON.

If user level block IO syscalls fail due to poison, the errno will
be converted to EIO to maintain block API consistency.

Signed-off-by: Jane Chu 
---
  drivers/dax/super.c  |  5 -
  drivers/nvdimm/pmem.c|  2 +-
  drivers/s390/block/dcssblk.c |  3 ++-
  fs/dax.c | 11 ++-
  fs/fuse/virtio_fs.c  |  3 ++-
  include/linux/dax.h  |  5 +
  include/linux/mm.h   |  2 ++
  7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index c4c4728a36e4..0da9232ea175 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t 
pgoff, void *addr,
  int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
  {
+   int ret;
+
if (!dax_alive(dax_dev))
return -ENXIO;
/*
@@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t 
pgoff,
if (nr_pages != 1)
return -EIO;
  
-	return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);

+   ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
+   return dax_mem2blk_err(ret);
  }
  EXPORT_SYMBOL_GPL(dax_zero_page_range);
  
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c

index ceea55f621cc..46e094e56159 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, 
pgoff_t pgoff,
long actual_nr;
  
  		if (mode != DAX_RECOVERY_WRITE)

-   return -EIO;
+   return -EHWPOISON;
  
  		/*

 * Set the recovery stride is set to kernel page size because
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index c09f2e053bf8..ee47ac520cd4 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device 
*dax_dev,
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
, NULL);
if (rc < 0)
-   return rc;
+   return dax_mem2blk_err(rc);
+
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0;
diff --git a/fs/dax.c b/fs/dax.c
index 2ababb89918d..a26eb5abfdc0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
length, size_t align_size,
if (!zero_edge) {
ret = dax_iomap_direct_access(srcmap, pos, size, , NULL);
if (ret)
-   return ret;
+   return dax_mem2blk_err(ret);
}
  
  	if (copy_all) {

@@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
  
  out_unlock:

dax_read_unlock(id);
-   return ret;
+   return dax_mem2blk_err(ret);
  }
  
  int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,

@@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t 
pos, size_t size)
ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, ,
NULL);
if (ret < 0)
-   return ret;
+   return dax_mem2blk_err(ret);
+
memset(kaddr + offset, 0, size);
if (iomap->flags & IOMAP_F_SHARED)
ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
@@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
*iomi,
  
  		map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),

DAX_ACCESS, , NULL);
-   if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
+   if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
map_len = dax_direct_access(dax_dev, pgoff,
PHYS_PFN(size), DAX_RECOVERY_WRITE,
, NULL);
@@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
*iomi,
   

[PATCH v4 1/1] dax: enable dax fault handler to report VM_FAULT_HWPOISON

2023-05-08 Thread Jane Chu
When multiple processes mmap() a dax file, then at some point,
a process issues a 'load' and consumes a hwpoison, the process
receives a SIGBUS with si_code = BUS_MCEERR_AR and with si_lsb
set for the poison scope. Soon after, any other process issues
a 'load' to the poisoned page (that is unmapped from the kernel
side by memory_failure), it receives a SIGBUS with
si_code = BUS_ADRERR and without valid si_lsb.

This is confusing to user, and is different from page fault due
to poison in RAM memory, also some helpful information is lost.

Channel dax backend driver's poison detection to the filesystem
such that instead of reporting VM_FAULT_SIGBUS, it could report
VM_FAULT_HWPOISON.

If user level block IO syscalls fail due to poison, the errno will
be converted to EIO to maintain block API consistency.

Signed-off-by: Jane Chu 
---
 drivers/dax/super.c  |  5 -
 drivers/nvdimm/pmem.c|  2 +-
 drivers/s390/block/dcssblk.c |  3 ++-
 fs/dax.c | 11 ++-
 fs/fuse/virtio_fs.c  |  3 ++-
 include/linux/dax.h  |  5 +
 include/linux/mm.h   |  2 ++
 7 files changed, 22 insertions(+), 9 deletions(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index c4c4728a36e4..0da9232ea175 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -203,6 +203,8 @@ size_t dax_copy_to_iter(struct dax_device *dax_dev, pgoff_t 
pgoff, void *addr,
 int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t pgoff,
size_t nr_pages)
 {
+   int ret;
+
if (!dax_alive(dax_dev))
return -ENXIO;
/*
@@ -213,7 +215,8 @@ int dax_zero_page_range(struct dax_device *dax_dev, pgoff_t 
pgoff,
if (nr_pages != 1)
return -EIO;
 
-   return dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
+   ret = dax_dev->ops->zero_page_range(dax_dev, pgoff, nr_pages);
+   return dax_mem2blk_err(ret);
 }
 EXPORT_SYMBOL_GPL(dax_zero_page_range);
 
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index ceea55f621cc..46e094e56159 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -260,7 +260,7 @@ __weak long __pmem_direct_access(struct pmem_device *pmem, 
pgoff_t pgoff,
long actual_nr;
 
if (mode != DAX_RECOVERY_WRITE)
-   return -EIO;
+   return -EHWPOISON;
 
/*
 * Set the recovery stride is set to kernel page size because
diff --git a/drivers/s390/block/dcssblk.c b/drivers/s390/block/dcssblk.c
index c09f2e053bf8..ee47ac520cd4 100644
--- a/drivers/s390/block/dcssblk.c
+++ b/drivers/s390/block/dcssblk.c
@@ -54,7 +54,8 @@ static int dcssblk_dax_zero_page_range(struct dax_device 
*dax_dev,
rc = dax_direct_access(dax_dev, pgoff, nr_pages, DAX_ACCESS,
, NULL);
if (rc < 0)
-   return rc;
+   return dax_mem2blk_err(rc);
+
memset(kaddr, 0, nr_pages << PAGE_SHIFT);
dax_flush(dax_dev, kaddr, nr_pages << PAGE_SHIFT);
return 0;
diff --git a/fs/dax.c b/fs/dax.c
index 2ababb89918d..a26eb5abfdc0 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1148,7 +1148,7 @@ static int dax_iomap_copy_around(loff_t pos, uint64_t 
length, size_t align_size,
if (!zero_edge) {
ret = dax_iomap_direct_access(srcmap, pos, size, , NULL);
if (ret)
-   return ret;
+   return dax_mem2blk_err(ret);
}
 
if (copy_all) {
@@ -1310,7 +1310,7 @@ static s64 dax_unshare_iter(struct iomap_iter *iter)
 
 out_unlock:
dax_read_unlock(id);
-   return ret;
+   return dax_mem2blk_err(ret);
 }
 
 int dax_file_unshare(struct inode *inode, loff_t pos, loff_t len,
@@ -1342,7 +1342,8 @@ static int dax_memzero(struct iomap_iter *iter, loff_t 
pos, size_t size)
ret = dax_direct_access(iomap->dax_dev, pgoff, 1, DAX_ACCESS, ,
NULL);
if (ret < 0)
-   return ret;
+   return dax_mem2blk_err(ret);
+
memset(kaddr + offset, 0, size);
if (iomap->flags & IOMAP_F_SHARED)
ret = dax_iomap_copy_around(pos, size, PAGE_SIZE, srcmap,
@@ -1498,7 +1499,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
*iomi,
 
map_len = dax_direct_access(dax_dev, pgoff, PHYS_PFN(size),
DAX_ACCESS, , NULL);
-   if (map_len == -EIO && iov_iter_rw(iter) == WRITE) {
+   if (map_len == -EHWPOISON && iov_iter_rw(iter) == WRITE) {
map_len = dax_direct_access(dax_dev, pgoff,
PHYS_PFN(size), DAX_RECOVERY_WRITE,
, NULL);
@@ -1506,7 +1507,7 @@ static loff_t dax_iomap_iter(const struct iomap_iter 
*iomi,
recovery = true;
}
if