date:20220902

[PATCH 3/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

2022-09-02 Thread Shiyang Ruan

This patch is inspired by Dan's "mm, dax, pmem: Introduce
dev_pagemap_failure()"[1].  With the help of the dax_holder and
->notify_failure() mechanism, the pmem driver is able to ask the
filesystem (or mapped device) on it to unmap all files in use and to
notify the processes that are using those files.

Call trace:
trigger unbind
 -> unbind_store()
  -> ... (skip)
   -> devres_release_all()   # was pmem driver ->remove() in v1
    -> kill_dax()
     -> dax_holder_notify_failure(dax_dev, 0, U64_MAX, MF_MEM_PRE_REMOVE)
      -> xfs_dax_notify_failure()

Introduce MF_MEM_PRE_REMOVE to let the filesystem know that this is a
remove event.  In that case, do not shut down the filesystem directly if
something is not supported, or if the failure range includes the metadata
area.  Make sure all files and processes are handled correctly.

[1]: https://lore.kernel.org/linux-mm/161604050314.1463742.14151665140035795571.st...@dwillia2-desk3.amr.corp.intel.com/

Signed-off-by: Shiyang Ruan 
---
 drivers/dax/super.c |  3 ++-
 fs/xfs/xfs_notify_failure.c | 23 +++
 include/linux/mm.h  |  1 +
 3 files changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/dax/super.c b/drivers/dax/super.c
index 9b5e2a5eb0ae..cf9a64563fbe 100644
--- a/drivers/dax/super.c
+++ b/drivers/dax/super.c
@@ -323,7 +323,8 @@ void kill_dax(struct dax_device *dax_dev)
return;
 
if (dax_dev->holder_data != NULL)
-   dax_holder_notify_failure(dax_dev, 0, U64_MAX, 0);
+   dax_holder_notify_failure(dax_dev, 0, U64_MAX,
+   MF_MEM_PRE_REMOVE);
 
clear_bit(DAXDEV_ALIVE, _dev->flags);
synchronize_srcu(_srcu);
diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index 3830f908e215..5e04ba7fa403 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -22,6 +22,7 @@
 
 #include 
 #include 
+#include 
 
 struct xfs_failure_info {
xfs_agblock_t   startblock;
@@ -77,6 +78,9 @@ xfs_dax_failure_fn(
 
if (XFS_RMAP_NON_INODE_OWNER(rec->rm_owner) ||
(rec->rm_flags & (XFS_RMAP_ATTR_FORK | XFS_RMAP_BMBT_BLOCK))) {
+   /* The device is about to be removed.  Not a really failure. */
+   if (notify->mf_flags & MF_MEM_PRE_REMOVE)
+   return 0;
notify->want_shutdown = true;
return 0;
}
@@ -182,12 +186,23 @@ xfs_dax_notify_failure(
struct xfs_mount*mp = dax_holder(dax_dev);
u64 ddev_start;
u64 ddev_end;
+   int error;
 
if (!(mp->m_super->s_flags & SB_BORN)) {
xfs_warn(mp, "filesystem is not ready for notify_failure()!");
return -EIO;
}
 
+   if (mf_flags & MF_MEM_PRE_REMOVE) {
+   xfs_info(mp, "device is about to be removed!");
+   down_write(>m_super->s_umount);
+   error = sync_filesystem(mp->m_super);
+   drop_pagecache_sb(mp->m_super, NULL);
+   up_write(>m_super->s_umount);
+   if (error)
+   return error;
+   }
+
if (mp->m_rtdev_targp && mp->m_rtdev_targp->bt_daxdev == dax_dev) {
xfs_debug(mp,
 "notify_failure() not supported on realtime device!");
@@ -196,6 +211,8 @@ xfs_dax_notify_failure(
 
if (mp->m_logdev_targp && mp->m_logdev_targp->bt_daxdev == dax_dev &&
mp->m_logdev_targp != mp->m_ddev_targp) {
+   if (mf_flags & MF_MEM_PRE_REMOVE)
+   return 0;
xfs_err(mp, "ondisk log corrupt, shutting down fs!");
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_ONDISK);
return -EFSCORRUPTED;
@@ -209,6 +226,12 @@ xfs_dax_notify_failure(
ddev_start = mp->m_ddev_targp->bt_dax_part_off;
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
 
+   /* Notify failure on the whole device */
+   if (offset == 0 && len == U64_MAX) {
+   offset = ddev_start;
+   len = bdev_nr_bytes(mp->m_ddev_targp->bt_bdev);
+   }
+
/* Ignore the range out of filesystem area */
if (offset + len - 1 < ddev_start)
return -ENXIO;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 21f8b27bd9fd..9122a1c57dd2 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3183,6 +3183,7 @@ enum mf_flags {
MF_UNPOISON = 1 << 4,
MF_SW_SIMULATED = 1 << 5,
MF_NO_RETRY = 1 << 6,
+   MF_MEM_PRE_REMOVE = 1 << 7,
 };
 int mf_dax_kill_procs(struct address_space *mapping, pgoff_t index,
  unsigned long count, int mf_flags);
-- 
2.37.2

[PATCH v8 0/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

2022-09-02 Thread Shiyang Ruan

Changes since v7:
  1. Add P1 to fix a calculation mistake.
  2. Add P2 to move drop_pagecache_sb() to super.c for xfs to use.
  3. P3: Invalidate all mappings after sync.
  4. P3: Set offset to the start of the device when it is about to be removed.
  5. Rebase on 6.0-rc3 + Darrick's patch[1] + Dan's patch[2].

Changes since v6:
  1. Rebase on 6.0-rc2 and Darrick's patch[1].

[1]: https://lore.kernel.org/linux-xfs/Yv5wIa2crHioYeRr@magnolia/
[2]: https://lore.kernel.org/linux-xfs/166153426798.2758201.15108211981034512993.st...@dwillia2-xfh.jf.intel.com/

Shiyang Ruan (3):
  xfs: fix the calculation of length and end
  fs: move drop_pagecache_sb() for others to use
  mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

 drivers/dax/super.c |  3 ++-
 fs/drop_caches.c| 33 -
 fs/super.c  | 34 ++
 fs/xfs/xfs_notify_failure.c | 31 +++
 include/linux/fs.h  |  1 +
 include/linux/mm.h  |  1 +
 6 files changed, 65 insertions(+), 38 deletions(-)

-- 
2.37.2

[PATCH 2/3] fs: move drop_pagecache_sb() for others to use

2022-09-02 Thread Shiyang Ruan

xfs_notify_failure requires a method to invalidate all mappings.
drop_pagecache_sb() can do this, but it is a static function and is only
built with CONFIG_SYSCTL.  Move it to super.c and make it available to
other callers.

Signed-off-by: Shiyang Ruan 
---
 fs/drop_caches.c   | 33 -
 fs/super.c | 34 ++
 include/linux/fs.h |  1 +
 3 files changed, 35 insertions(+), 33 deletions(-)

diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index e619c31b6bd9..5c8406076f9b 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -3,7 +3,6 @@
  * Implement the manual drop-all-pagecache function
  */
 
-#include 
 #include 
 #include 
 #include 
@@ -15,38 +14,6 @@
 /* A global variable is a bit ugly, but it keeps the code simple */
 int sysctl_drop_caches;
 
-static void drop_pagecache_sb(struct super_block *sb, void *unused)
-{
-   struct inode *inode, *toput_inode = NULL;
-
-   spin_lock(>s_inode_list_lock);
-   list_for_each_entry(inode, >s_inodes, i_sb_list) {
-   spin_lock(>i_lock);
-   /*
-* We must skip inodes in unusual state. We may also skip
-* inodes without pages but we deliberately won't in case
-* we need to reschedule to avoid softlockups.
-*/
-   if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
-   (mapping_empty(inode->i_mapping) && !need_resched())) {
-   spin_unlock(>i_lock);
-   continue;
-   }
-   __iget(inode);
-   spin_unlock(>i_lock);
-   spin_unlock(>s_inode_list_lock);
-
-   invalidate_mapping_pages(inode->i_mapping, 0, -1);
-   iput(toput_inode);
-   toput_inode = inode;
-
-   cond_resched();
-   spin_lock(>s_inode_list_lock);
-   }
-   spin_unlock(>s_inode_list_lock);
-   iput(toput_inode);
-}
-
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
void *buffer, size_t *length, loff_t *ppos)
 {
diff --git a/fs/super.c b/fs/super.c
index 734ed584a946..bdf53dbe834c 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -36,6 +36,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include "internal.h"
 
@@ -677,6 +678,39 @@ void drop_super_exclusive(struct super_block *sb)
 }
 EXPORT_SYMBOL(drop_super_exclusive);
 
+void drop_pagecache_sb(struct super_block *sb, void *unused)
+{
+   struct inode *inode, *toput_inode = NULL;
+
+   spin_lock(>s_inode_list_lock);
+   list_for_each_entry(inode, >s_inodes, i_sb_list) {
+   spin_lock(>i_lock);
+   /*
+* We must skip inodes in unusual state. We may also skip
+* inodes without pages but we deliberately won't in case
+* we need to reschedule to avoid softlockups.
+*/
+   if ((inode->i_state & (I_FREEING|I_WILL_FREE|I_NEW)) ||
+   (mapping_empty(inode->i_mapping) && !need_resched())) {
+   spin_unlock(>i_lock);
+   continue;
+   }
+   __iget(inode);
+   spin_unlock(>i_lock);
+   spin_unlock(>s_inode_list_lock);
+
+   invalidate_mapping_pages(inode->i_mapping, 0, -1);
+   iput(toput_inode);
+   toput_inode = inode;
+
+   cond_resched();
+   spin_lock(>s_inode_list_lock);
+   }
+   spin_unlock(>s_inode_list_lock);
+   iput(toput_inode);
+}
+EXPORT_SYMBOL(drop_pagecache_sb);
+
 static void __iterate_supers(void (*f)(struct super_block *))
 {
struct super_block *sb, *p = NULL;
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 9eced4cc286e..5ded28c0d2c9 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3292,6 +3292,7 @@ extern struct super_block *get_super(struct block_device 
*);
 extern struct super_block *get_active_super(struct block_device *bdev);
 extern void drop_super(struct super_block *sb);
 extern void drop_super_exclusive(struct super_block *sb);
+void drop_pagecache_sb(struct super_block *sb, void *unused);
 extern void iterate_supers(void (*)(struct super_block *, void *), void *);
 extern void iterate_supers_type(struct file_system_type *,
void (*)(struct super_block *, void *), void *);
-- 
2.37.2

[PATCH 1/3] xfs: fix the calculation of length and end

2022-09-02 Thread Shiyang Ruan

The end should be start + length - 1.  Also fix the calculation of the
length when computing the intersection of the notify range and the device.

Signed-off-by: Shiyang Ruan 
---
 fs/xfs/xfs_notify_failure.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fs/xfs/xfs_notify_failure.c b/fs/xfs/xfs_notify_failure.c
index c4078d0ec108..3830f908e215 100644
--- a/fs/xfs/xfs_notify_failure.c
+++ b/fs/xfs/xfs_notify_failure.c
@@ -114,7 +114,7 @@ xfs_dax_notify_ddev_failure(
int error = 0;
xfs_fsblock_t   fsbno = XFS_DADDR_TO_FSB(mp, daddr);
xfs_agnumber_t  agno = XFS_FSB_TO_AGNO(mp, fsbno);
-   xfs_fsblock_t   end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen);
+   xfs_fsblock_t   end_fsbno = XFS_DADDR_TO_FSB(mp, daddr + bblen 
- 1);
xfs_agnumber_t  end_agno = XFS_FSB_TO_AGNO(mp, end_fsbno);
 
error = xfs_trans_alloc_empty(mp, );
@@ -210,7 +210,7 @@ xfs_dax_notify_failure(
ddev_end = ddev_start + bdev_nr_bytes(mp->m_ddev_targp->bt_bdev) - 1;
 
/* Ignore the range out of filesystem area */
-   if (offset + len < ddev_start)
+   if (offset + len - 1 < ddev_start)
return -ENXIO;
if (offset > ddev_end)
return -ENXIO;
@@ -222,8 +222,8 @@ xfs_dax_notify_failure(
len -= ddev_start - offset;
offset = 0;
}
-   if (offset + len > ddev_end)
-   len -= ddev_end - offset;
+   if (offset + len - 1 > ddev_end)
+   len -= offset + len - 1 - ddev_end;
 
return xfs_dax_notify_ddev_failure(mp, BTOBB(offset), BTOBB(len),
mf_flags);
-- 
2.37.2

[PATCH 3/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

[PATCH v8 0/3] mm, pmem, xfs: Introduce MF_MEM_REMOVE for unbind

[PATCH 2/3] fs: move drop_pagecache_sb() for others to use

[PATCH 1/3] xfs: fix the calculation of length and end

4 matches

Site Navigation

Mail list logo

Footer information