On 12/12/17 17:22, Abhi Das wrote:
Trim the list in gfs2_ordered_write() as we run through it
to write out inodes.
Also attempt to remove an inode from the list after it is
fsync'ed.
Finally, call gfs2_ordered_write() if gfs2_ordered_shrink() was
not able to shrink the list, in the hope that it will eventually
cause the list to shrink.
---
  fs/gfs2/file.c   |  3 +++
  fs/gfs2/incore.h |  1 +
  fs/gfs2/log.c    | 13 +++++++++++--
  fs/gfs2/log.h    |  4 +++-
  fs/gfs2/quota.c  |  4 ++--
  5 files changed, 20 insertions(+), 5 deletions(-)
A further thought... eventually we'll probably need to build the bios ourselves so that we can hook the I/O completion functions. That would be the best way to get notification that the I/O has completed. We would want something like a copy of the existing filemap/writepage functions, but one that allows us to supply custom completions.

It would also be worth looking at the existing pagecache/mm code, to see how the inode dirty flags get cleared after an fsync, for example. That might be a useful reference for how to check that the inode is clean in a race-free manner.

Steve.


diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index 757ec66..75f9ac0 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -697,6 +697,9 @@ static int gfs2_fsync(struct file *file, loff_t start, 
loff_t end,
        if (mapping->nrpages)
                ret = filemap_fdatawait_range(mapping, start, end);
+ if (!ret && !ret1)
+               gfs2_ordered_del_inode(ip, ORD_WHENCE_FSYNC);
+
        return ret ? ret : ret1;
  }
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 6fcad2a..93da360 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -661,6 +661,7 @@ struct ord_stats {
        unsigned long os_rm_trunc;
        unsigned long os_rm_evict;
        unsigned long os_rm_wait;
+       unsigned long os_rm_fsync;
        unsigned long os_rm_syncfs;
        unsigned long os_rm_write;
        unsigned long os_rm_setflags;
diff --git a/fs/gfs2/log.c b/fs/gfs2/log.c
index 6d618a1..4cfef47 100644
--- a/fs/gfs2/log.c
+++ b/fs/gfs2/log.c
@@ -507,9 +507,13 @@ static void gfs2_ordered_write(struct gfs2_sbd *sdp)
        list_sort(NULL, &sdp->sd_log_le_ordered, &ip_cmp);
        while (!list_empty(&sdp->sd_log_le_ordered)) {
                ip = list_entry(sdp->sd_log_le_ordered.next, struct gfs2_inode, 
i_ordered);
-               list_move(&ip->i_ordered, &written);
-               if (ip->i_inode.i_mapping->nrpages == 0)
+               if (ip->i_inode.i_mapping->nrpages == 0) {
+                       test_and_clear_bit(GIF_ORDERED, &ip->i_flags);
+                       list_del(&ip->i_ordered);
+                       ord_stats_adjust(sdp, -1, ORD_WHENCE_ORD_WRITE);
                        continue;
+               }
+               list_move(&ip->i_ordered, &written);
                spin_unlock(&sdp->sd_ordered_lock);
                filemap_fdatawrite(ip->i_inode.i_mapping);
                spin_lock(&sdp->sd_ordered_lock);
@@ -540,17 +544,22 @@ static void gfs2_ordered_wait(struct gfs2_sbd *sdp)
  void gfs2_ordered_shrink(struct gfs2_sbd *sdp, int whence)
  {
        struct gfs2_inode *ip, *tmp;
+       bool removed;
spin_lock(&sdp->sd_ordered_lock);
        list_for_each_entry_safe(ip, tmp, &sdp->sd_log_le_ordered, i_ordered) {
                if (ip->i_inode.i_mapping->nrpages != 0)
                        continue;
                if (test_and_clear_bit(GIF_ORDERED, &ip->i_flags)) {
+                       removed = true;
                        list_del(&ip->i_ordered);
                        ord_stats_adjust(sdp, -1, whence);
                }
        }
        spin_unlock(&sdp->sd_ordered_lock);
+
+       if (!removed)
+               gfs2_ordered_write(sdp);
  }
void gfs2_ordered_del_inode(struct gfs2_inode *ip, int whence)
diff --git a/fs/gfs2/log.h b/fs/gfs2/log.h
index 80c8861..0bc3620 100644
--- a/fs/gfs2/log.h
+++ b/fs/gfs2/log.h
@@ -55,7 +55,8 @@ enum {
        ORD_WHENCE_SYNCFS       = 3,
        ORD_WHENCE_ORD_WRITE    = 4,
        ORD_WHENCE_SETFLAGS     = 5,
-       ORD_WHENCE_ADD          = 6,
+       ORD_WHENCE_FSYNC        = 6,
+       ORD_WHENCE_ADD          = 7,
  };
static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int count, int whence)
@@ -70,6 +71,7 @@ static inline void ord_stats_adjust(struct gfs2_sbd *sdp, int 
count, int whence)
        case ORD_WHENCE_SYNCFS:     os->os_rm_syncfs += -(count); break;
        case ORD_WHENCE_ORD_WRITE:  os->os_rm_write += -(count); break;
        case ORD_WHENCE_SETFLAGS:   os->os_rm_setflags += -(count); break;
+       case ORD_WHENCE_FSYNC:      os->os_rm_fsync += -(count); break;
case ORD_WHENCE_ADD: os->os_add += count; break;
        default: break;
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index 66c5126..63e3afa 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -1575,11 +1575,11 @@ int gfs2_quotad(void *data)
printk(KERN_WARNING "Ord list Size:%lu +[add_inode:%lu] "
                       "-[trunc:%lu evict:%lu wait:%lu syncfs:%lu ord_write:%lu"
-                      " setflags:%lu]\n", sdp->sd_ord_stats.os_ct,
+                      " setflags:%lu fsync:%lu]\n", sdp->sd_ord_stats.os_ct,
                       sdp->sd_ord_stats.os_add, sdp->sd_ord_stats.os_rm_trunc,
                       sdp->sd_ord_stats.os_rm_evict, 
sdp->sd_ord_stats.os_rm_wait,
                       sdp->sd_ord_stats.os_rm_syncfs, 
sdp->sd_ord_stats.os_rm_write,
-                      sdp->sd_ord_stats.os_rm_setflags);
+                      sdp->sd_ord_stats.os_rm_setflags, 
sdp->sd_ord_stats.os_rm_fsync);
        }
return 0;

Reply via email to