Add vrange support on address_space structures, and add the fvrange()
syscall for creating ranges on address_space structures.

Cc: [email protected]
Cc: Michael Kerrisk <[email protected]>
Cc: Arun Sharma <[email protected]>
Cc: Mel Gorman <[email protected]>
Cc: Hugh Dickins <[email protected]>
Cc: Dave Hansen <[email protected]>
Cc: Rik van Riel <[email protected]>
Cc: Neil Brown <[email protected]>
Cc: Mike Hommey <[email protected]>
Cc: Taras Glek <[email protected]>
Cc: KOSAKI Motohiro <[email protected]>
Cc: KAMEZAWA Hiroyuki <[email protected]>
Cc: Jason Evans <[email protected]>
Cc: [email protected]
Cc: Paul Turner <[email protected]>
Cc: Johannes Weiner <[email protected]>
Cc: Michel Lespinasse <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Minchan Kim <[email protected]>
Signed-off-by: John Stultz <[email protected]>
---
 arch/x86/syscalls/syscall_64.tbl |    1 +
 fs/file_table.c                  |    5 +++
 fs/inode.c                       |    2 ++
 include/linux/fs.h               |    2 ++
 include/linux/vrange.h           |   19 +++++++++-
 include/linux/vrange_types.h     |    1 +
 mm/vrange.c                      |   72 +++++++++++++++++++++++++++++++++++++-
 7 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl
index dc332bd..910d9f3 100644
--- a/arch/x86/syscalls/syscall_64.tbl
+++ b/arch/x86/syscalls/syscall_64.tbl
@@ -321,6 +321,7 @@
 312    common  kcmp                    sys_kcmp
 313    common  finit_module            sys_finit_module
 314    common  vrange                  sys_vrange
+315    common  fvrange                 sys_fvrange
 
 #
 # x32-specific system call numbers start at 512 to avoid cache impact
diff --git a/fs/file_table.c b/fs/file_table.c
index cd4d87a..61c8aaa 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -26,6 +26,7 @@
 #include <linux/hardirq.h>
 #include <linux/task_work.h>
 #include <linux/ima.h>
+#include <linux/vrange.h>
 
 #include <linux/atomic.h>
 
@@ -244,6 +245,10 @@ static void __fput(struct file *file)
                        file->f_op->fasync(-1, file, 0);
        }
        ima_file_free(file);
+
+       /* drop all vranges on last close */
+       mapping_exit_vrange(inode->i_mapping);
+
        if (file->f_op && file->f_op->release)
                file->f_op->release(inode, file);
        security_file_free(file);
diff --git a/fs/inode.c b/fs/inode.c
index f5f7c06..4707c95 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -17,6 +17,7 @@
 #include <linux/prefetch.h>
 #include <linux/buffer_head.h> /* for inode_has_buffers */
 #include <linux/ratelimit.h>
+#include <linux/vrange.h>
 #include "internal.h"
 
 /*
@@ -350,6 +351,7 @@ void address_space_init_once(struct address_space *mapping)
        spin_lock_init(&mapping->private_lock);
        mapping->i_mmap = RB_ROOT;
        INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
+       mapping_init_vrange(mapping);
 }
 EXPORT_SYMBOL(address_space_init_once);
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 2c28271..6f86c7c 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -27,6 +27,7 @@
 #include <linux/lockdep.h>
 #include <linux/percpu-rwsem.h>
 #include <linux/blk_types.h>
+#include <linux/vrange_types.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -411,6 +412,7 @@ struct address_space {
        struct rb_root          i_mmap;         /* tree of private and shared mappings */
        struct list_head        i_mmap_nonlinear;/*list VM_NONLINEAR mappings */
        struct mutex            i_mmap_mutex;   /* protect tree, count, list */
+       struct vrange_root      vroot;
        /* Protected by tree_lock together with the radix tree */
        unsigned long           nrpages;        /* number of total pages */
        pgoff_t                 writeback_index;/* writeback starts here */
diff --git a/include/linux/vrange.h b/include/linux/vrange.h
index b9b219c..91960eb 100644
--- a/include/linux/vrange.h
+++ b/include/linux/vrange.h
@@ -3,6 +3,7 @@
 
 #include <linux/vrange_types.h>
 #include <linux/mm.h>
+#include <linux/fs.h>
 
 #define vrange_entry(ptr) \
        container_of(ptr, struct vrange, node.rb)
@@ -11,10 +12,19 @@
 
 static inline void mm_init_vrange(struct mm_struct *mm)
 {
+       mm->vroot.type = VRANGE_ANON;
        mm->vroot.v_rb = RB_ROOT;
        mutex_init(&mm->vroot.v_lock);
 }
 
+static inline void mapping_init_vrange(struct address_space *mapping)
+{
+       mapping->vroot.type = VRANGE_FILE;
+       mapping->vroot.v_rb = RB_ROOT;
+       mutex_init(&mapping->vroot.v_lock);
+}
+
+
 static inline void vrange_lock(struct vrange_root *vroot)
 {
        mutex_lock(&vroot->v_lock);
@@ -25,15 +35,22 @@ static inline void vrange_unlock(struct vrange_root *vroot)
        mutex_unlock(&vroot->v_lock);
 }
 
-static inline struct mm_struct *vrange_get_owner_mm(struct vrange *vrange)
+static inline int vrange_type(struct vrange *vrange)
 {
+       return vrange->owner->type;
+}
 
+static inline struct mm_struct *vrange_get_owner_mm(struct vrange *vrange)
+{
+       if (vrange_type(vrange) != VRANGE_ANON)
+               return NULL;
        return container_of(vrange->owner, struct mm_struct, vroot);
 }
 
 
 void vrange_init(void);
 extern void mm_exit_vrange(struct mm_struct *mm);
+extern void mapping_exit_vrange(struct address_space *mapping);
 int discard_vpage(struct page *page);
 bool vrange_address(struct mm_struct *mm, unsigned long start,
                        unsigned long end);
diff --git a/include/linux/vrange_types.h b/include/linux/vrange_types.h
index bede336..c7154e4 100644
--- a/include/linux/vrange_types.h
+++ b/include/linux/vrange_types.h
@@ -7,6 +7,7 @@
 struct vrange_root {
        struct rb_root v_rb;            /* vrange rb tree */
        struct mutex v_lock;            /* Protect v_rb */
+       enum {VRANGE_ANON, VRANGE_FILE} type; /* range root type */
 };
 
 
diff --git a/mm/vrange.c b/mm/vrange.c
index 9facbbc..671909c 100644
--- a/mm/vrange.c
+++ b/mm/vrange.c
@@ -14,6 +14,7 @@
 #include <linux/swapops.h>
 #include <linux/mmu_notifier.h>
 #include <linux/migrate.h>
+#include <linux/file.h>
 
 struct vrange_walker_private {
        struct zone *zone;
@@ -234,6 +235,20 @@ void mm_exit_vrange(struct mm_struct *mm)
        }
 }
 
+void mapping_exit_vrange(struct address_space *mapping)
+{
+       struct vrange *range;
+       struct rb_node *next;
+
+       next = rb_first(&mapping->vroot.v_rb);
+       while (next) {
+               range = vrange_entry(next);
+               next = rb_next(next);
+               __remove_range(range);
+               put_vrange(range);
+       }
+}
+
 /*
  * The vrange(2) system call.
  *
@@ -291,6 +306,51 @@ out:
 }
 
 
+SYSCALL_DEFINE5(fvrange, int, fd, size_t, offset,
+               size_t, len, int, mode, int, behavior)
+{
+       struct fd f = fdget(fd);
+       struct address_space *mapping;
+       u64 start = offset;
+       u64 end;
+       int ret = -EINVAL;
+
+       if (!f.file)
+               return -EBADF;
+
+       if (S_ISFIFO(file_inode(f.file)->i_mode)) {
+               ret = -ESPIPE;
+               goto out;
+       }
+
+       mapping = f.file->f_mapping;
+       if (!mapping || len < 0) {
+               ret = -EINVAL;
+               goto out;
+       }
+
+       if (start & ~PAGE_MASK)
+               goto out;
+
+
+       len &= PAGE_MASK;
+       if (!len)
+               goto out;
+
+       end = start + len;
+       if (end < start)
+               goto out;
+
+       if (mode == VRANGE_VOLATILE)
+               ret = add_vrange(&mapping->vroot, start, end - 1);
+       else if (mode == VRANGE_NOVOLATILE)
+               ret = remove_vrange(&mapping->vroot, start, end - 1);
+out:
+       fdput(f);
+       return ret;
+}
+
+
 static bool __vrange_address(struct mm_struct *mm,
                        unsigned long start, unsigned long end)
 {
@@ -641,6 +701,9 @@ unsigned int discard_vrange(struct zone *zone, struct vrange *vrange,
 
        mm = vrange_get_owner_mm(vrange);
 
+       if (!mm)
+               goto out;
+
        if (!down_read_trylock(&mm->mmap_sem))
                goto out;
 
@@ -683,6 +746,12 @@ static struct vrange *get_victim_vrange(void)
        list_for_each_prev_safe(cur, tmp, &lru_vrange) {
                vrange = list_entry(cur, struct vrange, lru);
                mm = vrange_get_owner_mm(vrange);
+
+               if (!mm) {
+                       vrange = NULL;
+                       continue;
+               }
+
                /* the process is exiting so pass it */
                if (atomic_read(&mm->mm_users) == 0) {
                        list_del_init(&vrange->lru);
@@ -720,7 +789,8 @@ static void put_victim_range(struct vrange *vrange)
        struct mm_struct *mm = vrange_get_owner_mm(vrange);
 
        put_vrange(vrange);
-       mmdrop(mm);
+       if (mm)
+               mmdrop(mm);
 }
 
 unsigned int discard_vrange_pages(struct zone *zone, int nr_to_discard)
-- 
1.7.10.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to