When inspecting a vague code inside prctl(PR_SET_MM_MEM)
call (which testing the RLIMIT_DATA value to figure out
if we're allowed to assign new @start_brk, @brk, @start_data,
@end_data from mm_struct) it's been commited that RLIMIT_DATA
in a form it's implemented now doesn't do anything useful
because most of user-space libraries use mmap() syscall
for dynamic memory allocations.

So Linus suggested to convert RLIMIT_DATA rlimit into something
suitable for anonymous memory accounting. Here we introduce
new @anon_vm member into mm descriptor which is updated
every vm_stat_account call. When mmap_region() is called
we test if current RLIMIT_DATA limit is not exceeded.

This should give a way to control the amount of anonymous
memory allocated.

p.s.: This rfc not for application, I would like to
hear if I move in right direction in general. In
particular I need to add hugetlb anon mappings here
as well and update docs and etc.

So comments are highly appreciated.

CC: Quentin Casasnovas <[email protected]>
CC: Vegard Nossum <[email protected]>
CC: Linus Torvalds <[email protected]>
CC: Willy Tarreau <[email protected]>
CC: Andy Lutomirski <[email protected]>
CC: Kees Cook <[email protected]>
CC: Vladimir Davydov <[email protected]>
CC: Konstantin Khlebnikov <[email protected]>
CC: Pavel Emelyanov <[email protected]>
CC: Vladimir Davydov <[email protected]>
CC: Peter Zijlstra <[email protected]>
Signed-off-by: Cyrill Gorcunov <[email protected]>
---
 fs/proc/task_mmu.c       |    2 ++
 include/linux/mm_types.h |    1 +
 mm/mmap.c                |   22 ++++++++++++++++++++++
 3 files changed, 25 insertions(+)

Index: linux-ml.git/fs/proc/task_mmu.c
===================================================================
--- linux-ml.git.orig/fs/proc/task_mmu.c
+++ linux-ml.git/fs/proc/task_mmu.c
@@ -53,6 +53,7 @@ void task_mem(struct seq_file *m, struct
                "VmHWM:\t%8lu kB\n"
                "VmRSS:\t%8lu kB\n"
                "VmData:\t%8lu kB\n"
+               "VmAnon:\t%8lu kB\n"
                "VmStk:\t%8lu kB\n"
                "VmExe:\t%8lu kB\n"
                "VmLib:\t%8lu kB\n"
@@ -66,6 +67,7 @@ void task_mem(struct seq_file *m, struct
                hiwater_rss << (PAGE_SHIFT-10),
                total_rss << (PAGE_SHIFT-10),
                data << (PAGE_SHIFT-10),
+               mm->anon_vm << (PAGE_SHIFT-10),
                mm->stack_vm << (PAGE_SHIFT-10), text, lib,
                ptes >> 10,
                pmds >> 10,
Index: linux-ml.git/include/linux/mm_types.h
===================================================================
--- linux-ml.git.orig/include/linux/mm_types.h
+++ linux-ml.git/include/linux/mm_types.h
@@ -429,6 +429,7 @@ struct mm_struct {
        unsigned long shared_vm;        /* Shared pages (files) */
        unsigned long exec_vm;          /* VM_EXEC & ~VM_WRITE */
        unsigned long stack_vm;         /* VM_GROWSUP/DOWN */
+       unsigned long anon_vm;          /* Anonymous pages mapped */
        unsigned long def_flags;
        unsigned long start_code, end_code, start_data, end_data;
        unsigned long start_brk, brk, start_stack;
Index: linux-ml.git/mm/mmap.c
===================================================================
--- linux-ml.git.orig/mm/mmap.c
+++ linux-ml.git/mm/mmap.c
@@ -1214,6 +1214,8 @@ void vm_stat_account(struct mm_struct *m
 {
        const unsigned long stack_flags
                = VM_STACK_FLAGS & (VM_GROWSUP|VM_GROWSDOWN);
+       const unsigned long not_anon_acc
+               = VM_GROWSUP | VM_GROWSDOWN | VM_SHARED | VM_MAYSHARE;
 
        mm->total_vm += pages;
 
@@ -1223,6 +1225,9 @@ void vm_stat_account(struct mm_struct *m
                        mm->exec_vm += pages;
        } else if (flags & stack_flags)
                mm->stack_vm += pages;
+
+       if (!file && (flags & not_anon_acc) == 0)
+               mm->anon_vm += pages;
 }
 #endif /* CONFIG_PROC_FS */
 
@@ -1534,6 +1539,13 @@ static inline int accountable_mapping(st
        return (vm_flags & (VM_NORESERVE | VM_SHARED | VM_WRITE)) == VM_WRITE;
 }
 
+static inline int anon_accountable_mapping(struct file *file, vm_flags_t 
vm_flags)
+{
+       return !file &&
+               (vm_flags & (VM_GROWSDOWN | VM_GROWSUP |
+                            VM_SHARED | VM_MAYSHARE)) == 0;
+}
+
 unsigned long mmap_region(struct file *file, unsigned long addr,
                unsigned long len, vm_flags_t vm_flags, unsigned long pgoff)
 {
@@ -1578,6 +1590,16 @@ unsigned long mmap_region(struct file *f
        }
 
        /*
+        * For anon mappings make sure we don't exceed the limit.
+        */
+       if (anon_accountable_mapping(file, vm_flags)) {
+               unsigned long lim = rlimit(RLIMIT_DATA) >> PAGE_SHIFT;
+
+               if (lim < (mm->anon_vm + (len >> PAGE_SHIFT)))
+                       return -ENOMEM;
+       }
+
+       /*
         * Can we just expand an old mapping?
         */
        vma = vma_merge(mm, prev, addr, addr + len, vm_flags,
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to