On Fri 30-06-17 10:48:15, Linus Torvalds wrote:
> On Fri, Jun 30, 2017 at 10:26 AM, Michal Hocko <mho...@kernel.org> wrote:
> >
> > Ohh, you misunderstood I guess. They wanted that only for internal
> > testing (e.g. make sure that everything that matters blows up if it is
> > doing something wrong). Absolutely nothing to base any compilator
> > decistion on.
> 
> Oh, good.
> 
> If that's the case, I really think we should try to add some code that
> checks that the stack grows strictly one page at a time, and have a
> way to enable SIGSEGV if that is ever not the case.
> 
> That should be trivial to add in expand_downwards/expand_upwards.

yes, but I would be worried to have this hardcoded. People very often do
run 3rd party code compiled with a non-default distribution compiler. I
also expect there are sensible usecases where probing on the stack could
lead to a performance degradation so people might want to explicitely
disable it.

> We could make a "warn once" thing unconditional for distro testing,
> but since compiler people would presumably want to test this before
> the rest of the distro is clean, they'd need some rlimit or something
> like that to enable it for particular processes.

yes, WARN_ONCE is not very practical to identify offenders..
 
> Would that be ok for them?
> 
> Some prctl to get/set that "max I'm allowed to extend the stack"?

prctl would be less code than proc interface I've had and quite
straightforward as well. Below is what I have ended up with for them. I
doesn't warn by default because I thought it would be too noisy without
stack probing used more widely.

If you think this is worth pursuing in upstream, just let me know and I
can polish it, add a patch for the man page and other things.
---
diff --git a/fs/exec.c b/fs/exec.c
index 904199086490..7589fb701fca 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -320,6 +320,13 @@ static int __bprm_mm_init(struct linux_binprm *bprm)
        arch_bprm_mm_init(mm, vma);
        up_write(&mm->mmap_sem);
        bprm->p = vma->vm_end - sizeof(void *);
+
+       /*
+        * We cannot set the stack expand limit now because we will do manual
+        * stack expansions which might be larger. See setup_arg_pages
+        */
+       if (current->mm)
+               bprm->expand_stack_limit = current->mm->expand_stack_limit;
        return 0;
 err:
        up_write(&mm->mmap_sem);
@@ -786,6 +793,14 @@ int setup_arg_pages(struct linux_binprm *bprm,
        if (ret)
                ret = -EFAULT;
 
+       /*
+        * Stack is finilized now and so all later expansions have
+        * to comply with the inherited limit.
+        *
+        * TODO: Do we want to allow non-privileged task to set
+        * the limit for privileged ones?
+        */
+       vma->vm_mm->expand_stack_limit = bprm->expand_stack_limit;
 out_unlock:
        up_write(&mm->mmap_sem);
        return ret;
diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h
index 05488da3aee9..2d3d7fff4811 100644
--- a/include/linux/binfmts.h
+++ b/include/linux/binfmts.h
@@ -17,6 +17,7 @@ struct linux_binprm {
        char buf[BINPRM_BUF_SIZE];
 #ifdef CONFIG_MMU
        struct vm_area_struct *vma;
+       unsigned long expand_stack_limit;
        unsigned long vma_pages;
 #else
 # define MAX_ARG_PAGES 32
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 45cdb27791a3..24ee38a45a9e 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -425,6 +425,7 @@ struct mm_struct {
        unsigned long start_brk, brk, start_stack;
        unsigned long arg_start, arg_end, env_start, env_end;
 
+       unsigned long expand_stack_limit;       /* how much we can grow stack 
at once */
        unsigned long saved_auxv[AT_VECTOR_SIZE]; /* for /proc/PID/auxv */
 
        /*
diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h
index a8d0759a9e40..6f3c65530c71 100644
--- a/include/uapi/linux/prctl.h
+++ b/include/uapi/linux/prctl.h
@@ -197,4 +197,7 @@ struct prctl_mm_map {
 # define PR_CAP_AMBIENT_LOWER          3
 # define PR_CAP_AMBIENT_CLEAR_ALL      4
 
+#define PR_SET_STACK_EXPAND_LIMIT 48
+#define PR_GET_STACK_EXPAND_LIMIT 49
+
 #endif /* _LINUX_PRCTL_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 4111d584ff4a..05bdcdeda98f 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -2290,6 +2290,17 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, 
unsigned long, arg3,
        case PR_GET_FP_MODE:
                error = GET_FP_MODE(me);
                break;
+       case PR_SET_STACK_EXPAND_LIMIT:
+               if (arg3 || arg4 || arg5)
+                       return -EINVAL;
+               if (down_write_killable(&me->mm->mmap_sem))
+                       return -EINTR;
+               me->mm->expand_stack_limit = arg2;
+               up_write(&me->mm->mmap_sem);
+               break;
+       case PR_GET_STACK_EXPAND_LIMIT:
+               error = me->mm->expand_stack_limit;
+               break;
        default:
                error = -EINVAL;
                break;
diff --git a/mm/mmap.c b/mm/mmap.c
index 5a0ba9788cdd..ff2a5981ee92 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2266,7 +2266,16 @@ int expand_upwards(struct vm_area_struct *vma, unsigned 
long address)
                unsigned long size, grow;
 
                size = address - vma->vm_start;
-               grow = (address - vma->vm_end) >> PAGE_SHIFT;
+               grow = address - vma->vm_end;
+
+               if (mm->expand_stack_limit && grow > mm->expand_stack_limit) {
+                       pr_warn("%s[%d]: disallowed stack expansion %lu with 
limit %lu\n",
+                                       current->comm, task_pid_nr(current),
+                                       grow, mm->expand_stack_limit);
+                       anon_vma_unlock_write(vma->anon_vma);
+                       return -ENOMEM;
+               }
+               grow >>= PAGE_SHIFT;
 
                error = -ENOMEM;
                if (vma->vm_pgoff + (size >> PAGE_SHIFT) >= vma->vm_pgoff) {
@@ -2350,7 +2359,20 @@ int expand_downwards(struct vm_area_struct *vma,
                unsigned long size, grow;
 
                size = vma->vm_end - address;
-               grow = (vma->vm_start - address) >> PAGE_SHIFT;
+               grow = vma->vm_start - address;
+
+               /*
+                * Make sure that a single stack expansion doesn't exceed the
+                * configured limit.
+                */
+               if (mm->expand_stack_limit && grow > mm->expand_stack_limit) {
+                       pr_warn("%s[%d]: disallowed stack expansion %lu with 
limit %lu\n",
+                                       current->comm, task_pid_nr(current),
+                                       grow, mm->expand_stack_limit);
+                       anon_vma_unlock_write(vma->anon_vma);
+                       return -ENOMEM;
+               }
+               grow >>= PAGE_SHIFT;
 
                error = -ENOMEM;
                if (grow <= vma->vm_pgoff) {
-- 
Michal Hocko
SUSE Labs

Reply via email to