When we restore a task we need to restore its exe link from userspace to
the values the task had at checkpoint time.

Currently this operation requires the global CAP_SYS_RESOURCE, which is
always absent in a non-root user namespace.

So this patch introduces a new security bit which:
* can be set only if a task has the global CAP_SYS_RESOURCE
* is inherited by child processes
* is preserved when a task moves into another userns
* allows a task to change its exe link even if it doesn't have CAP_SYS_RESOURCE

Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Oleg Nesterov <o...@redhat.com>
Cc: Al Viro <v...@zeniv.linux.org.uk>
Cc: Kees Cook <keesc...@chromium.org>
Cc: "Eric W. Biederman" <ebied...@xmission.com>
Cc: Stephen Rothwell <s...@canb.auug.org.au>
Cc: Pavel Emelyanov <xe...@parallels.com>
Cc: Aditya Kali <adityak...@google.com>
Signed-off-by: Andrey Vagin <ava...@openvz.org>
---
 include/uapi/linux/securebits.h | 12 +++++++++++-
 kernel/sys.c                    |  5 +++++
 kernel/user_namespace.c         |  3 ++-
 security/commoncap.c            |  7 +++++++
 4 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/securebits.h b/include/uapi/linux/securebits.h
index 985aac9..c99803b 100644
--- a/include/uapi/linux/securebits.h
+++ b/include/uapi/linux/securebits.h
@@ -43,9 +43,19 @@
 #define SECBIT_KEEP_CAPS       (issecure_mask(SECURE_KEEP_CAPS))
 #define SECBIT_KEEP_CAPS_LOCKED (issecure_mask(SECURE_KEEP_CAPS_LOCKED))
 
+/* When set, a process can do PR_SET_MM_EXE_FILE even if it doesn't
+ * have CAP_SYS_RESOURCE. Setting this bit requires CAP_SYS_RESOURCE.
+ * This bit is not dropped when a task moves into another userns. */
+#define SECURE_SET_EXE_FILE            6
+#define SECURE_SET_EXE_FILE_LOCKED     7  /* make bit-6 immutable */
+
+#define SECBIT_SET_EXE_FILE       (issecure_mask(SECURE_SET_EXE_FILE))
+#define SECBIT_SET_EXE_FILE_LOCKED (issecure_mask(SECURE_SET_EXE_FILE_LOCKED))
+
 #define SECURE_ALL_BITS                (issecure_mask(SECURE_NOROOT) | \
                                 issecure_mask(SECURE_NO_SETUID_FIXUP) | \
-                                issecure_mask(SECURE_KEEP_CAPS))
+                                issecure_mask(SECURE_KEEP_CAPS) | \
+                                issecure_mask(SECURE_SET_EXE_FILE))
 #define SECURE_ALL_LOCKS       (SECURE_ALL_BITS << 1)
 
 #endif /* _UAPI_LINUX_SECUREBITS_H */
diff --git a/kernel/sys.c b/kernel/sys.c
index 939370c..2f0925d 100644
--- a/kernel/sys.c
+++ b/kernel/sys.c
@@ -18,6 +18,7 @@
 #include <linux/kernel.h>
 #include <linux/workqueue.h>
 #include <linux/capability.h>
+#include <linux/securebits.h>
 #include <linux/device.h>
 #include <linux/key.h>
 #include <linux/times.h>
@@ -1714,6 +1715,10 @@ static int prctl_set_mm(int opt, unsigned long addr,
                        if (rlimit(RLIMIT_STACK) < RLIM_INFINITY)
                                return -EPERM;
                        break;
+               case PR_SET_MM_EXE_FILE:
+                       if (!issecure(SECURE_SET_EXE_FILE))
+                               return -EPERM;
+                       break;
                default:
                        return -EPERM;
                }
diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c
index 240fb62..59584fe 100644
--- a/kernel/user_namespace.c
+++ b/kernel/user_namespace.c
@@ -34,7 +34,8 @@ static void set_cred_user_ns(struct cred *cred, struct user_namespace *user_ns)
        /* Start with the same capabilities as init but useless for doing
         * anything as the capabilities are bound to the new user namespace.
         */
-       cred->securebits = SECUREBITS_DEFAULT;
+       cred->securebits = SECUREBITS_DEFAULT |
+                               (cred->securebits & SECBIT_SET_EXE_FILE);
        cred->cap_inheritable = CAP_EMPTY_SET;
        cred->cap_permitted = CAP_FULL_SET;
        cred->cap_effective = CAP_FULL_SET;
diff --git a/security/commoncap.c b/security/commoncap.c
index b9d613e..eda1eb8 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -907,6 +907,13 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3,
                    )
                        /* cannot change a locked bit */
                        goto error;
+
+               /* Setting SECURE_SET_EXE_FILE requires CAP_SYS_RESOURCE */
+               if ((arg2 & SECBIT_SET_EXE_FILE) &&
+                   !(new->securebits & SECBIT_SET_EXE_FILE) &&
+                   !capable(CAP_SYS_RESOURCE))
+                       goto error;
+
                new->securebits = arg2;
                goto changed;
 
-- 
1.8.5.3

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to