On clone3() we first create and switch to user namespace, when ve
namespace owned by this user namespace and switch to it, and only then
create other namespaces, this way we can make sure that all the
Container namespaces created together with VE namespace have
get_exec_env() pointing to new VE namespace.

On unshare() we can't easily enforce it, as this code path requires
creating all namespaces first and only then joins them (basically
namespace change either happens for all requested namespaces or it does
not happen atomically). So while in this patch we reorder it for code
consistency, it does not really work, so we prohibit using unshare()
with CLONE_NEWVE together with non CLONE_NEWUSER namespaces.

Fixes: 8a771a3d6bea ("ve: Introduce VE namespace")
https://virtuozzo.atlassian.net/browse/VSTOR-118289
Signed-off-by: Pavel Tikhomirov <[email protected]>

Feature: ve: ve generic structures
---
v2: new path
---
 kernel/fork.c | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/kernel/fork.c b/kernel/fork.c
index 5ce4c2ac91c1..b4e09af18288 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2389,15 +2389,15 @@ __latent_entropy struct task_struct *copy_process(
        retval = copy_mm(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_signal;
-       retval = copy_namespaces(clone_flags, p);
+       retval = copy_ve_ns(clone_flags, p);
        if (retval)
                goto bad_fork_cleanup_mm;
-       retval = copy_ve_ns(clone_flags, p);
+       retval = copy_namespaces(clone_flags, p);
        if (retval)
-               goto bad_fork_cleanup_namespaces;
+               goto bad_fork_cleanup_ve_namespace;
        retval = copy_io(clone_flags, p);
        if (retval)
-               goto bad_fork_cleanup_ve_namespace;
+               goto bad_fork_cleanup_namespaces;
        retval = copy_thread(p, args);
        if (retval)
                goto bad_fork_cleanup_io;
@@ -2652,10 +2652,10 @@ __latent_entropy struct task_struct *copy_process(
 bad_fork_cleanup_io:
        if (p->io_context)
                exit_io_context(p);
-bad_fork_cleanup_ve_namespace:
-       exit_ve_namespace(p);
 bad_fork_cleanup_namespaces:
        exit_task_namespaces(p);
+bad_fork_cleanup_ve_namespace:
+       exit_ve_namespace(p);
 bad_fork_cleanup_mm:
        if (p->mm) {
                mm_clear_owner(p->mm, p);
@@ -3237,6 +3237,18 @@ static int check_unshare_flags(unsigned long 
unshare_flags)
                        return -EINVAL;
        }
 
+       /*
+        * Unhsare creates all namespaces first and only then switches to them.
+        * So get_exec_env() yet returns previous VE while we are creating
+        * other namespaces. That leads to network and mount namespace
+        * initialized incorrectly, having ->owner_ve links set to previous VE.
+        * To avoid confusion, forbid unshare of new VE together with other
+        * namespaces. CLONE_NEWUSER is allowed as it should own VE namespace,
+        * not vice versa.
+        */
+       if (unshare_flags & CLONE_NEWVE && unshare_flags & ~CLONE_NEWUSER)
+               return -EINVAL;
+
        return 0;
 }
 
@@ -3342,25 +3354,20 @@ int ksys_unshare(unsigned long unshare_flags)
        err = unshare_userns(unshare_flags, &new_cred);
        if (err)
                goto bad_unshare_cleanup_fd;
+       err = unshare_ve_namespace(unshare_flags, &new_ve_ns, new_cred);
+       if (err)
+               goto bad_unshare_cleanup_cred;
        err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
                                         new_cred, new_fs);
        if (err)
-               goto bad_unshare_cleanup_cred;
-       err = unshare_ve_namespace(unshare_flags, &new_ve_ns, new_cred);
-       if (err) {
-               if (new_nsproxy)
-                       free_nsproxy(new_nsproxy);
-               goto bad_unshare_cleanup_cred;
-       }
+               goto bad_unshare_cleanup_ve_namespace;
 
        if (new_cred) {
                err = set_cred_ucounts(new_cred);
                if (err) {
                        if (new_nsproxy)
                                free_nsproxy(new_nsproxy);
-                       if (new_ve_ns)
-                               free_ve_ns(new_ve_ns);
-                       goto bad_unshare_cleanup_cred;
+                       goto bad_unshare_cleanup_ve_namespace;
                }
        }
 
@@ -3380,8 +3387,10 @@ int ksys_unshare(unsigned long unshare_flags)
                if (new_nsproxy)
                        switch_task_namespaces(current, new_nsproxy);
 
-               if (new_ve_ns)
+               if (new_ve_ns) {
                        switch_ve_namespace(current, new_ve_ns);
+                       new_ve_ns = NULL;
+               }
 
                task_lock(current);
 
@@ -3410,6 +3419,9 @@ int ksys_unshare(unsigned long unshare_flags)
 
        perf_event_namespaces(current);
 
+bad_unshare_cleanup_ve_namespace:
+       if (new_ve_ns)
+               free_ve_ns(new_ve_ns);
 bad_unshare_cleanup_cred:
        if (new_cred)
                put_cred(new_cred);
-- 
2.52.0

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to