On Thu, 05 Feb 2015 10:34:11 +0800
Ian Kent <ik...@redhat.com> wrote:

> The call_usermodehelper() function executes all binaries in the
> global "init" root context. This doesn't allow a binary to be run
> within a namespace (eg. the namespace of a container).
> 
> Both containerized NFS client and NFS server need the ability to
> execute a binary in a container's context. To do this, the init
> process of the caller's environment is used to set up the namespaces
> in the same way the root init process is used otherwise.
> 
> Signed-off-by: Ian Kent <ik...@redhat.com>
> Cc: Benjamin Coddington <bcodd...@redhat.com>
> Cc: Al Viro <v...@zeniv.linux.org.uk>
> Cc: J. Bruce Fields <bfie...@fieldses.org>
> Cc: David Howells <dhowe...@redhat.com>
> Cc: Trond Myklebust <trond.mykleb...@primarydata.com>
> Cc: Oleg Nesterov <onest...@redhat.com>
> Cc: Eric W. Biederman <ebied...@xmission.com>
> Cc: Jeff Layton <jeff.lay...@primarydata.com>
> ---
>  include/linux/kmod.h |   16 +++++++
>  kernel/kmod.c        |  115 +++++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 128 insertions(+), 3 deletions(-)
> 
> diff --git a/include/linux/kmod.h b/include/linux/kmod.h
> index 15bdeed..b0f1b3c 100644
> --- a/include/linux/kmod.h
> +++ b/include/linux/kmod.h
> @@ -52,6 +52,7 @@ struct file;
>  #define UMH_WAIT_EXEC        1       /* wait for the exec, but not the process */
>  #define UMH_WAIT_PROC        2       /* wait for the process to complete */
>  #define UMH_KILLABLE 4       /* wait for EXEC/PROC killable */
> +#define UMH_USE_NS   8       /* exec using caller's init namespace */
>  
>  struct subprocess_info {
>       struct work_struct work;
> @@ -69,6 +70,21 @@ struct subprocess_info {
>  extern int
>  call_usermodehelper(char *path, char **argv, char **envp, int flags);
>  
> +#if !defined(CONFIG_PROC_FS) || !defined(CONFIG_NAMESPACES)
> +inline struct task_struct *umh_get_init_task(void)
> +{
> +     return ERR_PTR(-ENOTSUP);
> +}
> +
> +inline int umh_enter_ns(struct task_struct *tsk, struct cred *new)
> +{
> +     return -ENOTSUP;
> +}
> +#else
> +struct task_struct *umh_get_init_pid(void);
> +int umh_enter_ns(struct task_struct *tsk, struct cred *new);
> +#endif
> +
>  extern struct subprocess_info *
>  call_usermodehelper_setup(char *path, char **argv, char **envp, gfp_t gfp_mask,
>                         int (*init)(struct subprocess_info *info, struct cred *new),
> diff --git a/kernel/kmod.c b/kernel/kmod.c
> index 14c0188..4c649d6 100644
> --- a/kernel/kmod.c
> +++ b/kernel/kmod.c
> @@ -582,6 +582,98 @@ unlock:
>  }
>  EXPORT_SYMBOL(call_usermodehelper_exec);
>  
> +#if defined(CONFIG_PROC_FS) && defined(CONFIG_NAMESPACES)
> +#define NS_PATH_MAX  35
> +#define NS_PATH_FMT  "%lu/ns/%s"
> +
> +/* Note namespace name order is significant */
> +static const char *ns_names[] = { "user", "ipc", "uts", "net", "pid", "mnt", NULL };
> +
> +struct task_struct *umh_get_init_pid(void)

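nit: we're not getting a pid here but a task_struct pointer. Maybe this
should be called umh_get_init_task? That would also match the stub in
kmod.h for the !CONFIG_PROC_FS || !CONFIG_NAMESPACES case, which is
already named umh_get_init_task while the declaration and the caller
use umh_get_init_pid.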

> +{
> +     struct task_struct *tsk;
> +
> +     rcu_read_lock();
> +     tsk = find_task_by_vpid(1);
> +     if (tsk)
> +             get_task_struct(tsk);
> +     rcu_read_unlock();

I'm not terribly familiar with the task_struct lifetime rules...

I assume that you can be assured that tsk won't go away while you hold
the rcu_read_lock, but is doing a get_task_struct while holding it
sufficient to pin it after you drop the lock?

IOW, could the refcount on the task_struct already have dropped to zero
by the time you find it, so that get_task_struct() does a 0->1
transition here and the task_struct ends up being freed anyway after
you've grabbed a reference?

> +     if (!tsk)
> +             return ERR_PTR(-ESRCH);
> +
> +     return tsk;
> +}
> +EXPORT_SYMBOL(umh_get_init_pid);
> +
> +int umh_enter_ns(struct task_struct *tsk, struct cred *new)
> +{
> +     char path[NS_PATH_MAX];
> +     struct vfsmount *mnt;
> +     const char *name;
> +     pid_t pid;
> +     int err = 0;
> +
> +     pid = task_pid_nr(tsk);
> +
> +     /*
> +      * The user mode thread runner runs in the root init namespace
> +      * so it will see all system pids.
> +      */
> +     mnt = task_active_pid_ns(current)->proc_mnt;
> +
> +     for (name = ns_names[0]; *name; name++) {
> +             struct file *this;
> +             int len;
> +
> +             len = snprintf(path,
> +                            NS_PATH_MAX, NS_PATH_FMT,
> +                            (unsigned long) pid, name);
> +             if (len >= NS_PATH_MAX) {
> +                     err = -ENAMETOOLONG;
> +                     break;
> +             }
> +
> +             this = file_open_root(mnt->mnt_root, mnt, path, O_RDONLY);
> +             if (unlikely(IS_ERR(this))) {
> +                     err = PTR_ERR(this);
> +                     break;
> +             }
> +
> +             err = setns_inode(file_inode(this), 0);
> +             fput(this);
> +             if (err)
> +                     break;
> +     }
> +
> +     return err;
> +}
> +EXPORT_SYMBOL(umh_enter_ns);
> +
> +static int umh_set_ns(struct subprocess_info *info, struct cred *new)
> +{
> +     struct task_struct *tsk = info->data;
> +
> +     return umh_enter_ns(tsk, new);
> +}
> +
> +static void umh_free_ns(struct subprocess_info *info)
> +{
> +     struct task_struct *tsk = info->data;
> +
> +     if (tsk)
> +             put_task_struct(tsk);
> +}
> +#else
> +static int umh_set_ns(struct subprocess_info *info, struct cred *new)
> +{
> +     return 0;
> +}
> +
> +static void umh_free_ns(struct subprocess_info *info)
> +{
> +}
> +#endif
> +
>  /**
>   * call_usermodehelper() - prepare and start a usermode application
>   * @path: path to usermode executable
> @@ -599,11 +691,28 @@ int call_usermodehelper(char *path, char **argv, char **envp, int flags)
>  {
>       struct subprocess_info *info;
>       gfp_t gfp_mask = (flags == UMH_NO_WAIT) ? GFP_ATOMIC : GFP_KERNEL;
> +     unsigned int use_ns = flags & UMH_USE_NS;
> +     struct task_struct *tsk = NULL;
> +
> +     if (use_ns) {
> +             tsk = umh_get_init_pid();
> +             if (IS_ERR(tsk))
> +                     return PTR_ERR(tsk);
> +     }
>  
> -     info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
> -                                      NULL, NULL, NULL);
> -     if (info == NULL)
> +     if (!tsk)
> +             info = call_usermodehelper_setup(path, argv, envp,
> +                                              gfp_mask, NULL, NULL, NULL);
> +     else {
> +             info = call_usermodehelper_setup(path, argv, envp, gfp_mask,
> +                                              umh_set_ns, umh_free_ns,
> +                                              tsk);
> +     }
> +     if (info == NULL) {
> +             if (tsk)
> +                     put_task_struct(tsk);
>               return -ENOMEM;
> +     }
>  
>       return call_usermodehelper_exec(info, flags);
>  }
> 


-- 
Jeff Layton <jeff.lay...@primarydata.com>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to