Hi,

On Thu, Jul 02, 2015 at 10:28:33AM +0200, David Herrmann wrote:
> Whenever we send messages to a target connection, all we know about the
> target is the 'struct file' associated with the kdbus connection. Hence,
> we cannot know which namespaces a receiving process will be in when it
> calls KDBUS_CMD_RECV on the message. So far, we pinned all metadata we
> wanna send and translate it on RECV-time, since we then know the exact
> namespaces to translate into.
> 
> This has several drawbacks:
>  - Depending on the process calling RECV, the behavior is different (as
>    multiple processes might be in different namespaces but share the same
>    fd). This is unwanted behavior, as described by Eric here:
>        http://www.spinics.net/lists/netdev/msg329322.html
>  - We need to pin metadata with a message instead of translating it right
>    away.
>  - We cannot prep a message at SEND time as we don't know the size of the
>    translated metadata. Hence, we need to do all that at RECV time.
> 
> This patch changes the namespace behavior. Instead of using the namespaces
> at RECV time, we now pin the namespaces at HELLO (i.e., open()). So
> regardless who calls RECV on this file-descriptor, the same namespaces
> will be used.
> This gives us the advantage that we now always know the target namespaces
> for a message. Hence, we can now properly prep a message at SEND time and
> never have to carry any metadata pins around.
> 
> Signed-off-by: David Herrmann <[email protected]>
> ---
>  ipc/kdbus/bus.c        |  2 +-
>  ipc/kdbus/connection.c |  8 +++++++-
>  ipc/kdbus/connection.h |  6 ++++++
>  ipc/kdbus/metadata.c   | 54 
> ++++++++++++++++++++++++++------------------------
>  ipc/kdbus/metadata.h   |  1 +
>  ipc/kdbus/queue.c      |  1 +
>  6 files changed, 44 insertions(+), 28 deletions(-)
> 
> diff --git a/ipc/kdbus/bus.c b/ipc/kdbus/bus.c
> index 7d2c336..8fffc2f 100644
> --- a/ipc/kdbus/bus.c
> +++ b/ipc/kdbus/bus.c
> @@ -507,7 +507,7 @@ int kdbus_cmd_bus_creator_info(struct kdbus_conn *conn, 
> void __user *argp)
>               goto exit;
>       }
>  
> -     ret = kdbus_meta_export(bus->creator_meta, NULL, attach_flags,
> +     ret = kdbus_meta_export(bus->creator_meta, NULL, conn, attach_flags,
>                               slice, hdr_size, &meta_size);
>       if (ret < 0)
>               goto exit;
> diff --git a/ipc/kdbus/connection.c b/ipc/kdbus/connection.c
> index e5e9c1e..bf9a8a6 100644
> --- a/ipc/kdbus/connection.c
> +++ b/ipc/kdbus/connection.c
> @@ -130,6 +130,9 @@ static struct kdbus_conn *kdbus_conn_new(struct kdbus_ep 
> *ep, bool privileged,
>       atomic_set(&conn->lost_count, 0);
>       INIT_DELAYED_WORK(&conn->work, kdbus_reply_list_scan_work);
>       conn->cred = get_current_cred();
> +     conn->user_ns = get_user_ns(current_user_ns());
> +     conn->pid_ns = get_pid_ns(task_active_pid_ns(current));
> +     get_fs_root(current->fs, &conn->root_path);
>       init_waitqueue_head(&conn->wait);
>       kdbus_queue_init(&conn->queue);
>       conn->privileged = privileged;
> @@ -271,6 +274,9 @@ static void __kdbus_conn_free(struct kref *kref)
>       kdbus_match_db_free(conn->match_db);
>       kdbus_pool_free(conn->pool);
>       kdbus_ep_unref(conn->ep);
> +     path_put(&conn->root_path);
> +     put_pid_ns(conn->pid_ns);
> +     put_user_ns(conn->user_ns);
>       put_cred(conn->cred);
>       kfree(conn->description);
>       kfree(conn->quota);
> @@ -1792,7 +1798,7 @@ int kdbus_cmd_conn_info(struct kdbus_conn *conn, void 
> __user *argp)
>               goto exit;
>       }
>  
> -     ret = kdbus_meta_export(owner_conn->meta, conn_meta, attach_flags,
> +     ret = kdbus_meta_export(owner_conn->meta, conn_meta, conn, attach_flags,
>                               slice, sizeof(info), &meta_size);
>       if (ret < 0)
>               goto exit;
> diff --git a/ipc/kdbus/connection.h b/ipc/kdbus/connection.h
> index d1ffe90..226f3ff 100644
> --- a/ipc/kdbus/connection.h
> +++ b/ipc/kdbus/connection.h
> @@ -59,6 +59,9 @@ struct kdbus_kmsg;
>   * @pool:            The user's buffer to receive messages
>   * @user:            Owner of the connection
>   * @cred:            The credentials of the connection at creation time
> + * @user_ns:         User namespace at creation time
> + * @pid_ns:          Pid namespace at creation time

Pid -> PID ?

> + * @root_path:               Root path at creation time
>   * @name_count:              Number of owned well-known names
>   * @request_count:   Number of pending requests issued by this
>   *                   connection that are waiting for replies from
> @@ -97,6 +100,9 @@ struct kdbus_conn {
>       struct kdbus_pool *pool;
>       struct kdbus_user *user;
>       const struct cred *cred;
> +     struct user_namespace *user_ns;
> +     struct pid_namespace *pid_ns;
> +     struct path root_path;
>       atomic_t name_count;
>       atomic_t request_count;
>       atomic_t lost_count;
> diff --git a/ipc/kdbus/metadata.c b/ipc/kdbus/metadata.c
> index c36b9cc..79f0e8c 100644
> --- a/ipc/kdbus/metadata.c
> +++ b/ipc/kdbus/metadata.c
> @@ -888,7 +888,8 @@ static int kdbus_meta_push_kvec(struct kvec *kvec,
>  }
>  
>  static void kdbus_meta_export_caps(struct kdbus_meta_caps *out,
> -                                struct kdbus_meta_proc *mp)
> +                                struct kdbus_meta_proc *mp,
> +                                struct user_namespace *user_ns)
>  {
>       struct user_namespace *iter;
>       const struct cred *cred = mp->cred;
> @@ -896,18 +897,18 @@ static void kdbus_meta_export_caps(struct 
> kdbus_meta_caps *out,
>       int i;
>  
>       /*
> -      * This translates the effective capabilities of 'cred' into the current
> -      * user-namespace. If the current user-namespace is a child-namespace of
> +      * This translates the effective capabilities of 'cred' into the given
> +      * user-namespace. If the given user-namespace is a child-namespace of
>        * the user-namespace of 'cred', the mask can be copied verbatim. If
>        * not, the mask is cleared.
>        * There's one exception: If 'cred' is the owner of any user-namespace
> -      * in the path between the current user-namespace and the user-namespace
> +      * in the path between the given user-namespace and the user-namespace
>        * of 'cred', then it has all effective capabilities set. This means,
>        * the user who created a user-namespace always has all effective
>        * capabilities in any child namespaces. Note that this is based on the
>        * uid of the namespace creator, not the task hierarchy.
>        */
> -     for (iter = current_user_ns(); iter; iter = iter->parent) {
> +     for (iter = user_ns; iter; iter = iter->parent) {

Is check `iter' for being not NULL is needed here? I mean, `iter' takes
range from `user_ns' (which is cred->user_ns) to &init_user_ns.

>               if (iter == cred->user_ns) {
>                       parent = true;
>                       break;
> @@ -951,23 +952,22 @@ static void kdbus_meta_export_caps(struct 
> kdbus_meta_caps *out,
>  }
>  
>  /* This is equivalent to from_kuid_munged(), but maps INVALID_UID to itself 
> */
> -static uid_t kdbus_from_kuid_keep(kuid_t uid)
> +static uid_t kdbus_from_kuid_keep(struct user_namespace *ns, kuid_t uid)
>  {
> -     return uid_valid(uid) ?
> -             from_kuid_munged(current_user_ns(), uid) : ((uid_t)-1);
> +     return uid_valid(uid) ? from_kuid_munged(ns, uid) : ((uid_t)-1);
>  }
>  
>  /* This is equivalent to from_kgid_munged(), but maps INVALID_GID to itself 
> */
> -static gid_t kdbus_from_kgid_keep(kgid_t gid)
> +static gid_t kdbus_from_kgid_keep(struct user_namespace *ns, kgid_t gid)
>  {
> -     return gid_valid(gid) ?
> -             from_kgid_munged(current_user_ns(), gid) : ((gid_t)-1);
> +     return gid_valid(gid) ? from_kgid_munged(ns, gid) : ((gid_t)-1);
>  }
>  
>  /**
>   * kdbus_meta_export() - export information from metadata into a slice
>   * @mp:              Process metadata, or NULL
>   * @mc:              Connection metadata, or NULL
> + * @conn:    Target connection to translate metadata into
>   * @mask:    Mask of KDBUS_ATTACH_* flags to export
>   * @slice:   The slice to export to
>   * @offset:  The offset inside @slice to write to
> @@ -983,18 +983,19 @@ static gid_t kdbus_from_kgid_keep(kgid_t gid)
>   * kdbus_meta_export_prepare(); depending on the namespaces in question, it
>   * might use up less than that.
>   *
> - * All information will be translated using the current namespaces.
> + * All information will be translated using the namespaces of @conn.
>   *
>   * Return: 0 on success, negative error number otherwise.
>   */
>  int kdbus_meta_export(struct kdbus_meta_proc *mp,
>                     struct kdbus_meta_conn *mc,
> +                   struct kdbus_conn *conn,
>                     u64 mask,
>                     struct kdbus_pool_slice *slice,
>                     off_t offset,
>                     size_t *real_size)
>  {
> -     struct user_namespace *user_ns = current_user_ns();
> +     struct user_namespace *user_ns = conn->user_ns;
>       struct kdbus_item_header item_hdr[13], *hdr;
>       char *exe_pathname = NULL;
>       struct kdbus_creds creds;
> @@ -1016,23 +1017,23 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
>       /* process metadata */
>  
>       if (mp && (mask & KDBUS_ATTACH_CREDS)) {
> -             creds.uid       = kdbus_from_kuid_keep(mp->uid);
> -             creds.euid      = kdbus_from_kuid_keep(mp->euid);
> -             creds.suid      = kdbus_from_kuid_keep(mp->suid);
> -             creds.fsuid     = kdbus_from_kuid_keep(mp->fsuid);
> -             creds.gid       = kdbus_from_kgid_keep(mp->gid);
> -             creds.egid      = kdbus_from_kgid_keep(mp->egid);
> -             creds.sgid      = kdbus_from_kgid_keep(mp->sgid);
> -             creds.fsgid     = kdbus_from_kgid_keep(mp->fsgid);
> +             creds.uid       = kdbus_from_kuid_keep(user_ns, mp->uid);
> +             creds.euid      = kdbus_from_kuid_keep(user_ns, mp->euid);
> +             creds.suid      = kdbus_from_kuid_keep(user_ns, mp->suid);
> +             creds.fsuid     = kdbus_from_kuid_keep(user_ns, mp->fsuid);
> +             creds.gid       = kdbus_from_kgid_keep(user_ns, mp->gid);
> +             creds.egid      = kdbus_from_kgid_keep(user_ns, mp->egid);
> +             creds.sgid      = kdbus_from_kgid_keep(user_ns, mp->sgid);
> +             creds.fsgid     = kdbus_from_kgid_keep(user_ns, mp->fsgid);
>  
>               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_CREDS,
>                                           &creds, sizeof(creds), &size);
>       }
>  
>       if (mp && (mask & KDBUS_ATTACH_PIDS)) {
> -             pids.pid = pid_vnr(mp->tgid);
> -             pids.tid = pid_vnr(mp->pid);
> -             pids.ppid = pid_vnr(mp->ppid);
> +             pids.pid = pid_nr_ns(mp->tgid, conn->pid_ns);
> +             pids.tid = pid_nr_ns(mp->pid, conn->pid_ns);
> +             pids.ppid = pid_nr_ns(mp->ppid, conn->pid_ns);
>  
>               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++, KDBUS_ITEM_PIDS,
>                                           &pids, sizeof(pids), &size);
> @@ -1078,7 +1079,8 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
>                */
>  
>               get_fs_root(current->fs, &p);
> -             if (path_equal(&p, &mp->root_path)) {
> +             if (path_equal(&p, &mp->root_path) &&
> +                 path_equal(&p, &conn->root_path)) {
>                       exe_page = (void *)__get_free_page(GFP_TEMPORARY);
>                       if (!exe_page) {
>                               path_put(&p);
> @@ -1116,7 +1118,7 @@ int kdbus_meta_export(struct kdbus_meta_proc *mp,
>       if (mp && (mask & KDBUS_ATTACH_CAPS)) {
>               struct kdbus_meta_caps caps = {};
>  
> -             kdbus_meta_export_caps(&caps, mp);
> +             kdbus_meta_export_caps(&caps, mp, user_ns);
>               cnt += kdbus_meta_push_kvec(kvec + cnt, hdr++,
>                                           KDBUS_ITEM_CAPS, &caps,
>                                           sizeof(caps), &size);
> diff --git a/ipc/kdbus/metadata.h b/ipc/kdbus/metadata.h
> index 79b6ac3..2dbbb3d 100644
> --- a/ipc/kdbus/metadata.h
> +++ b/ipc/kdbus/metadata.h
> @@ -46,6 +46,7 @@ int kdbus_meta_export_prepare(struct kdbus_meta_proc *mp,
>                             u64 *mask, size_t *sz);
>  int kdbus_meta_export(struct kdbus_meta_proc *mp,
>                     struct kdbus_meta_conn *mc,
> +                   struct kdbus_conn *conn,
>                     u64 mask,
>                     struct kdbus_pool_slice *slice,
>                     off_t offset, size_t *real_size);
> diff --git a/ipc/kdbus/queue.c b/ipc/kdbus/queue.c
> index 25bb3ad..6650b78 100644
> --- a/ipc/kdbus/queue.c
> +++ b/ipc/kdbus/queue.c
> @@ -479,6 +479,7 @@ int kdbus_queue_entry_install(struct kdbus_queue_entry 
> *entry,
>  
>               ret = kdbus_meta_export(entry->proc_meta,
>                                       entry->conn_meta,
> +                                     conn_dst,
>                                       entry->attach_flags,
>                                       entry->slice,
>                                       entry->meta_offset,
> -- 
> 2.4.5
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to [email protected]
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to