On Tue, Jan 29, 2019 at 03:26:26PM +0200, Joel Nider wrote:
> Add a new handler for new uverb reg_remote_mr. The purpose is to register
> a memory region in a different address space (i.e. process) than the
> caller.
> 
> The main use case which motivated this change is post-copy container
> migration. When a migration manager (i.e. CRIU) starts a migration, it
> must have an open connection for handling any page faults that occur
> in the container after restoration on the target machine. Even though
> CRIU establishes and maintains the connection, ultimately the memory
> is copied from the container being migrated (i.e. a remote address
> space). This container must remain passive -- meaning it cannot have
> any knowledge of the RDMA connection; therefore the migration manager
> must have the ability to register a remote memory region. This remote
> memory region will serve as the source for any memory pages that must
> be copied (on-demand or otherwise) during the migration.
> 
> Signed-off-by: Joel Nider <jo...@il.ibm.com>
>  drivers/infiniband/core/uverbs_std_types_mr.c | 129 
> +++++++++++++++++++++++++-
>  include/rdma/ib_verbs.h                       |   8 ++
>  include/uapi/rdma/ib_user_ioctl_cmds.h        |  13 +++
>  3 files changed, 149 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/infiniband/core/uverbs_std_types_mr.c 
> b/drivers/infiniband/core/uverbs_std_types_mr.c
> index 4d4be0c..bf7b4b2 100644
> +++ b/drivers/infiniband/core/uverbs_std_types_mr.c
> @@ -150,6 +150,99 @@ static int UVERBS_HANDLER(UVERBS_METHOD_DM_MR_REG)(
>       return ret;
>  }
>  
> +static int UVERBS_HANDLER(UVERBS_METHOD_REG_REMOTE_MR)(
> +     struct uverbs_attr_bundle *attrs)
> +{

I think this should just be REG_MR with an optional remote PID
argument.

> +     struct pid *owner_pid;
> +     struct ib_reg_remote_mr_attr attr = {};
> +     struct ib_uobject *uobj =
> +             uverbs_attr_get_uobject(attrs,
> +                                     UVERBS_ATTR_REG_REMOTE_MR_HANDLE);
> +     struct ib_pd *pd =
> +             uverbs_attr_get_obj(attrs, UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE);
> +
> +     struct ib_mr *mr;
> +     int ret;
> +
> +     ret = uverbs_copy_from(&attr.start, attrs,
> +                             UVERBS_ATTR_REG_REMOTE_MR_START);
> +     if (ret)
> +             return ret;
> +
> +     ret = uverbs_copy_from(&attr.length, attrs,
> +                             UVERBS_ATTR_REG_REMOTE_MR_LENGTH);
> +     if (ret)
> +             return ret;
> +
> +     ret = uverbs_copy_from(&attr.hca_va, attrs,
> +                             UVERBS_ATTR_REG_REMOTE_MR_HCA_VA);
> +     if (ret)
> +             return ret;
> +
> +     ret = uverbs_copy_from(&attr.owner, attrs,
> +                             UVERBS_ATTR_REG_REMOTE_MR_OWNER);
> +     if (ret)
> +             return ret;

Maybe these should use the const version; it is increasingly the
intended way to read small integers. Then we can do sensible things
like use uintptr_t to store pointer values and size_t to store sizes -
the code will automatically bounds check the user input if it is done
like this.

> +     ret = uverbs_get_flags32(&attr.access_flags, attrs,
> +                              UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS,
> +                              IB_ACCESS_SUPPORTED);
> +     if (ret)
> +             return ret;
> +
> +     /* ensure the offsets are identical */
> +     if ((attr.start & ~PAGE_MASK) != (attr.hca_va & ~PAGE_MASK))
> +             return -EINVAL;
> +
> +     ret = ib_check_mr_access(attr.access_flags);
> +     if (ret)
> +             return ret;
> +
> +     if (attr.access_flags & IB_ACCESS_ON_DEMAND) {
> +             if (!(pd->device->attrs.device_cap_flags &
> +                   IB_DEVICE_ON_DEMAND_PAGING)) {
> +                     pr_debug("ODP support not available\n");
> +                     ret = -EINVAL;
> +                     return ret;
> +             }
> +     }
> +
> +     /* get the owner's pid struct before something happens to it */
> +     owner_pid = find_get_pid(attr.owner);

What about security? Shouldn't this match what ptrace does?

> +     mr = pd->device->ops.reg_user_mr(pd, attr.start, attr.length,
> +             attr.hca_va, attr.access_flags, owner_pid, NULL);
> +     if (IS_ERR(mr))
> +             return PTR_ERR(mr);
> +
> +     mr->device  = pd->device;
> +     mr->pd      = pd;
> +     mr->dm      = NULL;
> +     mr->uobject = uobj;
> +     atomic_inc(&pd->usecnt);
> +     mr->res.type = RDMA_RESTRACK_MR;
> +     mr->res.task = get_pid_task(owner_pid, PIDTYPE_PID);
> +     rdma_restrack_kadd(&mr->res);
> +
> +     uobj->object = mr;
> +
> +     ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY,
> +                &mr->lkey, sizeof(mr->lkey));
> +     if (ret)
> +             goto err_dereg;
> +
> +     ret = uverbs_copy_to(attrs, UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY,
> +                     &mr->rkey, sizeof(mr->rkey));
> +     if (ret)
> +             goto err_dereg;
> +
> +     return 0;
> +
> +err_dereg:
> +     ib_dereg_mr(mr);
> +
> +     return ret;
> +}
> +
>  DECLARE_UVERBS_NAMED_METHOD(
>       UVERBS_METHOD_ADVISE_MR,
>       UVERBS_ATTR_IDR(UVERBS_ATTR_ADVISE_MR_PD_HANDLE,
> @@ -203,12 +296,46 @@ DECLARE_UVERBS_NAMED_METHOD_DESTROY(
>                       UVERBS_ACCESS_DESTROY,
>                       UA_MANDATORY));
>  
> +DECLARE_UVERBS_NAMED_METHOD(
> +     UVERBS_METHOD_REG_REMOTE_MR,
> +     UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_HANDLE,
> +                     UVERBS_OBJECT_MR,
> +                     UVERBS_ACCESS_NEW,
> +                     UA_MANDATORY),
> +     UVERBS_ATTR_IDR(UVERBS_ATTR_REG_REMOTE_MR_PD_HANDLE,
> +                     UVERBS_OBJECT_PD,
> +                     UVERBS_ACCESS_READ,
> +                     UA_MANDATORY),
> +     UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_START,
> +                        UVERBS_ATTR_TYPE(u64),
> +                        UA_MANDATORY),
> +     UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_LENGTH,
> +                        UVERBS_ATTR_TYPE(u64),
> +                        UA_MANDATORY),
> +     UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_HCA_VA,
> +                        UVERBS_ATTR_TYPE(u64),
> +                        UA_MANDATORY),
> +     UVERBS_ATTR_FLAGS_IN(UVERBS_ATTR_REG_REMOTE_MR_ACCESS_FLAGS,
> +                          enum ib_access_flags),
> +     UVERBS_ATTR_PTR_IN(UVERBS_ATTR_REG_REMOTE_MR_OWNER,
> +                        UVERBS_ATTR_TYPE(u32),
> +                        UA_MANDATORY),
> +     UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_LKEY,
> +                         UVERBS_ATTR_TYPE(u32),
> +                         UA_MANDATORY),
> +     UVERBS_ATTR_PTR_OUT(UVERBS_ATTR_REG_REMOTE_MR_RESP_RKEY,
> +                         UVERBS_ATTR_TYPE(u32),
> +                         UA_MANDATORY),
> +);
> +
>  DECLARE_UVERBS_NAMED_OBJECT(
>       UVERBS_OBJECT_MR,
>       UVERBS_TYPE_ALLOC_IDR(uverbs_free_mr),
>       &UVERBS_METHOD(UVERBS_METHOD_DM_MR_REG),
>       &UVERBS_METHOD(UVERBS_METHOD_MR_DESTROY),
> -     &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR));
> +     &UVERBS_METHOD(UVERBS_METHOD_ADVISE_MR),
> +     &UVERBS_METHOD(UVERBS_METHOD_REG_REMOTE_MR),
> +);

I'm kind of surprised this compiles with the trailing comma?

>  const struct uapi_definition uverbs_def_obj_mr[] = {
>       UAPI_DEF_CHAIN_OBJ_TREE_NAMED(UVERBS_OBJECT_MR,
> diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
> index 3432404..dcf5edc 100644
> +++ b/include/rdma/ib_verbs.h
> @@ -334,6 +334,14 @@ struct ib_dm_alloc_attr {
>       u32     flags;
>  };
>  
> +struct ib_reg_remote_mr_attr {
> +     u64      start;
> +     u64      length;
> +     u64      hca_va;
> +     u32      access_flags;
> +     u32      owner;
> +};

Why is this struct needed? And why define it here?

Jason

Reply via email to