On Wed, Jun 10, 2015 at 4:01 PM, Hong Zhiguo <[email protected]> wrote:
> In the current implementation, init_net is always used.
>
> But in most cases, if a user does an rbd map or ceph mount in
> a container, it's expected to use the container's network namespace.
>
> This patch saves the container's netns in ceph_options on an rbd map
> or ceph mount. And use the netns other than init_net when creating
> socket. The ref count on the netns is taken only by the ceph_options
> in ceph_client, since the lifetimes of the osds and mon are within that
> of ceph_client.
>
> I've tested this patch in a docker container with the operations below:
> - rbd map
> - write/read on the rbd
> - rbd unmap
>
> Signed-off-by: Hong Zhiguo <[email protected]>
> ---
> fs/ceph/mds_client.c | 3 ++-
> include/linux/ceph/libceph.h | 3 +++
> include/linux/ceph/messenger.h | 4 +++-
> net/ceph/ceph_common.c | 7 ++++---
> net/ceph/messenger.c | 8 +++++++-
> net/ceph/mon_client.c | 2 +-
> net/ceph/osd_client.c | 3 ++-
> 7 files changed, 22 insertions(+), 8 deletions(-)
>
> diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
> index 8080d48..3fb0976 100644
> --- a/fs/ceph/mds_client.c
> +++ b/fs/ceph/mds_client.c
> @@ -440,7 +440,8 @@ static struct ceph_mds_session *register_session(struct
> ceph_mds_client *mdsc,
> s->s_seq = 0;
> mutex_init(&s->s_mutex);
>
> - ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr);
> + ceph_con_init(&s->s_con, s, &mds_con_ops, &mdsc->fsc->client->msgr,
> + mdsc->fsc->client->options->netns);
>
> spin_lock_init(&s->s_gen_ttl_lock);
> s->s_cap_gen = 0;
> diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
> index d73a569..442d9f3 100644
> --- a/include/linux/ceph/libceph.h
> +++ b/include/linux/ceph/libceph.h
> @@ -22,6 +22,8 @@
> #include <linux/ceph/osd_client.h>
> #include <linux/ceph/ceph_fs.h>
>
> +struct net;
> +
> /*
> * mount options
> */
> @@ -46,6 +48,7 @@ struct ceph_options {
> unsigned long mount_timeout; /* jiffies */
> unsigned long osd_idle_ttl; /* jiffies */
> unsigned long osd_keepalive_timeout; /* jiffies */
> + struct net *netns;
>
> /*
> * any type that can't be simply compared or doesn't need need
> diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h
> index e154994..3b0a314 100644
> --- a/include/linux/ceph/messenger.h
> +++ b/include/linux/ceph/messenger.h
> @@ -14,6 +14,7 @@
>
> struct ceph_msg;
> struct ceph_connection;
> +struct net;
>
> /*
> * Ceph defines these callbacks for handling connection events.
> @@ -189,6 +190,7 @@ struct ceph_connection {
> struct ceph_messenger *msgr;
>
> atomic_t sock_state;
> + struct net *netns;
> struct socket *sock;
> struct ceph_entity_addr peer_addr; /* peer address */
> struct ceph_entity_addr peer_addr_for_me;
> @@ -270,7 +272,7 @@ extern void ceph_messenger_init(struct ceph_messenger
> *msgr,
>
> extern void ceph_con_init(struct ceph_connection *con, void *private,
> const struct ceph_connection_operations *ops,
> - struct ceph_messenger *msgr);
> + struct ceph_messenger *msgr, struct net *netns);
> extern void ceph_con_open(struct ceph_connection *con,
> __u8 entity_type, __u64 entity_num,
> struct ceph_entity_addr *addr);
> diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c
> index 925d0c8..1c42d96 100644
> --- a/net/ceph/ceph_common.c
> +++ b/net/ceph/ceph_common.c
> @@ -269,6 +269,9 @@ static match_table_t opt_tokens = {
> void ceph_destroy_options(struct ceph_options *opt)
> {
> dout("destroy_options %p\n", opt);
> + if (opt->netns) {
> + put_net(opt->netns);
> + }
> kfree(opt->name);
> if (opt->key) {
> ceph_crypto_key_destroy(opt->key);
> @@ -335,9 +338,6 @@ ceph_parse_options(char *options, const char *dev_name,
> int err = -ENOMEM;
> substring_t argstr[MAX_OPT_ARGS];
>
> - if (current->nsproxy->net_ns != &init_net)
> - return ERR_PTR(-EINVAL);
> -
> opt = kzalloc(sizeof(*opt), GFP_KERNEL);
> if (!opt)
> return ERR_PTR(-ENOMEM);
> @@ -501,6 +501,7 @@ ceph_parse_options(char *options, const char *dev_name,
> }
>
> /* success */
> + opt->netns = get_net(current->nsproxy->net_ns);
> return opt;
>
> out:
> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
> index 967080a..43ec07d 100644
> --- a/net/ceph/messenger.c
> +++ b/net/ceph/messenger.c
> @@ -736,7 +736,7 @@ bool ceph_con_opened(struct ceph_connection *con)
> */
> void ceph_con_init(struct ceph_connection *con, void *private,
> const struct ceph_connection_operations *ops,
> - struct ceph_messenger *msgr)
> + struct ceph_messenger *msgr, struct net *netns)
> {
> dout("con_init %p\n", con);
> memset(con, 0, sizeof(*con));
> @@ -744,6 +744,12 @@ void ceph_con_init(struct ceph_connection *con, void
> *private,
> con->ops = ops;
> con->msgr = msgr;
>
> + /*
> + * don't take extra refcnt of netns here since both mon and osds
> + * have lifetime within that of ceph_client
> + */
> + con->netns = netns;
> +
> con_sock_state_init(con);
>
> mutex_init(&con->mutex);
> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c
> index 9d6ff12..04128af 100644
> --- a/net/ceph/mon_client.c
> +++ b/net/ceph/mon_client.c
> @@ -832,7 +832,7 @@ int ceph_monc_init(struct ceph_mon_client *monc, struct
> ceph_client *cl)
> goto out_auth_reply;
>
> ceph_con_init(&monc->con, monc, &mon_con_ops,
> - &monc->client->msgr);
> + &monc->client->msgr, monc->client->options->netns);
>
> monc->cur_mon = -1;
> monc->hunting = true;
> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
> index 5003367..32d9fa9 100644
> --- a/net/ceph/osd_client.c
> +++ b/net/ceph/osd_client.c
> @@ -1022,7 +1022,8 @@ static struct ceph_osd *create_osd(struct
> ceph_osd_client *osdc, int onum)
> INIT_LIST_HEAD(&osd->o_osd_lru);
> osd->o_incarnation = 1;
>
> - ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr);
> + ceph_con_init(&osd->o_con, osd, &osd_con_ops, &osdc->client->msgr,
> + osdc->client->options->netns);
>
> INIT_LIST_HEAD(&osd->o_keepalive_item);
> return osd;
It seems to me your patch boils down to killing the init_net check and
adding a netns field to struct ceph_connection, which is assigned to
but never used. Given that, can you elaborate on the "And use the
netns other than init_net when creating socket" part and explain in
a little bit more detail what is accomplished here?
Thanks,
Ilya
--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html