From: Geliang Tang <[email protected]>

To add MPTCP support in "NVMe over TCP", the host side needs to pass
IPPROTO_MPTCP to sock_create_kern() instead of IPPROTO_TCP to create an
MPTCP socket.

Similar to the target-side nvmet_tcp_proto, define a host-side
nvme_tcp_proto structure, which contains the socket protocol, a set of
function pointers for socket operations, and a pointer to the
nvme_ctrl_ops passed to nvme_init_ctrl(). Among the socket operations,
the only difference is that it defines .set_syncnt instead of
.set_reuseaddr.

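A TCP-specific instance of this structure (nvme_tcp_proto) is defined,
and a proto field is added to nvme_tcp_ctrl. When the transport string is
"tcp", ctrl->proto is set to point to that instance; for any other
transport string, nvme_tcp_alloc_ctrl() bails out with -EINVAL.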

All locations that previously called the socket option helpers directly
(tcp_sock_set_syncnt(), tcp_sock_set_nodelay(), sock_no_linger(),
sock_set_priority() and ip_sock_set_tos()) are updated to call the
corresponding function pointers in the nvme_tcp_proto structure. The
controller's proto pointer is set during initialization and remains valid
throughout the controller's lifetime.

v2:
 - use 'trtype' instead of '--mptcp' (Hannes)

v3:
 - check mptcp protocol from opts->transport instead of passing a
   parameter (Hannes).

v4:
 - check CONFIG_MPTCP.

v5:
 - define nvme_tcp_proto struct.
 - add a pointer to this structure in nvme_tcp_ctrl.

Cc: Hannes Reinecke <[email protected]>
Cc: John Meneghini <[email protected]>
Cc: Randy Jennings <[email protected]>
Cc: Nilay Shroff <[email protected]>
Co-developed-by: zhenwei pi <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
Co-developed-by: Hui Zhu <[email protected]>
Signed-off-by: Hui Zhu <[email protected]>
Co-developed-by: Gang Yan <[email protected]>
Signed-off-by: Gang Yan <[email protected]>
Signed-off-by: Geliang Tang <[email protected]>
---
 drivers/nvme/host/tcp.c | 44 ++++++++++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 7 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 15d36d6a728e..13a5240623ef 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -182,6 +182,16 @@ struct nvme_tcp_queue {
        void (*write_space)(struct sock *);
 };
 
+struct nvme_tcp_proto {
+       int                     protocol;
+       int (*set_syncnt)(struct sock *sk, int val);
+       void (*set_nodelay)(struct sock *sk);
+       void (*no_linger)(struct sock *sk);
+       void (*set_priority)(struct sock *sk, u32 priority);
+       void (*set_tos)(struct sock *sk, int val);
+       const struct nvme_ctrl_ops *ops;
+};
+
 struct nvme_tcp_ctrl {
        /* read only in the hot path */
        struct nvme_tcp_queue   *queues;
@@ -198,6 +208,8 @@ struct nvme_tcp_ctrl {
        struct delayed_work     connect_work;
        struct nvme_tcp_request async_req;
        u32                     io_queues[HCTX_MAX_TYPES];
+
+       const struct nvme_tcp_proto *proto;
 };
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
@@ -1799,7 +1811,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
 
        ret = sock_create_kern(current->nsproxy->net_ns,
                        ctrl->addr.ss_family, SOCK_STREAM,
-                       IPPROTO_TCP, &queue->sock);
+                       ctrl->proto->protocol, &queue->sock);
        if (ret) {
                dev_err(nctrl->device,
                        "failed to create socket: %d\n", ret);
@@ -1816,24 +1828,24 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl, int qid,
        nvme_tcp_reclassify_socket(queue->sock);
 
        /* Single syn retry */
-       tcp_sock_set_syncnt(queue->sock->sk, 1);
+       ctrl->proto->set_syncnt(queue->sock->sk, 1);
 
        /* Set TCP no delay */
-       tcp_sock_set_nodelay(queue->sock->sk);
+       ctrl->proto->set_nodelay(queue->sock->sk);
 
        /*
         * Cleanup whatever is sitting in the TCP transmit queue on socket
         * close. This is done to prevent stale data from being sent should
         * the network connection be restored before TCP times out.
         */
-       sock_no_linger(queue->sock->sk);
+       ctrl->proto->no_linger(queue->sock->sk);
 
        if (so_priority > 0)
-               sock_set_priority(queue->sock->sk, so_priority);
+               ctrl->proto->set_priority(queue->sock->sk, so_priority);
 
        /* Set socket type of service */
        if (nctrl->opts->tos >= 0)
-               ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
+               ctrl->proto->set_tos(queue->sock->sk, nctrl->opts->tos);
 
        /* Set 10 seconds timeout for icresp recvmsg */
        queue->sock->sk->sk_rcvtimeo = 10 * HZ;
@@ -2900,6 +2912,17 @@ nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
        return found;
 }
 
+static const struct nvme_tcp_proto nvme_tcp_proto = {
+       .protocol       = IPPROTO_TCP,
+       .set_syncnt     = tcp_sock_set_syncnt,
+       .set_nodelay    = tcp_sock_set_nodelay,
+       .no_linger      = sock_no_linger,
+       .set_priority   = sock_set_priority,
+       .set_tos        = ip_sock_set_tos,
+       .ops            = &nvme_tcp_ctrl_ops,
+
+};
+
 static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
                struct nvmf_ctrl_options *opts)
 {
@@ -2964,13 +2987,20 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
                goto out_free_ctrl;
        }
 
+       if (!strcmp(ctrl->ctrl.opts->transport, "tcp")) {
+               ctrl->proto = &nvme_tcp_proto;
+       } else {
+               ret = -EINVAL;
+               goto out_free_ctrl;
+       }
+
        ctrl->queues = kzalloc_objs(*ctrl->queues, ctrl->ctrl.queue_count);
        if (!ctrl->queues) {
                ret = -ENOMEM;
                goto out_free_ctrl;
        }
 
-       ret = nvme_init_ctrl(&ctrl->ctrl, dev, &nvme_tcp_ctrl_ops, 0);
+       ret = nvme_init_ctrl(&ctrl->ctrl, dev, ctrl->proto->ops, 0);
        if (ret)
                goto out_kfree_queues;
 
-- 
2.53.0


Reply via email to