From: Geliang Tang <[email protected]>

An MPTCP-specific version of struct nvme_tcp_proto is implemented,
and it is assigned to ctrl->proto when the transport string is "mptcp".

The socket options are set in the same way as on the target side, except
that mptcp_sock_set_syncnt() is a new helper added for the host side.

The MPTCP socket-option helpers apply each value to all existing subflows
of an MPTCP connection, except for set_reuseaddr, which only applies to the
first subflow. The values are then propagated to subflows created later by
sync_socket_options().

A separate nvme_mptcp_ctrl_ops structure with .name = "mptcp" is defined
and used for MPTCP controllers.

"mptcp" is planned to be added to the NVMe Base Specification as a new
NVMe transport type.

The Discovery Log does not yet recognize trtype=4 (MPTCP), so such entries
are currently shown as "trtype: unrecognized":

 =====Discovery Log Entry 0======
 trtype:  unrecognized
 adrfam:  ipv4
 subtype: current discovery subsystem
 treq:    not specified, sq flow control disable supported
 portid:  23106
 trsvcid: 23601
 subnqn:  nqn.2014-08.org.nvmexpress.discovery
 traddr:  10.1.1.1
 eflags:  none

Cc: Hannes Reinecke <[email protected]>
Cc: John Meneghini <[email protected]>
Cc: Randy Jennings <[email protected]>
Cc: Nilay Shroff <[email protected]>
Co-developed-by: zhenwei pi <[email protected]>
Signed-off-by: zhenwei pi <[email protected]>
Co-developed-by: Hui Zhu <[email protected]>
Signed-off-by: Hui Zhu <[email protected]>
Co-developed-by: Gang Yan <[email protected]>
Signed-off-by: Gang Yan <[email protected]>
Signed-off-by: Geliang Tang <[email protected]>
---
 drivers/nvme/host/tcp.c | 34 ++++++++++++++++++++++++++++++++++
 include/net/mptcp.h     | 11 +++++++++++
 net/mptcp/sockopt.c     | 30 +++++++++++++++++++++++++++++-
 3 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 305624d59c50..2388a8c443cc 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -2895,6 +2895,24 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
        .get_virt_boundary      = nvmf_get_virt_boundary,
 };
 
+#ifdef CONFIG_MPTCP
+static const struct nvme_ctrl_ops nvme_mptcp_ctrl_ops = {
+       .name                   = "mptcp",
+       .module                 = THIS_MODULE,
+       .flags                  = NVME_F_FABRICS | NVME_F_BLOCKING,
+       .reg_read32             = nvmf_reg_read32,
+       .reg_read64             = nvmf_reg_read64,
+       .reg_write32            = nvmf_reg_write32,
+       .subsystem_reset        = nvmf_subsystem_reset,
+       .free_ctrl              = nvme_tcp_free_ctrl,
+       .submit_async_event     = nvme_tcp_submit_async_event,
+       .delete_ctrl            = nvme_tcp_delete_ctrl,
+       .get_address            = nvme_tcp_get_address,
+       .stop_ctrl              = nvme_tcp_stop_ctrl,
+       .get_virt_boundary      = nvmf_get_virt_boundary,
+};
+#endif
+
 static bool
 nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts)
 {
@@ -2923,6 +2941,18 @@ static const struct nvme_tcp_proto nvme_tcp_proto = {
 
 };
 
+#ifdef CONFIG_MPTCP
+static const struct nvme_tcp_proto nvme_mptcp_proto = {
+       .protocol       = IPPROTO_MPTCP,
+       .set_syncnt     = mptcp_sock_set_syncnt,
+       .set_nodelay    = mptcp_sock_set_nodelay,
+       .no_linger      = mptcp_sock_no_linger,
+       .set_priority   = mptcp_sock_set_priority,
+       .set_tos        = __mptcp_sock_set_tos,
+       .ops            = &nvme_mptcp_ctrl_ops,
+};
+#endif
+
 static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
                struct nvmf_ctrl_options *opts)
 {
@@ -2989,6 +3019,10 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev,
 
        if (!strcmp(ctrl->ctrl.opts->transport, "tcp")) {
                ctrl->proto = &nvme_tcp_proto;
+#ifdef CONFIG_MPTCP
+       } else if (!strcmp(ctrl->ctrl.opts->transport, "mptcp")) {
+               ctrl->proto = &nvme_mptcp_proto;
+#endif
        } else {
                ret = -EINVAL;
                goto out_free_ctrl;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index b8ab214a7890..160267e35b13 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -238,11 +238,15 @@ void mptcp_sock_no_linger(struct sock *sk);
 
 void mptcp_sock_set_priority(struct sock *sk, u32 priority);
 
+void __mptcp_sock_set_tos(struct sock *sk, int val);
+
 void mptcp_sock_set_tos(struct sock *sk);
 
 void mptcp_sock_set_reuseaddr(struct sock *sk);
 
 void mptcp_sock_set_nodelay(struct sock *sk);
+
+int mptcp_sock_set_syncnt(struct sock *sk, int val);
 #else
 
 static inline void mptcp_init(void)
@@ -334,11 +338,18 @@ static inline void mptcp_sock_no_linger(struct sock *sk) { }
 
 static inline void mptcp_sock_set_priority(struct sock *sk, u32 priority) { }
 
+static inline void __mptcp_sock_set_tos(struct sock *sk, int val) { }
+
 static inline void mptcp_sock_set_tos(struct sock *sk) { }
 
 static inline void mptcp_sock_set_reuseaddr(struct sock *sk) { }
 
 static inline void mptcp_sock_set_nodelay(struct sock *sk) { }
+
+static inline int mptcp_sock_set_syncnt(struct sock *sk, int val)
+{
+       return 0;
+}
 #endif /* CONFIG_MPTCP */
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index 0adbbe568f6e..7857dac62afc 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -1598,6 +1598,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
        WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range));
 
        ssk->sk_reuse = sk->sk_reuse;
+       if (inet_csk(sk)->icsk_syn_retries > 0)
+               tcp_sock_set_syncnt(ssk, inet_csk(sk)->icsk_syn_retries);
 }
 
 void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk)
@@ -1709,7 +1711,7 @@ void mptcp_sock_set_priority(struct sock *sk, u32 priority)
 }
 EXPORT_SYMBOL(mptcp_sock_set_priority);
 
-static void __mptcp_sock_set_tos(struct sock *sk, int val)
+void __mptcp_sock_set_tos(struct sock *sk, int val)
 {
        struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_subflow_context *subflow;
@@ -1728,6 +1730,7 @@ static void __mptcp_sock_set_tos(struct sock *sk, int val)
        }
        release_sock(sk);
 }
+EXPORT_SYMBOL(__mptcp_sock_set_tos);
 
 void mptcp_sock_set_tos(struct sock *sk)
 {
@@ -1783,3 +1786,28 @@ void mptcp_sock_set_nodelay(struct sock *sk)
        release_sock(sk);
 }
 EXPORT_SYMBOL(mptcp_sock_set_nodelay);
+
+int mptcp_sock_set_syncnt(struct sock *sk, int val)
+{
+       struct mptcp_sock *msk = mptcp_sk(sk);
+       struct mptcp_subflow_context *subflow;
+       struct sock *ssk;
+
+       if (val < 1 || val > MAX_TCP_SYNCNT)
+               return -EINVAL;
+
+       lock_sock(sk);
+       sockopt_seq_inc(msk);
+       inet_csk(sk)->icsk_syn_retries = val;
+       mptcp_for_each_subflow(msk, subflow) {
+               ssk = mptcp_subflow_tcp_sock(subflow);
+               if (ssk) {
+                       lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
+                       tcp_sock_set_syncnt(ssk, val);
+                       release_sock(ssk);
+               }
+       }
+       release_sock(sk);
+       return 0;
+}
+EXPORT_SYMBOL(mptcp_sock_set_syncnt);
-- 
2.53.0


Reply via email to