From: Geliang Tang <[email protected]>

Implement an MPTCP-specific instance of struct nvme_tcp_proto, nvme_mptcp_proto, and assign it to ctrl->proto when the transport string is "mptcp".
The socket option setting logic is similar to the target side, except that mptcp_sock_set_syncnt is newly defined for the host side. These helpers set the values on all existing subflows of an MPTCP connection, except for set_reuseaddr which only applies to the first subflow. The values are then synchronized to other newly created subflows in sync_socket_options(). A separate nvme_mptcp_ctrl_ops structure with .name = "mptcp" is defined and used for MPTCP controllers. "mptcp" is planned to be introduced as a new NVMe transport type into the NVMe Base Specification in the future. Currently, the Discovery Log does not yet recognize trtype=4 (MPTCP), and will show "trtype: unrecognized" for such entries: =====Discovery Log Entry 0====== trtype: unrecognized adrfam: ipv4 subtype: current discovery subsystem treq: not specified, sq flow control disable supported portid: 23106 trsvcid: 23601 subnqn: nqn.2014-08.org.nvmexpress.discovery traddr: 10.1.1.1 eflags: none Cc: Hannes Reinecke <[email protected]> Cc: John Meneghini <[email protected]> Cc: Randy Jennings <[email protected]> Cc: Nilay Shroff <[email protected]> Co-developed-by: zhenwei pi <[email protected]> Signed-off-by: zhenwei pi <[email protected]> Co-developed-by: Hui Zhu <[email protected]> Signed-off-by: Hui Zhu <[email protected]> Co-developed-by: Gang Yan <[email protected]> Signed-off-by: Gang Yan <[email protected]> Signed-off-by: Geliang Tang <[email protected]> --- drivers/nvme/host/tcp.c | 34 ++++++++++++++++++++++++++++++++++ include/net/mptcp.h | 11 +++++++++++ net/mptcp/sockopt.c | 30 +++++++++++++++++++++++++++++- 3 files changed, 74 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c index 305624d59c50..2388a8c443cc 100644 --- a/drivers/nvme/host/tcp.c +++ b/drivers/nvme/host/tcp.c @@ -2895,6 +2895,24 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = { .get_virt_boundary = nvmf_get_virt_boundary, }; +#ifdef CONFIG_MPTCP +static const struct 
nvme_ctrl_ops nvme_mptcp_ctrl_ops = { + .name = "mptcp", + .module = THIS_MODULE, + .flags = NVME_F_FABRICS | NVME_F_BLOCKING, + .reg_read32 = nvmf_reg_read32, + .reg_read64 = nvmf_reg_read64, + .reg_write32 = nvmf_reg_write32, + .subsystem_reset = nvmf_subsystem_reset, + .free_ctrl = nvme_tcp_free_ctrl, + .submit_async_event = nvme_tcp_submit_async_event, + .delete_ctrl = nvme_tcp_delete_ctrl, + .get_address = nvme_tcp_get_address, + .stop_ctrl = nvme_tcp_stop_ctrl, + .get_virt_boundary = nvmf_get_virt_boundary, +}; +#endif + static bool nvme_tcp_existing_controller(struct nvmf_ctrl_options *opts) { @@ -2923,6 +2941,18 @@ static const struct nvme_tcp_proto nvme_tcp_proto = { }; +#ifdef CONFIG_MPTCP +static const struct nvme_tcp_proto nvme_mptcp_proto = { + .protocol = IPPROTO_MPTCP, + .set_syncnt = mptcp_sock_set_syncnt, + .set_nodelay = mptcp_sock_set_nodelay, + .no_linger = mptcp_sock_no_linger, + .set_priority = mptcp_sock_set_priority, + .set_tos = __mptcp_sock_set_tos, + .ops = &nvme_mptcp_ctrl_ops, +}; +#endif + static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -2989,6 +3019,10 @@ static struct nvme_tcp_ctrl *nvme_tcp_alloc_ctrl(struct device *dev, if (!strcmp(ctrl->ctrl.opts->transport, "tcp")) { ctrl->proto = &nvme_tcp_proto; +#ifdef CONFIG_MPTCP + } else if (!strcmp(ctrl->ctrl.opts->transport, "mptcp")) { + ctrl->proto = &nvme_mptcp_proto; +#endif } else { ret = -EINVAL; goto out_free_ctrl; diff --git a/include/net/mptcp.h b/include/net/mptcp.h index b8ab214a7890..160267e35b13 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -238,11 +238,15 @@ void mptcp_sock_no_linger(struct sock *sk); void mptcp_sock_set_priority(struct sock *sk, u32 priority); +void __mptcp_sock_set_tos(struct sock *sk, int val); + void mptcp_sock_set_tos(struct sock *sk); void mptcp_sock_set_reuseaddr(struct sock *sk); void mptcp_sock_set_nodelay(struct sock *sk); + +int mptcp_sock_set_syncnt(struct sock *sk, int 
val); #else static inline void mptcp_init(void) @@ -334,11 +338,18 @@ static inline void mptcp_sock_no_linger(struct sock *sk) { } static inline void mptcp_sock_set_priority(struct sock *sk, u32 priority) { } +static inline void __mptcp_sock_set_tos(struct sock *sk, int val) { } + static inline void mptcp_sock_set_tos(struct sock *sk) { } static inline void mptcp_sock_set_reuseaddr(struct sock *sk) { } static inline void mptcp_sock_set_nodelay(struct sock *sk) { } + +static inline int mptcp_sock_set_syncnt(struct sock *sk, int val) +{ + return 0; +} #endif /* CONFIG_MPTCP */ #if IS_ENABLED(CONFIG_MPTCP_IPV6) diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 0adbbe568f6e..7857dac62afc 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -1598,6 +1598,8 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk) WRITE_ONCE(inet_sk(ssk)->local_port_range, READ_ONCE(inet_sk(sk)->local_port_range)); ssk->sk_reuse = sk->sk_reuse; + if (inet_csk(sk)->icsk_syn_retries > 0) + tcp_sock_set_syncnt(ssk, inet_csk(sk)->icsk_syn_retries); } void mptcp_sockopt_sync_locked(struct mptcp_sock *msk, struct sock *ssk) @@ -1709,7 +1711,7 @@ void mptcp_sock_set_priority(struct sock *sk, u32 priority) } EXPORT_SYMBOL(mptcp_sock_set_priority); -static void __mptcp_sock_set_tos(struct sock *sk, int val) +void __mptcp_sock_set_tos(struct sock *sk, int val) { struct mptcp_sock *msk = mptcp_sk(sk); struct mptcp_subflow_context *subflow; @@ -1728,6 +1730,7 @@ static void __mptcp_sock_set_tos(struct sock *sk, int val) } release_sock(sk); } +EXPORT_SYMBOL(__mptcp_sock_set_tos); void mptcp_sock_set_tos(struct sock *sk) { @@ -1783,3 +1786,28 @@ void mptcp_sock_set_nodelay(struct sock *sk) release_sock(sk); } EXPORT_SYMBOL(mptcp_sock_set_nodelay); + +int mptcp_sock_set_syncnt(struct sock *sk, int val) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct mptcp_subflow_context *subflow; + struct sock *ssk; + + if (val < 1 || val > MAX_TCP_SYNCNT) + return -EINVAL; 
+ + lock_sock(sk); + sockopt_seq_inc(msk); + inet_csk(sk)->icsk_syn_retries = val; + mptcp_for_each_subflow(msk, subflow) { + ssk = mptcp_subflow_tcp_sock(subflow); + if (ssk) { + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); + tcp_sock_set_syncnt(ssk, val); + release_sock(ssk); + } + } + release_sock(sk); + return 0; +} +EXPORT_SYMBOL(mptcp_sock_set_syncnt); -- 2.53.0
