On 2016-08-24 12:08, Jon Maloy wrote: > > > On 08/23/2016 10:41 AM, Richard Alpe wrote: >> This patch introduces UDP replicast. A concept where we emulate >> multicast by sending multiple unicast messages to configured peers. >> >> The purpose of replicast is mainly to be able to use TIPC in cloud >> environments where IP multicast is disabled. Using replicas to unicast >> multicast messages is costly as we have to copy each skb and send the >> copies individually. >> >> Signed-off-by: Richard Alpe <richard.a...@ericsson.com> >> --- >> include/uapi/linux/tipc_netlink.h | 1 + >> net/tipc/bearer.c | 44 +++++++++++++ >> net/tipc/bearer.h | 1 + >> net/tipc/netlink.c | 5 ++ >> net/tipc/udp_media.c | 126 >> ++++++++++++++++++++++++++++++++++---- >> net/tipc/udp_media.h | 44 +++++++++++++ >> 6 files changed, 210 insertions(+), 11 deletions(-) >> create mode 100644 net/tipc/udp_media.h >> >> diff --git a/include/uapi/linux/tipc_netlink.h >> b/include/uapi/linux/tipc_netlink.h >> index bcb65ef..b15664c 100644 >> --- a/include/uapi/linux/tipc_netlink.h >> +++ b/include/uapi/linux/tipc_netlink.h >> @@ -60,6 +60,7 @@ enum { >> TIPC_NL_MON_GET, >> TIPC_NL_MON_PEER_GET, >> TIPC_NL_PEER_REMOVE, >> + TIPC_NL_BEARER_ADD, >> >> __TIPC_NL_CMD_MAX, >> TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 >> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c >> index 6fc4e3c..b82cb00 100644 >> --- a/net/tipc/bearer.c >> +++ b/net/tipc/bearer.c >> @@ -42,6 +42,7 @@ >> #include "monitor.h" >> #include "bcast.h" >> #include "netlink.h" >> +#include "udp_media.h" >> >> #define MAX_ADDR_STR 60 >> >> @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct >> genl_info *info) >> return 0; >> } >> >> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) >> +{ >> + int err; >> + char *name; >> + struct tipc_bearer *b; >> + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; >> + struct net *net = sock_net(skb->sk); >> + >> + if (!info->attrs[TIPC_NLA_BEARER]) >> + return -EINVAL; >> + >> + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, >> + info->attrs[TIPC_NLA_BEARER], >> + tipc_nl_bearer_policy); >> + if (err) >> + return err; >> + >> + if (!attrs[TIPC_NLA_BEARER_NAME]) >> + return -EINVAL; >> + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); >> + >> + rtnl_lock(); >> + b = tipc_bearer_find(net, name); >> + if (!b) { >> + rtnl_unlock(); >> + return -EINVAL; >> + } >> + >> +#ifdef CONFIG_TIPC_MEDIA_UDP >> + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { >> + err = tipc_udp_nl_bearer_add(b, >> + attrs[TIPC_NLA_BEARER_UDP_OPTS]); >> + if (err) { >> + rtnl_unlock(); >> + return err; >> + } >> + } >> +#endif >> + rtnl_unlock(); >> + >> + return 0; >> +} >> + >> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) >> { >> int err; >> diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h >> index 83a9abb..78892e2f 100644 >> --- a/net/tipc/bearer.h >> +++ b/net/tipc/bearer.h >> @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct >> genl_info *info); >> int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb); >> int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); >> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); >> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); >> >> int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); >> int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); >> diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c >> index 2718de6..3122f21 100644 >> --- a/net/tipc/netlink.c >> +++ b/net/tipc/netlink.c >> @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { >> .policy = tipc_nl_policy, >> }, >> { >> + .cmd = TIPC_NL_BEARER_ADD, >> + .doit = tipc_nl_bearer_add, >> + .policy = tipc_nl_policy, >> + }, >> + { >> .cmd = TIPC_NL_BEARER_SET, >> .doit = tipc_nl_bearer_set, >> .policy = tipc_nl_policy, >> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c >> index b8ec1a1..d4517f4 100644 >> --- a/net/tipc/udp_media.c >> +++ b/net/tipc/udp_media.c >> @@ -49,6 +49,7 @@ >> #include "core.h" >> #include "bearer.h" >> #include "netlink.h" >> +#include "msg.h" >> >> /* IANA assigned UDP port */ >> #define UDP_PORT_DEFAULT 6118 >> @@ -70,6 +71,13 @@ struct udp_media_addr { >> }; >> }; >> >> +/* struct udp_replicast - container for UDP remote addresses */ >> +struct udp_replicast { >> + struct udp_media_addr addr; >> + struct rcu_head rcu; >> + struct list_head list; >> +}; >> + >> /** >> * struct udp_bearer - ip/udp bearer data structure >> * @bearer: associated generic tipc bearer >> @@ -82,6 +90,7 @@ struct udp_bearer { >> struct socket *ubsock; >> u32 ifindex; >> struct work_struct work; >> + struct udp_replicast rcast; >> }; >> >> static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) >> @@ -209,23 +218,79 @@ static int tipc_udp_send_msg(struct net *net, struct >> sk_buff *skb, >> if (skb_headroom(skb) < UDP_MIN_HEADROOM) { >> err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); >> if (err) >> - goto tx_error; >> + goto err_out; >> } >> >> skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); >> ub = rcu_dereference_rtnl(b->media_ptr); >> if (!ub) { >> err = -ENODEV; >> - goto tx_error; >> + goto err_out; >> + } >> + >> + /* Replicast, send an skb to each configured IP address */ >> + if (unlikely(addr->broadcast)) { >> + bool first = true; >> + struct udp_replicast *rcast; >> + >> + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { >> + struct sk_buff *_skb; >> + >> + /* Avoid one extra skb copy */ >> + if (first) { >> + dst = &rcast->addr; >> + first = false; >> + continue; >> + } >> + >> + _skb = pskb_copy(skb, GFP_ATOMIC); >> + if (!_skb) { >> + err = -ENOMEM; >> + goto err_out; >> + } >> + >> + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); >> + if (err) { >> + kfree_skb(_skb); >> + goto err_out; >> + } >> + } > Slightly confusing. Why don't you return here? Even when the list is > non-empty you go on and send to the first given address, which may or > may not be a multicast address. > It is probably correct, but the logics is not intuitive. Maybe some > refactoring and comments will help? We avoid an extra skb clone. The logic is as follows.
We always have an skb when we come here. * If this is a multicast bearer, we skip the replicast code and send it as it is. * If it's a peer-to-peer bearer we sent it to the first address in the replicast list, i.e. our peer. * If it's a replicast bearer, we send a skb copy to all but the first peer, which gets the original skb. This way we avoid an extra skb_copy() and skb_free(). This is an effect of moving the peer-to-peer address from the broadcast field and placing it in the replicast list, like you suggested yesterday. Regards Richard > > ///jon > > >> } >> >> return tipc_udp_xmit(net, skb, ub, src, dst); >> >> -tx_error: >> +err_out: >> kfree_skb(skb); >> return err; >> } >> >> +static int tipc_udp_rcast_add(struct tipc_bearer *b, >> + struct udp_media_addr *addr) >> +{ >> + struct udp_replicast *rcast; >> + struct udp_bearer *ub; >> + >> + ub = rcu_dereference_rtnl(b->media_ptr); >> + if (!ub) >> + return -ENODEV; >> + >> + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); >> + if (!rcast) >> + return -ENOMEM; >> + >> + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); >> + >> + if (ntohs(addr->proto) == ETH_P_IP) >> + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); >> +#if IS_ENABLED(CONFIG_IPV6) >> + else if (ntohs(addr->proto) == ETH_P_IPV6) >> + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); >> +#endif >> + >> + list_add_rcu(&rcast->list, &ub->rcast.list); >> + return 0; >> +} >> + >> /* tipc_udp_recv - read data from bearer socket */ >> static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) >> { >> @@ -320,6 +385,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla, >> struct udp_media_addr *addr, >> return -EADDRNOTAVAIL; >> } >> >> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) >> +{ >> + int err; >> + struct udp_media_addr addr = {0}; >> + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; >> + struct udp_media_addr *dst; >> + >> + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) >> + return -EINVAL; >> + >> + if (!opts[TIPC_NLA_UDP_REMOTE]) >> + return -EINVAL; >> + >> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); >> + if (err) >> + return err; >> + >> + dst = (struct udp_media_addr *)&b->bcast_addr.value; >> + if (tipc_udp_is_mcast_addr(dst)) { >> + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); >> + return -EINVAL; >> + } >> + >> + return tipc_udp_rcast_add(b, &addr); >> +} >> + >> /** >> * tipc_udp_enable - callback to create a new udp bearer instance >> * @net: network namespace >> @@ -334,7 +425,7 @@ static int tipc_udp_enable(struct net *net, struct >> tipc_bearer *b, >> { >> int err = -EINVAL; >> struct udp_bearer *ub; >> - struct udp_media_addr *remote; >> + struct udp_media_addr remote = {0}; >> struct udp_media_addr local = {0}; >> struct udp_port_cfg udp_conf = {0}; >> struct udp_tunnel_sock_cfg tuncfg = {NULL}; >> @@ -344,6 +435,8 @@ static int tipc_udp_enable(struct net *net, struct >> tipc_bearer *b, >> if (!ub) >> return -ENOMEM; >> >> + INIT_LIST_HEAD(&ub->rcast.list); >> + >> if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) >> goto err; >> >> @@ -362,9 +455,7 @@ static int tipc_udp_enable(struct net *net, struct >> tipc_bearer *b, >> if (err) >> goto err; >> >> - remote = (struct udp_media_addr *)&b->bcast_addr.value; >> - memset(remote, 0, sizeof(struct udp_media_addr)); >> - err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL); >> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); >> if (err) >> goto err; >> >> @@ -409,10 +500,17 @@ static int tipc_udp_enable(struct net *net, struct >> tipc_bearer *b, >> tuncfg.encap_destroy = NULL; >> setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); >> >> - if (tipc_udp_is_mcast_addr(remote)) { >> - if (enable_mcast(ub, remote)) >> - goto err; >> - } >> + /** >> + * The bcast media address port is used for all peers and the ip >> + * is used if it's a multicast address. >> + */ >> + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); >> + if (tipc_udp_is_mcast_addr(&remote)) >> + err = enable_mcast(ub, &remote); >> + else >> + err= tipc_udp_rcast_add(b, &remote); >> + if (err) >> + goto err; >> >> return 0; >> err: >> @@ -424,6 +522,12 @@ err: >> static void cleanup_bearer(struct work_struct *work) >> { >> struct udp_bearer *ub = container_of(work, struct udp_bearer, work); >> + struct udp_replicast *rcast, *tmp; >> + >> + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { >> + list_del_rcu(&rcast->list); >> + kfree_rcu(rcast, rcu); >> + } >> >> if (ub->ubsock) >> udp_tunnel_sock_release(ub->ubsock); >> diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h >> new file mode 100644 >> index 0000000..4dcb548 >> --- /dev/null >> +++ b/net/tipc/udp_media.h >> @@ -0,0 +1,44 @@ >> +/* >> + * net/tipc/udp_media.h: Include file for UDP bearer media >> + * >> + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB >> + * Copyright (c) 2005, 2010-2011, Wind River Systems >> + * All rights reserved. >> + * >> + * Redistribution and use in source and binary forms, with or without >> + * modification, are permitted provided that the following conditions are >> met: >> + * >> + * 1. Redistributions of source code must retain the above copyright >> + * notice, this list of conditions and the following disclaimer. >> + * 2. Redistributions in binary form must reproduce the above copyright >> + * notice, this list of conditions and the following disclaimer in the >> + * documentation and/or other materials provided with the distribution. >> + * 3. Neither the names of the copyright holders nor the names of its >> + * contributors may be used to endorse or promote products derived from >> + * this software without specific prior written permission. >> + * >> + * Alternatively, this software may be distributed under the terms of the >> + * GNU General Public License ("GPL") version 2 as published by the Free >> + * Software Foundation. >> + * >> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS >> IS" >> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE >> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >> PURPOSE >> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE >> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR >> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF >> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS >> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN >> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) >> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF >> THE >> + * POSSIBILITY OF SUCH DAMAGE. >> + */ >> + >> +#ifdef CONFIG_TIPC_MEDIA_UDP >> +#ifndef _TIPC_UDP_MEDIA_H >> +#define _TIPC_UDP_MEDIA_H >> + >> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); >> + >> +#endif >> +#endif > > > ------------------------------------------------------------------------------ > _______________________________________________ > tipc-discussion mailing list > tipc-discussion@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/tipc-discussion > ------------------------------------------------------------------------------ _______________________________________________ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion