On 08/24/2016 06:57 AM, Richard Alpe wrote: > On 2016-08-24 12:08, Jon Maloy wrote: >> >> On 08/23/2016 10:41 AM, Richard Alpe wrote: >>> This patch introduces UDP replicast. A concept where we emulate >>> multicast by sending multiple unicast messages to configured peers. >>> >>> The purpose of replicast is mainly to be able to use TIPC in cloud >>> environments where IP multicast is disabled. Using replicas to unicast >>> multicast messages is costly as we have to copy each skb and send the >>> copies individually. >>> >>> Signed-off-by: Richard Alpe <richard.a...@ericsson.com> >>> --- >>> include/uapi/linux/tipc_netlink.h | 1 + >>> net/tipc/bearer.c | 44 +++++++++++++ >>> net/tipc/bearer.h | 1 + >>> net/tipc/netlink.c | 5 ++ >>> net/tipc/udp_media.c | 126 >>> ++++++++++++++++++++++++++++++++++---- >>> net/tipc/udp_media.h | 44 +++++++++++++ >>> 6 files changed, 210 insertions(+), 11 deletions(-) >>> create mode 100644 net/tipc/udp_media.h >>> >>> diff --git a/include/uapi/linux/tipc_netlink.h >>> b/include/uapi/linux/tipc_netlink.h >>> index bcb65ef..b15664c 100644 >>> --- a/include/uapi/linux/tipc_netlink.h >>> +++ b/include/uapi/linux/tipc_netlink.h >>> @@ -60,6 +60,7 @@ enum { >>> TIPC_NL_MON_GET, >>> TIPC_NL_MON_PEER_GET, >>> TIPC_NL_PEER_REMOVE, >>> + TIPC_NL_BEARER_ADD, >>> >>> __TIPC_NL_CMD_MAX, >>> TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 >>> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c >>> index 6fc4e3c..b82cb00 100644 >>> --- a/net/tipc/bearer.c >>> +++ b/net/tipc/bearer.c >>> @@ -42,6 +42,7 @@ >>> #include "monitor.h" >>> #include "bcast.h" >>> #include "netlink.h" >>> +#include "udp_media.h" >>> >>> #define MAX_ADDR_STR 60 >>> >>> @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct >>> genl_info *info) >>> return 0; >>> } >>> >>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info) >>> +{ >>> + int err; >>> + char *name; >>> + struct tipc_bearer *b; >>> + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; >>> + 
struct net *net = sock_net(skb->sk); >>> + >>> + if (!info->attrs[TIPC_NLA_BEARER]) >>> + return -EINVAL; >>> + >>> + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX, >>> + info->attrs[TIPC_NLA_BEARER], >>> + tipc_nl_bearer_policy); >>> + if (err) >>> + return err; >>> + >>> + if (!attrs[TIPC_NLA_BEARER_NAME]) >>> + return -EINVAL; >>> + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]); >>> + >>> + rtnl_lock(); >>> + b = tipc_bearer_find(net, name); >>> + if (!b) { >>> + rtnl_unlock(); >>> + return -EINVAL; >>> + } >>> + >>> +#ifdef CONFIG_TIPC_MEDIA_UDP >>> + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) { >>> + err = tipc_udp_nl_bearer_add(b, >>> + attrs[TIPC_NLA_BEARER_UDP_OPTS]); >>> + if (err) { >>> + rtnl_unlock(); >>> + return err; >>> + } >>> + } >>> +#endif >>> + rtnl_unlock(); >>> + >>> + return 0; >>> +} >>> + >>> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info) >>> { >>> int err; >>> diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h >>> index 83a9abb..78892e2f 100644 >>> --- a/net/tipc/bearer.h >>> +++ b/net/tipc/bearer.h >>> @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct >>> genl_info *info); >>> int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback >>> *cb); >>> int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info); >>> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info); >>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info); >>> >>> int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb); >>> int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info); >>> diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c >>> index 2718de6..3122f21 100644 >>> --- a/net/tipc/netlink.c >>> +++ b/net/tipc/netlink.c >>> @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { >>> .policy = tipc_nl_policy, >>> }, >>> { >>> + .cmd = TIPC_NL_BEARER_ADD, >>> + .doit = tipc_nl_bearer_add, >>> + .policy = tipc_nl_policy, >>> + }, >>> + 
{ >>> .cmd = TIPC_NL_BEARER_SET, >>> .doit = tipc_nl_bearer_set, >>> .policy = tipc_nl_policy, >>> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c >>> index b8ec1a1..d4517f4 100644 >>> --- a/net/tipc/udp_media.c >>> +++ b/net/tipc/udp_media.c >>> @@ -49,6 +49,7 @@ >>> #include "core.h" >>> #include "bearer.h" >>> #include "netlink.h" >>> +#include "msg.h" >>> >>> /* IANA assigned UDP port */ >>> #define UDP_PORT_DEFAULT 6118 >>> @@ -70,6 +71,13 @@ struct udp_media_addr { >>> }; >>> }; >>> >>> +/* struct udp_replicast - container for UDP remote addresses */ >>> +struct udp_replicast { >>> + struct udp_media_addr addr; >>> + struct rcu_head rcu; >>> + struct list_head list; >>> +}; >>> + >>> /** >>> * struct udp_bearer - ip/udp bearer data structure >>> * @bearer: associated generic tipc bearer >>> @@ -82,6 +90,7 @@ struct udp_bearer { >>> struct socket *ubsock; >>> u32 ifindex; >>> struct work_struct work; >>> + struct udp_replicast rcast; >>> }; >>> >>> static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr) >>> @@ -209,23 +218,79 @@ static int tipc_udp_send_msg(struct net *net, struct >>> sk_buff *skb, >>> if (skb_headroom(skb) < UDP_MIN_HEADROOM) { >>> err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC); >>> if (err) >>> - goto tx_error; >>> + goto err_out; >>> } >>> >>> skb_set_inner_protocol(skb, htons(ETH_P_TIPC)); >>> ub = rcu_dereference_rtnl(b->media_ptr); >>> if (!ub) { >>> err = -ENODEV; >>> - goto tx_error; >>> + goto err_out; >>> + } >>> + >>> + /* Replicast, send an skb to each configured IP address */ >>> + if (unlikely(addr->broadcast)) { >>> + bool first = true; >>> + struct udp_replicast *rcast; >>> + >>> + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) { >>> + struct sk_buff *_skb; >>> + >>> + /* Avoid one extra skb copy */ >>> + if (first) { >>> + dst = &rcast->addr; >>> + first = false; >>> + continue; >>> + } >>> + >>> + _skb = pskb_copy(skb, GFP_ATOMIC); >>> + if (!_skb) { >>> + err = -ENOMEM; >>> + goto 
err_out; >>> + } >>> + >>> + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr); >>> + if (err) { >>> + kfree_skb(_skb); >>> + goto err_out; >>> + } >>> + } >> Slightly confusing. Why don't you return here? Even when the list is >> non-empty you go on and send to the first given address, which may or >> may not be a multicast address. >> It is probably correct, but the logic is not intuitive. Maybe some >> refactoring and comments will help? > We avoid an extra skb clone. > The logic is as follows. > > We always have an skb when we come here. > * If this is a multicast bearer, we skip the replicast code and send it > as it is. > * If it's a peer-to-peer bearer we send it to the first address in the > replicast list, i.e. our peer. I don't think the concept of a peer-to-peer bearer even exists from now on. To me it is just a (very unusual) special case of a replicast bearer where N == 1, and it deserves no special consideration in the code if we do things right.
> * If it's a replicast bearer, we send a skb copy to all but the first > peer, which gets the original skb. This way we avoid an extra > skb_copy() and skb_free(). An extra clone is nothing if you are anyway replicasting to N >> 1 nodes. Code clarity should be more important. What about: if (!addr->broadcast || skb_queue_empty(list)) return tipc_udp_xmit(net, skb, ub, src, dst); /* Replicast */ list_for_each_entry(...) { clone; xmit; } skb_free(skb); I think this would be easier to understand. Here I assume that the peer list is empty if we have a multicast bearer, and that all peer addresses only exist in the form of media addresses in the individual links. If the peer list for some reason I don't understand cannot be empty, we have to find some other way to distinguish between multicast and replicast, such as a separate 'replicast' value in tipc_media_address. > > This is an effect of moving the peer-to-peer address If the "peer-to-peer" address *is* a multicast address it should go to the broadcast field, otherwise it should be added as the first address in the list. BR ///jon > from the > broadcast field and placing it in the replicast list, like you suggested > yesterday. 
> > Regards > Richard > >> ///jon >> >> >>> } >>> >>> return tipc_udp_xmit(net, skb, ub, src, dst); >>> >>> -tx_error: >>> +err_out: >>> kfree_skb(skb); >>> return err; >>> } >>> >>> +static int tipc_udp_rcast_add(struct tipc_bearer *b, >>> + struct udp_media_addr *addr) >>> +{ >>> + struct udp_replicast *rcast; >>> + struct udp_bearer *ub; >>> + >>> + ub = rcu_dereference_rtnl(b->media_ptr); >>> + if (!ub) >>> + return -ENODEV; >>> + >>> + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC); >>> + if (!rcast) >>> + return -ENOMEM; >>> + >>> + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr)); >>> + >>> + if (ntohs(addr->proto) == ETH_P_IP) >>> + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4); >>> +#if IS_ENABLED(CONFIG_IPV6) >>> + else if (ntohs(addr->proto) == ETH_P_IPV6) >>> + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6); >>> +#endif >>> + >>> + list_add_rcu(&rcast->list, &ub->rcast.list); >>> + return 0; >>> +} >>> + >>> /* tipc_udp_recv - read data from bearer socket */ >>> static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb) >>> { >>> @@ -320,6 +385,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla, >>> struct udp_media_addr *addr, >>> return -EADDRNOTAVAIL; >>> } >>> >>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr) >>> +{ >>> + int err; >>> + struct udp_media_addr addr = {0}; >>> + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1]; >>> + struct udp_media_addr *dst; >>> + >>> + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy)) >>> + return -EINVAL; >>> + >>> + if (!opts[TIPC_NLA_UDP_REMOTE]) >>> + return -EINVAL; >>> + >>> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL); >>> + if (err) >>> + return err; >>> + >>> + dst = (struct udp_media_addr *)&b->bcast_addr.value; >>> + if (tipc_udp_is_mcast_addr(dst)) { >>> + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n"); >>> + return -EINVAL; >>> + } >>> + >>> + return tipc_udp_rcast_add(b, &addr); >>> +} >>> 
+ >>> /** >>> * tipc_udp_enable - callback to create a new udp bearer instance >>> * @net: network namespace >>> @@ -334,7 +425,7 @@ static int tipc_udp_enable(struct net *net, struct >>> tipc_bearer *b, >>> { >>> int err = -EINVAL; >>> struct udp_bearer *ub; >>> - struct udp_media_addr *remote; >>> + struct udp_media_addr remote = {0}; >>> struct udp_media_addr local = {0}; >>> struct udp_port_cfg udp_conf = {0}; >>> struct udp_tunnel_sock_cfg tuncfg = {NULL}; >>> @@ -344,6 +435,8 @@ static int tipc_udp_enable(struct net *net, struct >>> tipc_bearer *b, >>> if (!ub) >>> return -ENOMEM; >>> >>> + INIT_LIST_HEAD(&ub->rcast.list); >>> + >>> if (!attrs[TIPC_NLA_BEARER_UDP_OPTS]) >>> goto err; >>> >>> @@ -362,9 +455,7 @@ static int tipc_udp_enable(struct net *net, struct >>> tipc_bearer *b, >>> if (err) >>> goto err; >>> >>> - remote = (struct udp_media_addr *)&b->bcast_addr.value; >>> - memset(remote, 0, sizeof(struct udp_media_addr)); >>> - err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL); >>> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL); >>> if (err) >>> goto err; >>> >>> @@ -409,10 +500,17 @@ static int tipc_udp_enable(struct net *net, struct >>> tipc_bearer *b, >>> tuncfg.encap_destroy = NULL; >>> setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg); >>> >>> - if (tipc_udp_is_mcast_addr(remote)) { >>> - if (enable_mcast(ub, remote)) >>> - goto err; >>> - } >>> + /** >>> + * The bcast media address port is used for all peers and the ip >>> + * is used if it's a multicast address. 
>>> + */ >>> + memcpy(&b->bcast_addr.value, &remote, sizeof(remote)); >>> + if (tipc_udp_is_mcast_addr(&remote)) >>> + err = enable_mcast(ub, &remote); >>> + else >>> + err= tipc_udp_rcast_add(b, &remote); >>> + if (err) >>> + goto err; >>> >>> return 0; >>> err: >>> @@ -424,6 +522,12 @@ err: >>> static void cleanup_bearer(struct work_struct *work) >>> { >>> struct udp_bearer *ub = container_of(work, struct udp_bearer, work); >>> + struct udp_replicast *rcast, *tmp; >>> + >>> + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) { >>> + list_del_rcu(&rcast->list); >>> + kfree_rcu(rcast, rcu); >>> + } >>> >>> if (ub->ubsock) >>> udp_tunnel_sock_release(ub->ubsock); >>> diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h >>> new file mode 100644 >>> index 0000000..4dcb548 >>> --- /dev/null >>> +++ b/net/tipc/udp_media.h >>> @@ -0,0 +1,44 @@ >>> +/* >>> + * net/tipc/udp_media.h: Include file for UDP bearer media >>> + * >>> + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB >>> + * Copyright (c) 2005, 2010-2011, Wind River Systems >>> + * All rights reserved. >>> + * >>> + * Redistribution and use in source and binary forms, with or without >>> + * modification, are permitted provided that the following conditions are >>> met: >>> + * >>> + * 1. Redistributions of source code must retain the above copyright >>> + * notice, this list of conditions and the following disclaimer. >>> + * 2. Redistributions in binary form must reproduce the above copyright >>> + * notice, this list of conditions and the following disclaimer in the >>> + * documentation and/or other materials provided with the distribution. >>> + * 3. Neither the names of the copyright holders nor the names of its >>> + * contributors may be used to endorse or promote products derived from >>> + * this software without specific prior written permission. 
>>> + * >>> + * Alternatively, this software may be distributed under the terms of the >>> + * GNU General Public License ("GPL") version 2 as published by the Free >>> + * Software Foundation. >>> + * >>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS >>> IS" >>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, >>> THE >>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR >>> PURPOSE >>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE >>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR >>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF >>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS >>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN >>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) >>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF >>> THE >>> + * POSSIBILITY OF SUCH DAMAGE. >>> + */ >>> + >>> +#ifdef CONFIG_TIPC_MEDIA_UDP >>> +#ifndef _TIPC_UDP_MEDIA_H >>> +#define _TIPC_UDP_MEDIA_H >>> + >>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr); >>> + >>> +#endif >>> +#endif >> >> ------------------------------------------------------------------------------ >> _______________________________________________ >> tipc-discussion mailing list >> tipc-discussion@lists.sourceforge.net >> https://lists.sourceforge.net/lists/listinfo/tipc-discussion >> ------------------------------------------------------------------------------ _______________________________________________ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion