On 2016-08-24 12:08, Jon Maloy wrote:
>
>
> On 08/23/2016 10:41 AM, Richard Alpe wrote:
>> This patch introduces UDP replicast. A concept where we emulate
>> multicast by sending multiple unicast messages to configured peers.
>>
>> The purpose of replicast is mainly to be able to use TIPC in cloud
>> environments where IP multicast is disabled. Using replicas to unicast
>> multicast messages is costly as we have to copy each skb and send the
>> copies individually.
>>
>> Signed-off-by: Richard Alpe <[email protected]>
>> ---
>> include/uapi/linux/tipc_netlink.h | 1 +
>> net/tipc/bearer.c | 44 +++++++++++++
>> net/tipc/bearer.h | 1 +
>> net/tipc/netlink.c | 5 ++
>> net/tipc/udp_media.c | 126
>> ++++++++++++++++++++++++++++++++++----
>> net/tipc/udp_media.h | 44 +++++++++++++
>> 6 files changed, 210 insertions(+), 11 deletions(-)
>> create mode 100644 net/tipc/udp_media.h
>>
>> diff --git a/include/uapi/linux/tipc_netlink.h
>> b/include/uapi/linux/tipc_netlink.h
>> index bcb65ef..b15664c 100644
>> --- a/include/uapi/linux/tipc_netlink.h
>> +++ b/include/uapi/linux/tipc_netlink.h
>> @@ -60,6 +60,7 @@ enum {
>> TIPC_NL_MON_GET,
>> TIPC_NL_MON_PEER_GET,
>> TIPC_NL_PEER_REMOVE,
>> + TIPC_NL_BEARER_ADD,
>>
>> __TIPC_NL_CMD_MAX,
>> TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
>> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
>> index 6fc4e3c..b82cb00 100644
>> --- a/net/tipc/bearer.c
>> +++ b/net/tipc/bearer.c
>> @@ -42,6 +42,7 @@
>> #include "monitor.h"
>> #include "bcast.h"
>> #include "netlink.h"
>> +#include "udp_media.h"
>>
>> #define MAX_ADDR_STR 60
>>
>> @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct
>> genl_info *info)
>> return 0;
>> }
>>
>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
>> +{
>> + int err;
>> + char *name;
>> + struct tipc_bearer *b;
>> + struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
>> + struct net *net = sock_net(skb->sk);
>> +
>> + if (!info->attrs[TIPC_NLA_BEARER])
>> + return -EINVAL;
>> +
>> + err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
>> + info->attrs[TIPC_NLA_BEARER],
>> + tipc_nl_bearer_policy);
>> + if (err)
>> + return err;
>> +
>> + if (!attrs[TIPC_NLA_BEARER_NAME])
>> + return -EINVAL;
>> + name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
>> +
>> + rtnl_lock();
>> + b = tipc_bearer_find(net, name);
>> + if (!b) {
>> + rtnl_unlock();
>> + return -EINVAL;
>> + }
>> +
>> +#ifdef CONFIG_TIPC_MEDIA_UDP
>> + if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
>> + err = tipc_udp_nl_bearer_add(b,
>> + attrs[TIPC_NLA_BEARER_UDP_OPTS]);
>> + if (err) {
>> + rtnl_unlock();
>> + return err;
>> + }
>> + }
>> +#endif
>> + rtnl_unlock();
>> +
>> + return 0;
>> +}
>> +
>> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
>> {
>> int err;
>> diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
>> index 83a9abb..78892e2f 100644
>> --- a/net/tipc/bearer.h
>> +++ b/net/tipc/bearer.h
>> @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct
>> genl_info *info);
>> int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
>> int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
>> int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
>>
>> int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
>> int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
>> diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
>> index 2718de6..3122f21 100644
>> --- a/net/tipc/netlink.c
>> +++ b/net/tipc/netlink.c
>> @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
>> .policy = tipc_nl_policy,
>> },
>> {
>> + .cmd = TIPC_NL_BEARER_ADD,
>> + .doit = tipc_nl_bearer_add,
>> + .policy = tipc_nl_policy,
>> + },
>> + {
>> .cmd = TIPC_NL_BEARER_SET,
>> .doit = tipc_nl_bearer_set,
>> .policy = tipc_nl_policy,
>> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
>> index b8ec1a1..d4517f4 100644
>> --- a/net/tipc/udp_media.c
>> +++ b/net/tipc/udp_media.c
>> @@ -49,6 +49,7 @@
>> #include "core.h"
>> #include "bearer.h"
>> #include "netlink.h"
>> +#include "msg.h"
>>
>> /* IANA assigned UDP port */
>> #define UDP_PORT_DEFAULT 6118
>> @@ -70,6 +71,13 @@ struct udp_media_addr {
>> };
>> };
>>
>> +/* struct udp_replicast - container for UDP remote addresses */
>> +struct udp_replicast {
>> + struct udp_media_addr addr;
>> + struct rcu_head rcu;
>> + struct list_head list;
>> +};
>> +
>> /**
>> * struct udp_bearer - ip/udp bearer data structure
>> * @bearer: associated generic tipc bearer
>> @@ -82,6 +90,7 @@ struct udp_bearer {
>> struct socket *ubsock;
>> u32 ifindex;
>> struct work_struct work;
>> + struct udp_replicast rcast;
>> };
>>
>> static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
>> @@ -209,23 +218,79 @@ static int tipc_udp_send_msg(struct net *net, struct
>> sk_buff *skb,
>> if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
>> err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
>> if (err)
>> - goto tx_error;
>> + goto err_out;
>> }
>>
>> skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
>> ub = rcu_dereference_rtnl(b->media_ptr);
>> if (!ub) {
>> err = -ENODEV;
>> - goto tx_error;
>> + goto err_out;
>> + }
>> +
>> + /* Replicast, send an skb to each configured IP address */
>> + if (unlikely(addr->broadcast)) {
>> + bool first = true;
>> + struct udp_replicast *rcast;
>> +
>> + list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
>> + struct sk_buff *_skb;
>> +
>> + /* Avoid one extra skb copy */
>> + if (first) {
>> + dst = &rcast->addr;
>> + first = false;
>> + continue;
>> + }
>> +
>> + _skb = pskb_copy(skb, GFP_ATOMIC);
>> + if (!_skb) {
>> + err = -ENOMEM;
>> + goto err_out;
>> + }
>> +
>> + err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
>> + if (err) {
>> + kfree_skb(_skb);
>> + goto err_out;
>> + }
>> + }
> Slightly confusing. Why don't you return here? Even when the list is
> non-empty you go on and send to the first given address, which may or
> may not be a multicast address.
> It is probably correct, but the logic is not intuitive. Maybe some
> refactoring and comments would help?
We deliberately fall through instead of returning, to avoid an extra skb copy.
The logic is as follows.
We always have an skb when we come here.
* If this is a multicast bearer, we skip the replicast code and send it
as it is.
* If it's a peer-to-peer bearer, we send it to the first address in the
replicast list, i.e. our peer.
* If it's a replicast bearer, we send an skb copy to all but the first
peer, which gets the original skb. This way we avoid an extra
pskb_copy() and kfree_skb().
This is an effect of moving the peer-to-peer address from the
broadcast field and placing it in the replicast list, like you suggested
yesterday.
Regards
Richard
>
> ///jon
>
>
>> }
>>
>> return tipc_udp_xmit(net, skb, ub, src, dst);
>>
>> -tx_error:
>> +err_out:
>> kfree_skb(skb);
>> return err;
>> }
>>
>> +static int tipc_udp_rcast_add(struct tipc_bearer *b,
>> + struct udp_media_addr *addr)
>> +{
>> + struct udp_replicast *rcast;
>> + struct udp_bearer *ub;
>> +
>> + ub = rcu_dereference_rtnl(b->media_ptr);
>> + if (!ub)
>> + return -ENODEV;
>> +
>> + rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
>> + if (!rcast)
>> + return -ENOMEM;
>> +
>> + memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
>> +
>> + if (ntohs(addr->proto) == ETH_P_IP)
>> + pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> + else if (ntohs(addr->proto) == ETH_P_IPV6)
>> + pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
>> +#endif
>> +
>> + list_add_rcu(&rcast->list, &ub->rcast.list);
>> + return 0;
>> +}
>> +
>> /* tipc_udp_recv - read data from bearer socket */
>> static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
>> {
>> @@ -320,6 +385,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla,
>> struct udp_media_addr *addr,
>> return -EADDRNOTAVAIL;
>> }
>>
>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
>> +{
>> + int err;
>> + struct udp_media_addr addr = {0};
>> + struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
>> + struct udp_media_addr *dst;
>> +
>> + if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
>> + return -EINVAL;
>> +
>> + if (!opts[TIPC_NLA_UDP_REMOTE])
>> + return -EINVAL;
>> +
>> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
>> + if (err)
>> + return err;
>> +
>> + dst = (struct udp_media_addr *)&b->bcast_addr.value;
>> + if (tipc_udp_is_mcast_addr(dst)) {
>> + pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
>> + return -EINVAL;
>> + }
>> +
>> + return tipc_udp_rcast_add(b, &addr);
>> +}
>> +
>> /**
>> * tipc_udp_enable - callback to create a new udp bearer instance
>> * @net: network namespace
>> @@ -334,7 +425,7 @@ static int tipc_udp_enable(struct net *net, struct
>> tipc_bearer *b,
>> {
>> int err = -EINVAL;
>> struct udp_bearer *ub;
>> - struct udp_media_addr *remote;
>> + struct udp_media_addr remote = {0};
>> struct udp_media_addr local = {0};
>> struct udp_port_cfg udp_conf = {0};
>> struct udp_tunnel_sock_cfg tuncfg = {NULL};
>> @@ -344,6 +435,8 @@ static int tipc_udp_enable(struct net *net, struct
>> tipc_bearer *b,
>> if (!ub)
>> return -ENOMEM;
>>
>> + INIT_LIST_HEAD(&ub->rcast.list);
>> +
>> if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
>> goto err;
>>
>> @@ -362,9 +455,7 @@ static int tipc_udp_enable(struct net *net, struct
>> tipc_bearer *b,
>> if (err)
>> goto err;
>>
>> - remote = (struct udp_media_addr *)&b->bcast_addr.value;
>> - memset(remote, 0, sizeof(struct udp_media_addr));
>> - err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL);
>> + err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
>> if (err)
>> goto err;
>>
>> @@ -409,10 +500,17 @@ static int tipc_udp_enable(struct net *net, struct
>> tipc_bearer *b,
>> tuncfg.encap_destroy = NULL;
>> setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
>>
>> - if (tipc_udp_is_mcast_addr(remote)) {
>> - if (enable_mcast(ub, remote))
>> - goto err;
>> - }
>> + /**
>> + * The bcast media address port is used for all peers and the ip
>> + * is used if it's a multicast address.
>> + */
>> + memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
>> + if (tipc_udp_is_mcast_addr(&remote))
>> + err = enable_mcast(ub, &remote);
>> + else
>> + err= tipc_udp_rcast_add(b, &remote);
>> + if (err)
>> + goto err;
>>
>> return 0;
>> err:
>> @@ -424,6 +522,12 @@ err:
>> static void cleanup_bearer(struct work_struct *work)
>> {
>> struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
>> + struct udp_replicast *rcast, *tmp;
>> +
>> + list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
>> + list_del_rcu(&rcast->list);
>> + kfree_rcu(rcast, rcu);
>> + }
>>
>> if (ub->ubsock)
>> udp_tunnel_sock_release(ub->ubsock);
>> diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
>> new file mode 100644
>> index 0000000..4dcb548
>> --- /dev/null
>> +++ b/net/tipc/udp_media.h
>> @@ -0,0 +1,44 @@
>> +/*
>> + * net/tipc/udp_media.h: Include file for UDP bearer media
>> + *
>> + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
>> + * Copyright (c) 2005, 2010-2011, Wind River Systems
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are
>> met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + * notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + * notice, this list of conditions and the following disclaimer in the
>> + * documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + * contributors may be used to endorse or promote products derived from
>> + * this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
>> IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
>> PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
>> THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifdef CONFIG_TIPC_MEDIA_UDP
>> +#ifndef _TIPC_UDP_MEDIA_H
>> +#define _TIPC_UDP_MEDIA_H
>> +
>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
>> +
>> +#endif
>> +#endif
>
>
> ------------------------------------------------------------------------------
> _______________________________________________
> tipc-discussion mailing list
> [email protected]
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion
>
------------------------------------------------------------------------------
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion