On 2016-08-24 12:08, Jon Maloy wrote:
> 
> 
> On 08/23/2016 10:41 AM, Richard Alpe wrote:
>> This patch introduces UDP replicast. A concept where we emulate
>> multicast by sending multiple unicast messages to configured peers.
>>
>> The purpose of replicast is mainly to be able to use TIPC in cloud
>> environments where IP multicast is disabled. Using replicas to unicast
>> multicast messages is costly as we have to copy each skb and send the
>> copies individually.
>>
>> Signed-off-by: Richard Alpe <richard.a...@ericsson.com>
>> ---
>>   include/uapi/linux/tipc_netlink.h |   1 +
>>   net/tipc/bearer.c                 |  44 +++++++++++++
>>   net/tipc/bearer.h                 |   1 +
>>   net/tipc/netlink.c                |   5 ++
>>   net/tipc/udp_media.c              | 126 
>> ++++++++++++++++++++++++++++++++++----
>>   net/tipc/udp_media.h              |  44 +++++++++++++
>>   6 files changed, 210 insertions(+), 11 deletions(-)
>>   create mode 100644 net/tipc/udp_media.h
>>
>> diff --git a/include/uapi/linux/tipc_netlink.h 
>> b/include/uapi/linux/tipc_netlink.h
>> index bcb65ef..b15664c 100644
>> --- a/include/uapi/linux/tipc_netlink.h
>> +++ b/include/uapi/linux/tipc_netlink.h
>> @@ -60,6 +60,7 @@ enum {
>>      TIPC_NL_MON_GET,
>>      TIPC_NL_MON_PEER_GET,
>>      TIPC_NL_PEER_REMOVE,
>> +    TIPC_NL_BEARER_ADD,
>>   
>>      __TIPC_NL_CMD_MAX,
>>      TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1
>> diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
>> index 6fc4e3c..b82cb00 100644
>> --- a/net/tipc/bearer.c
>> +++ b/net/tipc/bearer.c
>> @@ -42,6 +42,7 @@
>>   #include "monitor.h"
>>   #include "bcast.h"
>>   #include "netlink.h"
>> +#include "udp_media.h"
>>   
>>   #define MAX_ADDR_STR 60
>>   
>> @@ -897,6 +898,49 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct 
>> genl_info *info)
>>      return 0;
>>   }
>>   
>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
>> +{
>> +    int err;
>> +    char *name;
>> +    struct tipc_bearer *b;
>> +    struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1];
>> +    struct net *net = sock_net(skb->sk);
>> +
>> +    if (!info->attrs[TIPC_NLA_BEARER])
>> +            return -EINVAL;
>> +
>> +    err = nla_parse_nested(attrs, TIPC_NLA_BEARER_MAX,
>> +                           info->attrs[TIPC_NLA_BEARER],
>> +                           tipc_nl_bearer_policy);
>> +    if (err)
>> +            return err;
>> +
>> +    if (!attrs[TIPC_NLA_BEARER_NAME])
>> +            return -EINVAL;
>> +    name = nla_data(attrs[TIPC_NLA_BEARER_NAME]);
>> +
>> +    rtnl_lock();
>> +    b = tipc_bearer_find(net, name);
>> +    if (!b) {
>> +            rtnl_unlock();
>> +            return -EINVAL;
>> +    }
>> +
>> +#ifdef CONFIG_TIPC_MEDIA_UDP
>> +    if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
>> +            err = tipc_udp_nl_bearer_add(b,
>> +                                         attrs[TIPC_NLA_BEARER_UDP_OPTS]);
>> +            if (err) {
>> +                    rtnl_unlock();
>> +                    return err;
>> +            }
>> +    }
>> +#endif
>> +    rtnl_unlock();
>> +
>> +    return 0;
>> +}
>> +
>>   int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info)
>>   {
>>      int err;
>> diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
>> index 83a9abb..78892e2f 100644
>> --- a/net/tipc/bearer.h
>> +++ b/net/tipc/bearer.h
>> @@ -181,6 +181,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct 
>> genl_info *info);
>>   int tipc_nl_bearer_dump(struct sk_buff *skb, struct netlink_callback *cb);
>>   int tipc_nl_bearer_get(struct sk_buff *skb, struct genl_info *info);
>>   int tipc_nl_bearer_set(struct sk_buff *skb, struct genl_info *info);
>> +int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info);
>>   
>>   int tipc_nl_media_dump(struct sk_buff *skb, struct netlink_callback *cb);
>>   int tipc_nl_media_get(struct sk_buff *skb, struct genl_info *info);
>> diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
>> index 2718de6..3122f21 100644
>> --- a/net/tipc/netlink.c
>> +++ b/net/tipc/netlink.c
>> @@ -161,6 +161,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
>>              .policy = tipc_nl_policy,
>>      },
>>      {
>> +            .cmd    = TIPC_NL_BEARER_ADD,
>> +            .doit   = tipc_nl_bearer_add,
>> +            .policy = tipc_nl_policy,
>> +    },
>> +    {
>>              .cmd    = TIPC_NL_BEARER_SET,
>>              .doit   = tipc_nl_bearer_set,
>>              .policy = tipc_nl_policy,
>> diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c
>> index b8ec1a1..d4517f4 100644
>> --- a/net/tipc/udp_media.c
>> +++ b/net/tipc/udp_media.c
>> @@ -49,6 +49,7 @@
>>   #include "core.h"
>>   #include "bearer.h"
>>   #include "netlink.h"
>> +#include "msg.h"
>>   
>>   /* IANA assigned UDP port */
>>   #define UDP_PORT_DEFAULT   6118
>> @@ -70,6 +71,13 @@ struct udp_media_addr {
>>      };
>>   };
>>   
>> +/* struct udp_replicast - container for UDP remote addresses */
>> +struct udp_replicast {
>> +    struct udp_media_addr addr;
>> +    struct rcu_head rcu;
>> +    struct list_head list;
>> +};
>> +
>>   /**
>>    * struct udp_bearer - ip/udp bearer data structure
>>    * @bearer:        associated generic tipc bearer
>> @@ -82,6 +90,7 @@ struct udp_bearer {
>>      struct socket *ubsock;
>>      u32 ifindex;
>>      struct work_struct work;
>> +    struct udp_replicast rcast;
>>   };
>>   
>>   static int tipc_udp_is_mcast_addr(struct udp_media_addr *addr)
>> @@ -209,23 +218,79 @@ static int tipc_udp_send_msg(struct net *net, struct 
>> sk_buff *skb,
>>      if (skb_headroom(skb) < UDP_MIN_HEADROOM) {
>>              err = pskb_expand_head(skb, UDP_MIN_HEADROOM, 0, GFP_ATOMIC);
>>              if (err)
>> -                    goto tx_error;
>> +                    goto err_out;
>>      }
>>   
>>      skb_set_inner_protocol(skb, htons(ETH_P_TIPC));
>>      ub = rcu_dereference_rtnl(b->media_ptr);
>>      if (!ub) {
>>              err = -ENODEV;
>> -            goto tx_error;
>> +            goto err_out;
>> +    }
>> +
>> +    /* Replicast, send an skb to each configured IP address */
>> +    if (unlikely(addr->broadcast)) {
>> +            bool first = true;
>> +            struct udp_replicast *rcast;
>> +
>> +            list_for_each_entry_rcu(rcast, &ub->rcast.list, list) {
>> +                    struct sk_buff *_skb;
>> +
>> +                    /* Avoid one extra skb copy */
>> +                    if (first) {
>> +                            dst = &rcast->addr;
>> +                            first = false;
>> +                            continue;
>> +                    }
>> +
>> +                    _skb = pskb_copy(skb, GFP_ATOMIC);
>> +                    if (!_skb) {
>> +                            err = -ENOMEM;
>> +                            goto err_out;
>> +                    }
>> +
>> +                    err = tipc_udp_xmit(net, _skb, ub, src, &rcast->addr);
>> +                    if (err) {
>> +                            kfree_skb(_skb);
>> +                            goto err_out;
>> +                    }
>> +            }
> Slightly confusing. Why don't you return here? Even when the list is 
> non-empty you go on and send to the first given address, which may or 
> may not be a multicast address.
> It is probably correct, but the logics is not intuitive. Maybe some 
> refactoring and comments will help?
We avoid an extra skb clone.
The logic is as follows.

We always have an skb when we come here.
* If this is a multicast bearer, we skip the replicast code and send it
  as it is.
* If it's a peer-to-peer bearer we sent it to the first address in the
  replicast list, i.e. our peer.
* If it's a replicast bearer, we send a skb copy to all but the first
  peer, which gets the original skb. This way we avoid an extra
  skb_copy() and skb_free().

This is an effect of moving the peer-to-peer address from the
broadcast field and placing it in the replicast list, like you suggested
yesterday.

Regards
Richard

> 
> ///jon
> 
> 
>>      }
>>   
>>      return tipc_udp_xmit(net, skb, ub, src, dst);
>>   
>> -tx_error:
>> +err_out:
>>      kfree_skb(skb);
>>      return err;
>>   }
>>   
>> +static int tipc_udp_rcast_add(struct tipc_bearer *b,
>> +                          struct udp_media_addr *addr)
>> +{
>> +    struct udp_replicast *rcast;
>> +    struct udp_bearer *ub;
>> +
>> +    ub = rcu_dereference_rtnl(b->media_ptr);
>> +    if (!ub)
>> +            return -ENODEV;
>> +
>> +    rcast = kmalloc(sizeof(*rcast), GFP_ATOMIC);
>> +    if (!rcast)
>> +            return -ENOMEM;
>> +
>> +    memcpy(&rcast->addr, addr, sizeof(struct udp_media_addr));
>> +
>> +    if (ntohs(addr->proto) == ETH_P_IP)
>> +            pr_info("New replicast peer: %pI4\n", &rcast->addr.ipv4);
>> +#if IS_ENABLED(CONFIG_IPV6)
>> +    else if (ntohs(addr->proto) == ETH_P_IPV6)
>> +            pr_info("New replicast peer: %pI6\n", &rcast->addr.ipv6);
>> +#endif
>> +
>> +    list_add_rcu(&rcast->list, &ub->rcast.list);
>> +    return 0;
>> +}
>> +
>>   /* tipc_udp_recv - read data from bearer socket */
>>   static int tipc_udp_recv(struct sock *sk, struct sk_buff *skb)
>>   {
>> @@ -320,6 +385,32 @@ static int tipc_parse_udp_addr(struct nlattr *nla, 
>> struct udp_media_addr *addr,
>>      return -EADDRNOTAVAIL;
>>   }
>>   
>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr)
>> +{
>> +    int err;
>> +    struct udp_media_addr addr = {0};
>> +    struct nlattr *opts[TIPC_NLA_UDP_MAX + 1];
>> +    struct udp_media_addr *dst;
>> +
>> +    if (nla_parse_nested(opts, TIPC_NLA_UDP_MAX, attr, tipc_nl_udp_policy))
>> +            return -EINVAL;
>> +
>> +    if (!opts[TIPC_NLA_UDP_REMOTE])
>> +            return -EINVAL;
>> +
>> +    err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &addr, NULL);
>> +    if (err)
>> +            return err;
>> +
>> +    dst = (struct udp_media_addr *)&b->bcast_addr.value;
>> +    if (tipc_udp_is_mcast_addr(dst)) {
>> +            pr_err("Can't add remote ip to TIPC UDP multicast bearer\n");
>> +            return -EINVAL;
>> +    }
>> +
>> +    return tipc_udp_rcast_add(b, &addr);
>> +}
>> +
>>   /**
>>    * tipc_udp_enable - callback to create a new udp bearer instance
>>    * @net:   network namespace
>> @@ -334,7 +425,7 @@ static int tipc_udp_enable(struct net *net, struct 
>> tipc_bearer *b,
>>   {
>>      int err = -EINVAL;
>>      struct udp_bearer *ub;
>> -    struct udp_media_addr *remote;
>> +    struct udp_media_addr remote = {0};
>>      struct udp_media_addr local = {0};
>>      struct udp_port_cfg udp_conf = {0};
>>      struct udp_tunnel_sock_cfg tuncfg = {NULL};
>> @@ -344,6 +435,8 @@ static int tipc_udp_enable(struct net *net, struct 
>> tipc_bearer *b,
>>      if (!ub)
>>              return -ENOMEM;
>>   
>> +    INIT_LIST_HEAD(&ub->rcast.list);
>> +
>>      if (!attrs[TIPC_NLA_BEARER_UDP_OPTS])
>>              goto err;
>>   
>> @@ -362,9 +455,7 @@ static int tipc_udp_enable(struct net *net, struct 
>> tipc_bearer *b,
>>      if (err)
>>              goto err;
>>   
>> -    remote = (struct udp_media_addr *)&b->bcast_addr.value;
>> -    memset(remote, 0, sizeof(struct udp_media_addr));
>> -    err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], remote, NULL);
>> +    err = tipc_parse_udp_addr(opts[TIPC_NLA_UDP_REMOTE], &remote, NULL);
>>      if (err)
>>              goto err;
>>   
>> @@ -409,10 +500,17 @@ static int tipc_udp_enable(struct net *net, struct 
>> tipc_bearer *b,
>>      tuncfg.encap_destroy = NULL;
>>      setup_udp_tunnel_sock(net, ub->ubsock, &tuncfg);
>>   
>> -    if (tipc_udp_is_mcast_addr(remote)) {
>> -            if (enable_mcast(ub, remote))
>> -                    goto err;
>> -    }
>> +    /**
>> +     * The bcast media address port is used for all peers and the ip
>> +     * is used if it's a multicast address.
>> +     */
>> +    memcpy(&b->bcast_addr.value, &remote, sizeof(remote));
>> +    if (tipc_udp_is_mcast_addr(&remote))
>> +            err = enable_mcast(ub, &remote);
>> +    else
>> +            err= tipc_udp_rcast_add(b, &remote);
>> +    if (err)
>> +            goto err;
>>   
>>      return 0;
>>   err:
>> @@ -424,6 +522,12 @@ err:
>>   static void cleanup_bearer(struct work_struct *work)
>>   {
>>      struct udp_bearer *ub = container_of(work, struct udp_bearer, work);
>> +    struct udp_replicast *rcast, *tmp;
>> +
>> +    list_for_each_entry_safe(rcast, tmp, &ub->rcast.list, list) {
>> +            list_del_rcu(&rcast->list);
>> +            kfree_rcu(rcast, rcu);
>> +    }
>>   
>>      if (ub->ubsock)
>>              udp_tunnel_sock_release(ub->ubsock);
>> diff --git a/net/tipc/udp_media.h b/net/tipc/udp_media.h
>> new file mode 100644
>> index 0000000..4dcb548
>> --- /dev/null
>> +++ b/net/tipc/udp_media.h
>> @@ -0,0 +1,44 @@
>> +/*
>> + * net/tipc/udp_media.h: Include file for UDP bearer media
>> + *
>> + * Copyright (c) 1996-2006, 2013-2016, Ericsson AB
>> + * Copyright (c) 2005, 2010-2011, Wind River Systems
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions are 
>> met:
>> + *
>> + * 1. Redistributions of source code must retain the above copyright
>> + *    notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *    notice, this list of conditions and the following disclaimer in the
>> + *    documentation and/or other materials provided with the distribution.
>> + * 3. Neither the names of the copyright holders nor the names of its
>> + *    contributors may be used to endorse or promote products derived from
>> + *    this software without specific prior written permission.
>> + *
>> + * Alternatively, this software may be distributed under the terms of the
>> + * GNU General Public License ("GPL") version 2 as published by the Free
>> + * Software Foundation.
>> + *
>> + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 
>> IS"
>> + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
>> + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 
>> PURPOSE
>> + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
>> + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
>> + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
>> + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
>> + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
>> + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
>> + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF 
>> THE
>> + * POSSIBILITY OF SUCH DAMAGE.
>> + */
>> +
>> +#ifdef CONFIG_TIPC_MEDIA_UDP
>> +#ifndef _TIPC_UDP_MEDIA_H
>> +#define _TIPC_UDP_MEDIA_H
>> +
>> +int tipc_udp_nl_bearer_add(struct tipc_bearer *b, struct nlattr *attr);
>> +
>> +#endif
>> +#endif
> 
> 
> ------------------------------------------------------------------------------
> _______________________________________________
> tipc-discussion mailing list
> tipc-discussion@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/tipc-discussion
> 


------------------------------------------------------------------------------
_______________________________________________
tipc-discussion mailing list
tipc-discussion@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/tipc-discussion

Reply via email to