On 05/23/2018 10:38 PM, GhantaKrishnamurthy MohanKrishna wrote:
> A peer node is considered down if there are no
> active links (or) lost contact to the node. In current implementation,
> a peer node instance is deleted either if
> a) TIPC module is removed (or)
> b) Application can use a netlink/iproute2 interface to delete a
> specific down node.
> 
> Thus, a down node instance lives in the system forever, unless the
> application explicitly removes it.
> 
> We fix this by deleting the nodes which are down for
> a specified amount of time (5 minutes).
> Existing node supervision timer is used to achieve this.
> 
> Signed-off-by: GhantaKrishnamurthy MohanKrishna 
> <[email protected]>
> ---
>  net/tipc/node.c | 77 
> ++++++++++++++++++++++++++++++++++++++++++++++++---------
>  1 file changed, 66 insertions(+), 11 deletions(-)
> 
> diff --git a/net/tipc/node.c b/net/tipc/node.c
> index b71e4e376bb9..e08f78a72f03 100644
> --- a/net/tipc/node.c
> +++ b/net/tipc/node.c
> @@ -45,6 +45,7 @@
>  #include "netlink.h"
>  
>  #define INVALID_NODE_SIG     0x10000
> +#define NODE_CLEANUP_AFTER   0x493E0
>  
>  /* Flags used to take different actions according to flag type
>   * TIPC_NOTIFY_NODE_DOWN: notify node is down
> @@ -96,6 +97,7 @@ struct tipc_bclink_entry {
>   * @link_id: local and remote bearer ids of changing link, if any
>   * @publ_list: list of publications
>   * @rcu: rcu struct for tipc_node
> + * @down_at: indicates the time for deleting a down node
>   */
>  struct tipc_node {
>       u32 addr;
> @@ -121,6 +123,7 @@ struct tipc_node {
>       unsigned long keepalive_intv;
>       struct timer_list timer;
>       struct rcu_head rcu;
> +     unsigned long down_at;
>  };
>  
>  /* Node FSM states and events:
> @@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, 
> u32 addr);
>  static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id);
>  static void tipc_node_put(struct tipc_node *node);
>  static bool node_is_up(struct tipc_node *n);
> +static void tipc_node_delete_from_list(struct tipc_node *node);
>  
>  struct tipc_sock_conn {
>       u32 port;
> @@ -369,6 +373,7 @@ static struct tipc_node *tipc_node_create(struct net 
> *net, u32 addr,
>       for (i = 0; i < MAX_BEARERS; i++)
>               spin_lock_init(&n->links[i].lock);
>       n->state = SELF_DOWN_PEER_LEAVING;
> +     n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
>       n->signature = INVALID_NODE_SIG;
>       n->active_links[0] = INVALID_BEARER_ID;
>       n->active_links[1] = INVALID_BEARER_ID;
> @@ -412,11 +417,16 @@ static void tipc_node_calculate_timer(struct tipc_node 
> *n, struct tipc_link *l)
>       tipc_link_set_abort_limit(l, tol / n->keepalive_intv);
>  }
>  
> -static void tipc_node_delete(struct tipc_node *node)
> +static void tipc_node_delete_from_list(struct tipc_node *node)
>  {
>       list_del_rcu(&node->list);
>       hlist_del_rcu(&node->hash);
>       tipc_node_put(node);
> +}
> +
> +static void tipc_node_delete(struct tipc_node *node)
> +{
> +     tipc_node_delete_from_list(node);
>  
>       del_timer_sync(&node->timer);
>       tipc_node_put(node);
> @@ -523,6 +533,53 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, 
> u32 port)
>       tipc_node_put(node);
>  }
>  
> +static void  tipc_node_clear_links(struct tipc_node *node)
> +{
> +     int i;
> +
> +     for (i = 0; i < MAX_BEARERS; i++) {
> +             struct tipc_link_entry *le = &node->links[i];
> +
> +             if (le->link) {
> +                     kfree(le->link);
> +                     le->link = NULL;
> +                     node->link_cnt--;
> +             }
> +     }
> +}
> +
> +/* tipc_node_cleanup - delete nodes that does not
> + * have active links for NODE_CLEANUP_AFTER time
> + */
> +static int tipc_node_cleanup(struct tipc_node *peer)
> +{
> +     struct tipc_net *tn = tipc_net(peer->net);
> +     int err = 0;
> +
> +     spin_lock_bh(&tn->node_list_lock);
> +     tipc_node_write_lock(peer);

When we detect whether a node can be automatically deleted, we hold
node_list_lock and node write lock at the same time. In most of time,
the below two statements are false. So the detection is quite expensive.

I want to optimize this, for instance, we can just hold node read lock
while detecting node state. But finally I find it's unsafe as there are
two concurrent threads in which a node might be removed. One is that a
node can be deleted in node timeout function, and another is in
tipc_nl_peer_rm(). Moreover, the latter can be interrupted by the
former. So the we really need hold node_list_lock in the whole
tipc_node_cleanup().

Anyway, thanks for your contribution, and this patch is fine with me.

Acked-by: Ying Xue <[email protected]>

> +
> +     if (node_is_up(peer)) {
> +             tipc_node_write_unlock(peer);
> +             err = -EBUSY;
> +             goto err_out;
> +     }
> +
> +     if (!time_after(jiffies, peer->down_at)) {
> +             tipc_node_write_unlock(peer);
> +             err = -EBUSY;
> +             goto err_out;
> +     }
> +
> +     tipc_node_clear_links(peer);
> +     tipc_node_write_unlock(peer);
> +     tipc_node_delete_from_list(peer);
> +
> +err_out:
> +     spin_unlock_bh(&tn->node_list_lock);
> +     return err;
> +}
> +
>  /* tipc_node_timeout - handle expiration of node timer
>   */
>  static void tipc_node_timeout(struct timer_list *t)
> @@ -533,6 +590,12 @@ static void tipc_node_timeout(struct timer_list *t)
>       int bearer_id;
>       int rc = 0;
>  
> +     if (!tipc_node_cleanup(n)) {
> +             /*Removing the reference of Timer*/
> +             tipc_node_put(n);
> +             return;
> +     }
> +
>       __skb_queue_head_init(&xmitq);
>  
>       for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) {
> @@ -1150,6 +1213,7 @@ static void node_lost_contact(struct tipc_node *n,
>       uint i;
>  
>       pr_debug("Lost contact with %x\n", n->addr);
> +     n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER);
>  
>       /* Clean up broadcast state */
>       tipc_bcast_remove_peer(n->net, n->bc_entry.link);
> @@ -1719,7 +1783,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
> genl_info *info)
>       struct tipc_node *peer;
>       u32 addr;
>       int err;
> -     int i;
>  
>       /* We identify the peer by its net */
>       if (!info->attrs[TIPC_NLA_NET])
> @@ -1754,15 +1817,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct 
> genl_info *info)
>               goto err_out;
>       }
>  
> -     for (i = 0; i < MAX_BEARERS; i++) {
> -             struct tipc_link_entry *le = &peer->links[i];
> -
> -             if (le->link) {
> -                     kfree(le->link);
> -                     le->link = NULL;
> -                     peer->link_cnt--;
> -             }
> -     }
> +     tipc_node_clear_links(peer);
>       tipc_node_write_unlock(peer);
>       tipc_node_delete(peer);
>  
> 

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion

Reply via email to