On 05/23/2018 10:38 PM, GhantaKrishnamurthy MohanKrishna wrote: > A peer node is considered down if there are no > active links (or) lost contact to the node. In current implementation, > a peer node instance is deleted either if > a) TIPC module is removed (or) > b) Application can use a netlink/iproute2 interface to delete a > specific down node. > > Thus, a down node instance lives in the system forever, unless the > application explicitly removes it. > > We fix this by deleting the nodes which are down for > a specified amount of time (5 minutes). > Existing node supervision timer is used to achieve this. > > Signed-off-by: GhantaKrishnamurthy MohanKrishna > <[email protected]> > --- > net/tipc/node.c | 77 > ++++++++++++++++++++++++++++++++++++++++++++++++--------- > 1 file changed, 66 insertions(+), 11 deletions(-) > > diff --git a/net/tipc/node.c b/net/tipc/node.c > index b71e4e376bb9..e08f78a72f03 100644 > --- a/net/tipc/node.c > +++ b/net/tipc/node.c > @@ -45,6 +45,7 @@ > #include "netlink.h" > > #define INVALID_NODE_SIG 0x10000 > +#define NODE_CLEANUP_AFTER 0x493E0 > > /* Flags used to take different actions according to flag type > * TIPC_NOTIFY_NODE_DOWN: notify node is down > @@ -96,6 +97,7 @@ struct tipc_bclink_entry { > * @link_id: local and remote bearer ids of changing link, if any > * @publ_list: list of publications > * @rcu: rcu struct for tipc_node > + * @down_at: indicates the time for deleting a down node > */ > struct tipc_node { > u32 addr; > @@ -121,6 +123,7 @@ struct tipc_node { > unsigned long keepalive_intv; > struct timer_list timer; > struct rcu_head rcu; > + unsigned long down_at; > }; > > /* Node FSM states and events: > @@ -160,6 +163,7 @@ static struct tipc_node *tipc_node_find(struct net *net, > u32 addr); > static struct tipc_node *tipc_node_find_by_id(struct net *net, u8 *id); > static void tipc_node_put(struct tipc_node *node); > static bool node_is_up(struct tipc_node *n); > +static void tipc_node_delete_from_list(struct tipc_node *node); > > struct tipc_sock_conn { > u32 port; > @@ -369,6 +373,7 @@ static struct tipc_node *tipc_node_create(struct net > *net, u32 addr, > for (i = 0; i < MAX_BEARERS; i++) > spin_lock_init(&n->links[i].lock); > n->state = SELF_DOWN_PEER_LEAVING; > + n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); > n->signature = INVALID_NODE_SIG; > n->active_links[0] = INVALID_BEARER_ID; > n->active_links[1] = INVALID_BEARER_ID; > @@ -412,11 +417,16 @@ static void tipc_node_calculate_timer(struct tipc_node > *n, struct tipc_link *l) > tipc_link_set_abort_limit(l, tol / n->keepalive_intv); > } > > -static void tipc_node_delete(struct tipc_node *node) > +static void tipc_node_delete_from_list(struct tipc_node *node) > { > list_del_rcu(&node->list); > hlist_del_rcu(&node->hash); > tipc_node_put(node); > +} > + > +static void tipc_node_delete(struct tipc_node *node) > +{ > + tipc_node_delete_from_list(node); > > del_timer_sync(&node->timer); > tipc_node_put(node); > @@ -523,6 +533,53 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, > u32 port) > tipc_node_put(node); > } > > +static void tipc_node_clear_links(struct tipc_node *node) > +{ > + int i; > + > + for (i = 0; i < MAX_BEARERS; i++) { > + struct tipc_link_entry *le = &node->links[i]; > + > + if (le->link) { > + kfree(le->link); > + le->link = NULL; > + node->link_cnt--; > + } > + } > +} > + > +/* tipc_node_cleanup - delete nodes that does not > + * have active links for NODE_CLEANUP_AFTER time > + */ > +static int tipc_node_cleanup(struct tipc_node *peer) > +{ > + struct tipc_net *tn = tipc_net(peer->net); > + int err = 0; > + > + spin_lock_bh(&tn->node_list_lock); > + tipc_node_write_lock(peer);
When we detect whether a node can be automatically deleted, we hold node_list_lock and node write lock at the same time. In most of time, the below two statements are false. So the detection is quite expensive. I want to optimize this, for instance, we can just hold node read lock while detecting node state. But finally I find it's unsafe as there are two concurrent threads in which a node might be removed. One is that a node can be deleted in node timeout function, and another is in tipc_nl_peer_rm(). Moreover, the latter can be interrupted by the former. So the we really need hold node_list_lock in the whole tipc_node_cleanup(). Anyway, thanks for your contribution, and this patch is fine with me. Acked-by: Ying Xue <[email protected]> > + > + if (node_is_up(peer)) { > + tipc_node_write_unlock(peer); > + err = -EBUSY; > + goto err_out; > + } > + > + if (!time_after(jiffies, peer->down_at)) { > + tipc_node_write_unlock(peer); > + err = -EBUSY; > + goto err_out; > + } > + > + tipc_node_clear_links(peer); > + tipc_node_write_unlock(peer); > + tipc_node_delete_from_list(peer); > + > +err_out: > + spin_unlock_bh(&tn->node_list_lock); > + return err; > +} > + > /* tipc_node_timeout - handle expiration of node timer > */ > static void tipc_node_timeout(struct timer_list *t) > @@ -533,6 +590,12 @@ static void tipc_node_timeout(struct timer_list *t) > int bearer_id; > int rc = 0; > > + if (!tipc_node_cleanup(n)) { > + /*Removing the reference of Timer*/ > + tipc_node_put(n); > + return; > + } > + > __skb_queue_head_init(&xmitq); > > for (bearer_id = 0; bearer_id < MAX_BEARERS; bearer_id++) { > @@ -1150,6 +1213,7 @@ static void node_lost_contact(struct tipc_node *n, > uint i; > > pr_debug("Lost contact with %x\n", n->addr); > + n->down_at = jiffies + msecs_to_jiffies(NODE_CLEANUP_AFTER); > > /* Clean up broadcast state */ > tipc_bcast_remove_peer(n->net, n->bc_entry.link); > @@ -1719,7 +1783,6 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct > genl_info *info) > struct tipc_node *peer; > u32 addr; > int err; > - int i; > > /* We identify the peer by its net */ > if (!info->attrs[TIPC_NLA_NET]) > @@ -1754,15 +1817,7 @@ int tipc_nl_peer_rm(struct sk_buff *skb, struct > genl_info *info) > goto err_out; > } > > - for (i = 0; i < MAX_BEARERS; i++) { > - struct tipc_link_entry *le = &peer->links[i]; > - > - if (le->link) { > - kfree(le->link); > - le->link = NULL; > - peer->link_cnt--; > - } > - } > + tipc_node_clear_links(peer); > tipc_node_write_unlock(peer); > tipc_node_delete(peer); > > ------------------------------------------------------------------------------ Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot _______________________________________________ tipc-discussion mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/tipc-discussion
