[PATCH net v1 1/1] tipc: fix hanging poll() for stream sockets
In commit 42b531de17d2f6 ("tipc: Fix missing connection request handling"), we replaced unconditional wakeup() with conditional wakeup for clients with flags POLLIN | POLLRDNORM | POLLRDBAND. This breaks the applications which do a connect followed by poll with POLLOUT flag. These applications are not woken when the connection is ESTABLISHED and hence sleep forever. In this commit, we fix it by including the POLLOUT event for sockets in TIPC_CONNECTING state. Fixes: 42b531de17d2f6 ("tipc: Fix missing connection request handling") Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41127d0b925e..3b4084480377 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; -- 2.11.0
[PATCH net v1 1/1] tipc: use only positive error codes in messages
In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"), we have updated the function tipc_msg_lookup_dest() to set the error codes to negative values at destination lookup failures. Thus when the function sets the error code to -TIPC_ERR_NO_NAME, it is inserted into the 4 bit error field of the message header as 0xf instead of TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code. In this commit, we set only positive error codes. Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()") Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 6ef379f004ac..121e59a1d0e7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return false; if (msg_errcode(msg)) return false; - *err = -TIPC_ERR_NO_NAME; + *err = TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; msg = buf_msg(skb); -- 2.1.4
[PATCH net v1 1/1] tipc: permit bond slave as bearer
For a bond slave device as a tipc bearer, the dev represents the bond interface and orig_dev represents the slave in tipc_l2_rcv_msg(). Since we decode the tipc_ptr from bonding device (dev), we fail to find the bearer and thus tipc links are not established. In this commit, we register the tipc protocol callback per device and look for tipc bearer from both the devices. Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/bearer.c | 26 +++--- net/tipc/bearer.h | 2 ++ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 767e0537dde5..89cd061c4468 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) } static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name @@ -428,6 +430,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = tipc_l2_rcv_msg; + dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -447,6 +453,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -594,11 +601,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(dev->tipc_ptr); + b = rcu_dereference_rtnl(dev->tipc_ptr) ?: + rcu_dereference_rtnl(orig_dev->tipc_ptr); if (likely(b && test_bit(0, 
&b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; - tipc_rcv(dev_net(dev), skb, b); + tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -659,11 +667,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, return NOTIFY_OK; } -static struct packet_type tipc_packet_type __read_mostly = { - .type = htons(ETH_P_TIPC), - .func = tipc_l2_rcv_msg, -}; - static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, @@ -671,19 +674,12 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { - int err; - - err = register_netdevice_notifier(&notifier); - if (err) - return err; - dev_add_pack(&tipc_packet_type); - return 0; + return register_netdevice_notifier(&notifier); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(&notifier); - dev_remove_pack(&tipc_packet_type); } void tipc_bearer_stop(struct net *net) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 635c9086e19a..e07a55a80c18 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -131,6 +131,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer @@ -151,6 +152,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct packet_type pt; struct rcu_head rcu; u32 priority; u32 window; -- 2.1.4
[PATCH net v1 3/3] tipc: context imbalance at node read unlock
If we fail to find a valid bearer in tipc_node_get_linkname(), node_read_unlock() is called without holding the node read lock. This commit fixes this error. Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index b113a52f8914..7dd22330a6b4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } -- 2.1.4
[PATCH net v1 2/3] tipc: reassign pointers after skb reallocation / linearization
In tipc_msg_reverse(), we assign skb attributes to local pointers in stack at startup. This is followed by skb_linearize() and for cloned buffers we perform skb reallocation using pskb_expand_head(). Both these methods may update the skb attributes and thus make the pointers incorrect. In this commit, we fix this error by ensuring that the pointers are re-assigned after any of these skb operations. Fixes: 29042e19f2c60 ("tipc: let function tipc_msg_reverse() expand header when needed") Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/msg.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index dcd90e6fa7c3..6ef379f004ac 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -479,13 +479,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { struct sk_buff *_skb = *skb; - struct tipc_msg *hdr = buf_msg(_skb); + struct tipc_msg *hdr; struct tipc_msg ohdr; - int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); + int dlen; if (skb_linearize(_skb)) goto exit; hdr = buf_msg(_skb); + dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); if (msg_dest_droppable(hdr)) goto exit; if (msg_errcode(hdr)) @@ -511,6 +512,8 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) goto exit; + /* reassign after skb header modifications */ + hdr = buf_msg(_skb); /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_non_seq(hdr, 0); -- 2.1.4
[PATCH net v1 1/3] tipc: perform skb_linearize() before parsing the inner header
In tipc_rcv(), we linearize only the header and usually the packets are consumed as the nodes permit direct reception. However, if the skb contains tunnelled message due to fail over or synchronization we parse it in tipc_node_check_state() without performing linearization. This will cause link disturbances if the skb was non linear. In this commit, we perform linearization for the above messages. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9b4dcb6a16b5..b113a52f8914 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1557,6 +1557,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check/update node state before receiving */ if (unlikely(skb)) { + if (unlikely(skb_linearize(skb))) + goto discard; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { -- 2.1.4
[PATCH net v1 0/3] tipc: buffer reassignment fixes
This series contains fixes for buffer reassignments and a context imbalance. Parthasarathy Bhuvaragan (3): tipc: perform skb_linearize() before parsing the inner header tipc: reassign pointers after skb reallocation / linearization tipc: context imbalance at node read unlock net/tipc/msg.c | 7 +-- net/tipc/node.c | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) -- 2.1.4
[PATCH net v1 2/2] tipc: fix a race condition of releasing subscriber object
From: Ying Xue No matter whether a request is inserted into workqueue as a work item to cancel a subscription or to delete a subscription's subscriber asynchronously, the work items may be executed in different workers. As a result, it doesn't mean that one request which is raised prior to another request is definitely handled before the latter. By contrast, if the latter request is executed before the former request, below error may happen: [ 656.183644] BUG: spinlock bad magic on CPU#0, kworker/u8:0/12117 [ 656.184487] general protection fault: [#1] SMP [ 656.185160] Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: ip6_udp_tunnel] [ 656.187003] CPU: 0 PID: 12117 Comm: kworker/u8:0 Not tainted 4.11.0-rc7+ #6 [ 656.187920] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 656.188690] Workqueue: tipc_rcv tipc_recv_work [tipc] [ 656.189371] task: 88003f5cec40 task.stack: c90004448000 [ 656.190157] RIP: 0010:spin_bug+0xdd/0xf0 [ 656.190678] RSP: 0018:c9000444bcb8 EFLAGS: 00010202 [ 656.191375] RAX: 0034 RBX: 88003f8d1388 RCX: [ 656.192321] RDX: 88003ba13708 RSI: 88003ba0cd08 RDI: 88003ba0cd08 [ 656.193265] RBP: c9000444bcd0 R08: 0030 R09: 6b6b6b6b [ 656.194208] R10: 8800bde3e000 R11: 01b4 R12: 6b6b6b6b6b6b6b6b [ 656.195157] R13: 81a3ca64 R14: 88003f8d1388 R15: 88003f8d13a0 [ 656.196101] FS: () GS:88003ba0() knlGS: [ 656.197172] CS: 0010 DS: ES: CR0: 80050033 [ 656.197935] CR2: 7f0b3d2e6000 CR3: 3ef9e000 CR4: 06f0 [ 656.198873] Call Trace: [ 656.199210] do_raw_spin_lock+0x66/0xa0 [ 656.199735] _raw_spin_lock_bh+0x19/0x20 [ 656.200258] tipc_subscrb_subscrp_delete+0x28/0xf0 [tipc] [ 656.200990] tipc_subscrb_rcv_cb+0x45/0x260 [tipc] [ 656.201632] tipc_receive_from_sock+0xaf/0x100 [tipc] [ 656.202299] tipc_recv_work+0x2b/0x60 [tipc] [ 656.202872] process_one_work+0x157/0x420 [ 656.203404] worker_thread+0x69/0x4c0 [ 656.203898] kthread+0x138/0x170 [ 656.204328] ? 
process_one_work+0x420/0x420 [ 656.204889] ? kthread_create_on_node+0x40/0x40 [ 656.205527] ret_from_fork+0x29/0x40 [ 656.206012] Code: 48 8b 0c 25 00 c5 00 00 48 c7 c7 f0 24 a3 81 48 81 c1 f0 05 00 00 65 8b 15 61 ef f5 7e e8 9a 4c 09 00 4d 85 e4 44 8b 4b 08 74 92 <45> 8b 84 24 40 04 00 00 49 8d 8c 24 f0 05 00 00 eb 8d 90 0f 1f [ 656.208504] RIP: spin_bug+0xdd/0xf0 RSP: c9000444bcb8 [ 656.209798] ---[ end trace e2a800e6eb0770be ]--- In above scenario, the request of deleting subscriber was performed earlier than the request of canceling a subscription although the latter was issued before the former, which means tipc_subscrb_delete() was called before tipc_subscrp_cancel(). As a result, when tipc_subscrb_subscrp_delete() called by tipc_subscrp_cancel() was executed to cancel a subscription, the subscription's subscriber refcnt had been decreased to 1. The subscriber was then freed in tipc_subscrp_delete() because its refcnt was decremented to zero, but the subscriber's lock still had to be released afterwards; as a consequence, the panic happened. By contrast, if we increase subscriber's refcnt before tipc_subscrb_subscrp_delete() is called in tipc_subscrp_cancel(), the panic issue can be avoided. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: Parthasarathy Bhuvaragan Signed-off-by: Ying Xue --- net/tipc/subscr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f2c81f42dfda..be3d9e3183dc 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -242,7 +242,9 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { + tipc_subscrb_get(subscriber); tipc_subscrb_subscrp_delete(subscriber, s); + tipc_subscrb_put(subscriber); } static struct tipc_subscription *tipc_subscrp_create(struct net *net, -- 2.1.4
[PATCH net v1 1/2] tipc: remove subscription references only for pending timers
In commit, 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list"), we delete the subscription from the subscribers list and from nametable unconditionally. This leads to the following bug if the timer running tipc_subscrp_timeout() in another CPU accesses the subscription list after the subscription delete request. [39.570] general protection fault: [#1] SMP :: [39.574] task: 81c10540 task.stack: 81c0 [39.575] RIP: 0010:tipc_subscrp_timeout+0x32/0x80 [tipc] [39.576] RSP: 0018:88003ba03e90 EFLAGS: 00010282 [39.576] RAX: dead0200 RBX: 88003f0f3600 RCX: 0101 [39.577] RDX: dead0100 RSI: 0201 RDI: 88003f0d7948 [39.578] RBP: 88003ba03ea0 R08: 0001 R09: 88003ba03ef8 [39.579] R10: 014f R11: R12: 88003f0d7948 [39.580] R13: 88003f0f3618 R14: a006c250 R15: 88003f0f3600 [39.581] FS: () GS:88003ba0() knlGS: [39.582] CS: 0010 DS: ES: CR0: 80050033 [39.583] CR2: 7f831c6e0714 CR3: 3d3b CR4: 06f0 [39.584] Call Trace: [39.584] [39.585] call_timer_fn+0x3d/0x180 [39.585] ? tipc_subscrb_rcv_cb+0x260/0x260 [tipc] [39.586] run_timer_softirq+0x168/0x1f0 [39.586] ? 
sched_clock_cpu+0x16/0xc0 [39.587] __do_softirq+0x9b/0x2de [39.587] irq_exit+0x60/0x70 [39.588] smp_apic_timer_interrupt+0x3d/0x50 [39.588] apic_timer_interrupt+0x86/0x90 [39.589] RIP: 0010:default_idle+0x20/0xf0 [39.589] RSP: 0018:81c03e58 EFLAGS: 0246 ORIG_RAX: ff10 [39.590] RAX: RBX: 81c10540 RCX: [39.591] RDX: RSI: RDI: [39.592] RBP: 81c03e68 R08: R09: [39.593] R10: c90001cbbe00 R11: R12: [39.594] R13: 81c10540 R14: R15: [39.595] :: [39.603] RIP: tipc_subscrp_timeout+0x32/0x80 [tipc] RSP: 88003ba03e90 [39.604] ---[ end trace 79ce94b7216cb459 ]--- Fixes: 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list") Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 19 +++ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0bf91cd3733c..f2c81f42dfda 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -52,7 +52,6 @@ struct tipc_subscriber { struct list_head subscrp_list; }; -static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); /** @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, { struct list_head *subscription_list = &subscriber->subscrp_list; struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - tipc_subscrp_delete(sub); + timeout = htohl(sub->evt.s.timeout, sub->swap); + if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) { + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + tipc_subscrp_put(sub); + } if (s) break; @@ -236,14 +239,6 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_delete(struct 
tipc_subscription *sub) -{ - u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - - if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) - tipc_subscrp_put(sub); -} - static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { -- 2.1.4
[PATCH net v1 0/2] tipc: topology server fixes
The following commits fix two race conditions causing general protection faults. Parthasarathy Bhuvaragan (1): tipc: remove subscription references only for pending timers Ying Xue (1): tipc: fix a race condition of releasing subscriber object net/tipc/subscr.c | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) -- 2.1.4
[PATCH net v1 0/3] tipc: fix hanging socket connections
This patch series contains fixes for the socket layer to prevent hanging / stale connections. Parthasarathy Bhuvaragan (3): tipc: Fix missing connection request handling tipc: improve error validations for sockets in CONNECTING state tipc: close the connection if protocol messages contain errors net/tipc/socket.c | 36 +++- 1 file changed, 31 insertions(+), 5 deletions(-) -- 2.1.4
[PATCH net v1 2/3] tipc: improve error validations for sockets in CONNECTING state
Until now, the checks for sockets in CONNECTING state were based on the assumption that the incoming message was always from the peer's accepted data socket. However, if an application using a non-blocking socket sends an implicit connect, this socket, which is in CONNECTING state, can receive error messages from the peer's listening socket. As we discard these messages, the application socket hangs due to inactivity. In addition to this, there are other places where we process errors but do not notify the user. In this commit, we process such incoming error messages and notify our users about them using sk_state_change(). Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/socket.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b8df510a80c..38c367f6ced4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1259,7 +1259,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) struct sock *sk = sock->sk; DEFINE_WAIT(wait); long timeo = *timeop; - int err; + int err = sock_error(sk); + + if (err) + return err; for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); @@ -1281,6 +1284,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = sock_intr_errno(timeo); if (signal_pending(current)) break; + + err = sock_error(sk); + if (err) + break; } finish_wait(sk_sleep(sk), &wait); *timeop = timeo; @@ -1551,6 +1558,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); if (unlikely(msg_mcast(hdr))) return false; @@ -1558,18 +1567,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) switch (sk->sk_state) { case TIPC_CONNECTING: /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) - return false; +
if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); return true; } if (unlikely(!msg_isdata(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; + sk->sk_state_change(sk); return true; } -- 2.1.4
[PATCH net v1 1/3] tipc: Fix missing connection request handling
In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMP safe wq_has_sleeper() using the generic socket callback sk_data_ready(). Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 566906795c8c..3b8df510a80c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1581,8 +1581,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_data_ready(sk); /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); -- 2.1.4
[PATCH net v1 3/3] tipc: close the connection if protocol messages contain errors
When a socket is shutting down, we notify the peer node about the connection termination by reusing an incoming message if possible. If the last received message was a connection acknowledgment message, we reverse this message and set the error code to TIPC_ERR_NO_PORT and send it to peer. In tipc_sk_proto_rcv(), we never check for message errors while processing the connection acknowledgment or probe messages. Thus this message performs the usual flow control accounting and leaves the session hanging. In this commit, we terminate the connection when we receive such error messages. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/socket.c | 8 1 file changed, 8 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 38c367f6ced4..bdce99f9407a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -866,6 +866,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + goto exit; + } + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { -- 2.1.4
[PATCH net v1 2/2] tipc: fix socket flow control accounting error at tipc_recv_stream
Until now in tipc_recv_stream(), we update the received unacknowledged bytes based on a stack variable and not based on the actual message size. If the user buffer passed at tipc_recv_stream() is smaller than the received skb, the size variable in stack differs from the actual message size in the skb. This leads to a flow control accounting error causing permanent congestion. In this commit, we fix this accounting error by always using the size of the incoming message. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b28e94f1c739..566906795c8c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1484,7 +1484,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (unlikely(flags & MSG_PEEK)) goto exit; - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); tsk_advance_rx_queue(sk); -- 2.1.4
[PATCH net v1 1/2] tipc: fix socket flow control accounting error at tipc_send_stream
Until now in tipc_send_stream(), we return -1 when the socket encounters link congestion even if the socket had successfully sent partial data. This is incorrect as the application resends the same partial data leading to data corruption at receiver's end. In this commit, we return the partially sent bytes as the return value at link congestion. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..b28e94f1c739 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1083,7 +1083,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** -- 2.1.4
[PATCH net-next v1 1/2] tipc: add support for stream/seqpacket socketpairs
From: Erik Hugne sockets A and B are connected back-to-back, similar to what AF_UNIX does. Signed-off-by: Erik Hugne Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..1198dddf72e8 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2511,6 +2511,16 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + + tipc_sk_finish_conn(tsk1, tsk2->portid, 0); + tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2540,7 +2550,7 @@ static const struct proto_ops packet_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, @@ -2561,7 +2571,7 @@ static const struct proto_ops stream_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4
[PATCH net-next v1 0/2] tipc: add socketpair support
We add socketpair support for connection oriented sockets in the first patch and for connectionless sockets in the second. Erik Hugne (2): tipc: add support for stream/seqpacket socketpairs tipc: allow rdm/dgram socketpairs net/tipc/socket.c | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) -- 2.1.4
[PATCH net-next v1 2/2] tipc: allow rdm/dgram socketpairs
From: Erik Hugne for socketpairs using connectionless transport, we cache the respective node local TIPC portid to use in subsequent calls to send() in the socket's private data. Signed-off-by: Erik Hugne Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1198dddf72e8..15f6ce7bf868 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2515,9 +2515,21 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2) { struct tipc_sock *tsk2 = tipc_sk(sock2->sk); struct tipc_sock *tsk1 = tipc_sk(sock1->sk); - - tipc_sk_finish_conn(tsk1, tsk2->portid, 0); - tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); return 0; } @@ -2529,7 +2541,7 @@ static const struct proto_ops msg_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4
[PATCH net-next v1 2/2] tipc: adjust the policy of holding subscription kref
From: Ying Xue When a new subscription object is inserted into name_seq->subscriptions list, it's under name_seq->lock protection; when a subscription is deleted from the list, it's also under the same lock protection; similarly, when accessing a subscription by going through subscriptions list, the entire process is also protected by the name_seq->lock. Therefore, if subscription refcount is increased before it's inserted into subscriptions list, and its refcount is decreased after it's deleted from the list, it will be unnecessary to hold refcount at all before accessing subscription object which is obtained by going through subscriptions list under name_seq->lock protection. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy --- net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 8 ++-- net/tipc/subscr.h | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592e4a6f..bd0aac87b41a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); + tipc_subscrp_get(s); list_add(&s->nameseq_list, &nseq->subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(&seq->subscriptions)) { hlist_del_init_rcu(&seq->ns_list); kfree(seq->sseqs); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0649bc29c6bb..0bf91cd3733c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl 
- convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) @@ -183,12 +179,12 @@ static void tipc_subscrp_kref_release(struct kref *kref) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(&subscription->kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(&subscription->kref); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214c117a..ee52957dc952 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif -- 2.1.4
[PATCH net-next v1 1/2] tipc: advance the time of deleting subscription from subscriber->subscrp_list
From: Ying Xue After a subscription object is created, it's inserted into its subscriber subscrp_list list under subscriber lock protection, similarly, before it's destroyed, it should be first removed from its subscriber->subscrp_list. Since the subscription list is accessed with subscriber lock, all the subscriptions are valid during the lock duration. Hence in tipc_subscrb_subscrp_delete(), we remove subscription get/put and the extra subscriber unlock/lock. After this change, the subscriptions refcount cleanup is very simple and does not access any lock. Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 271cd66e4b3b..0649bc29c6bb 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -145,6 +145,7 @@ static void tipc_subscrp_timeout(unsigned long data) spin_lock_bh(&subscriber->lock); tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ @@ -177,10 +178,7 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(&subscriber->lock); - list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); - spin_unlock_bh(&subscriber->lock); kfree(sub); tipc_subscrb_put(subscriber); } @@ -210,11 +208,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, continue; tipc_nametbl_unsubscribe(sub); - tipc_subscrp_get(sub); - spin_unlock_bh(&subscriber->lock); + list_del(&sub->subscrp_list); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(&subscriber->lock); if (s) break; -- 2.1.4
[PATCH net-next v1 0/2] tipc: subscription refcount simplifications
The first patch makes the subscription refcount cleanup lockless and the second updates the subscription refcount policy. Ying Xue (2): tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 17 - net/tipc/subscr.h | 3 +++ 3 files changed, 9 insertions(+), 13 deletions(-) -- 2.1.4
[PATCH net v1 1/1] tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe
From: Ying Xue Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: -- tipc_nametbl_publish spin_lock_bh(&tn->nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> CPU1: CPU2: -- tipc_nametbl_stop spin_lock_bh(&tn->nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(&tn->nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. 
Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(&subscriber->lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); - tipc_nametbl_unsubscribe(sub); list_del(&sub->subscrp_list); atomic_dec(&tn->subscription_count); spin_unlock_bh(&subscriber->lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(&subscriber->lock); tipc_subscrp_delete(sub); -- 2.1.4
[PATCH net v1 0/6] topology server fixes for nametable soft lockup
In this series, we revert the commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") and provide an alternate solution to fix the race conditions in commits 2-4. We have to do this as the above commit introduced a nametbl soft lockup at module exit as described by patch#4. Parthasarathy Bhuvaragan (6): tipc: fix nametbl_lock soft lockup at node/link events tipc: add subscription refcount to avoid invalid delete tipc: fix connection refcount error tipc: fix nametbl_lock soft lockup at module exit tipc: ignore requests when the connection state is not CONNECTED tipc: fix cleanup at module unload net/tipc/node.c | 9 +++- net/tipc/server.c | 48 + net/tipc/subscr.c | 124 ++ net/tipc/subscr.h | 1 + 4 files changed, 99 insertions(+), 83 deletions(-) -- 2.1.4
[PATCH net v1 4/6] tipc: fix nametbl_lock soft lockup at module exit
Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527 moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to perform the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametbl_withdraw(), the chances increase for the latter to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fix the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. 
Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/server.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2e803601aa99..826cde2c401e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- 2.1.4
[PATCH net v1 3/6] tipc: fix connection refcount error
Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to a new subscriber leading to inconsistent reference count. We have another refcount warning when we grab the refcount in tipc_conn_lookup() for connections with the CF_CONNECTED flag not set. This usually occurs at shutdown when we stop the topology server and withdraw TIPC_CFG_SRV publication thereby triggering a withdraw message to subscribers. In this commit, we: 1. remove the connection from the server list at refcount cleanup. 2. grab the refcount for a connection only if CF_CONNECTED is set. Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/server.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..2e803601aa99 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); } tipc_clean_outqueues(con); @@ -128,8 +134,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(&s->idr_lock); con = idr_find(&s->conn_idr, conid); - if (con) + if (con && 
test_bit(CF_CONNECTED, &con->flags)) conn_get(con); + else + con = NULL; spin_unlock_bh(&s->idr_lock); return con; } @@ -198,15 +206,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- 2.1.4
[PATCH net v1 1/6] tipc: fix nametbl_lock soft lockup at node/link events
We trigger a soft lockup as we grab nametbl_lock twice if the node has a pending node up/down or link up/down event while: - we process an incoming named message in tipc_named_rcv() and perform a tipc_update_nametbl(). - we have pending backlog items in the name distributor queue during a nametable update using tipc_nametbl_publish() or tipc_nametbl_withdraw(). The following are the call chain associated: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_node_subscribe()/unsubscribe() tipc_node_write_unlock() << lockup occurs if an outstanding node/link event exists, as we grab nametbl_lock again >> tipc_nametbl_withdraw() Grab nametbl_lock tipc_named_process_backlog() tipc_update_nametbl() << rest as above >> The function tipc_node_write_unlock(), in addition to releasing the lock, processes the outstanding node/link up/down events. To do this, we need to grab the nametbl_lock again leading to the lockup. In this commit we fix the soft lockup by introducing a fast variant of node_unlock(), where we just release the lock. We adapt the node_subscribe()/node_unsubscribe() to use the fast variants. 
Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/node.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9d2f4c2b08ab..27753325e06e 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n) write_lock_bh(&n->lock); } +static void tipc_node_write_unlock_fast(struct tipc_node *n) +{ + write_unlock_bh(&n->lock); +} + static void tipc_node_write_unlock(struct tipc_node *n) { struct net *net = n->net; @@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_add_tail(subscr, &n->publ_list); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } @@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } -- 2.1.4
[PATCH net v1 5/6] tipc: ignore requests when the connection state is not CONNECTED
In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Acked-by: Ying Xue Acked-by: Jon Maloy Tested-by: John Thompson Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/server.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 826cde2c401e..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -453,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, &con->flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -466,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(&e->list, &con->outqueue); spin_unlock_bh(&con->outqueue_lock); - if (test_bit(CF_CONNECTED, &con->flags)) { - if (!queue_work(s->send_wq, &con->swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, &con->swork)) conn_put(con); - } return 0; } @@ -495,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(&con->outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, &con->flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == &con->outqueue) -- 2.1.4
[PATCH net v1 2/6] tipc: add subscription refcount to avoid invalid delete
Until now, the subscribers keep track of the subscriptions using reference count at subscriber level. At subscription cancel or subscriber delete, we delete the subscription only if the timer was pending for the subscription. This approach is incorrect as: 1. del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 is scheduled, it deletes an invalid subscription. 2. We export tipc_subscrp_report_overlap(), which accesses the subscription pointer multiple times. Meanwhile the subscription timer can expire thereby freeing the subscription and we might continue to access the subscription pointer leading to memory violations. In this commit, we introduce subscription refcount to avoid deleting an invalid subscription. Reported-and-Tested-by: John Thompson Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 124 ++ net/tipc/subscr.h | 1 + 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..9d94e65d0894 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,8 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); +static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; + tipc_subscrp_get(sub); tipc_subscrp_convert_seq(&sub->evt.s.seq, sub->swap, &seq); if (!tipc_subscrp_check_overlap(&seq, found_lower, found_upper)) return; @@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, 
found_lower, found_upper, event, port_ref, node); + tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; - struct tipc_subscriber *subscriber = sub->subscriber; /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(&subscriber->lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(&subscriber->lock); - - tipc_subscrb_put(subscriber); + tipc_subscrp_put(sub); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(&subscriber->kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub = container_of(kref, +struct tipc_subscription, +kref); + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(&subscriber->lock); + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + atomic_dec(&tn->subscription_count); + spin_unlock_bh(&subscriber->lock); + kfree(sub); + tipc_subscrb_put(subscriber); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(&subscription->kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(&subscription->kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. 
+ */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct list_head *subscription_list = &subscriber->subscrp_list; + struct tipc_subscription *sub, *temp; + + spin_lock_bh(&subscriber->lock); + list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { + if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))
[PATCH net v1 6/6] tipc: fix cleanup at module unload
In tipc_server_stop(), we iterate over the connections using the server's idr_in_use as the limiting factor. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until we have no connections left. Acked-by: Ying Xue Acked-by: Jon Maloy Tested-by: John Thompson Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 04ff441b8065..3cd6402e812c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -619,14 +619,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(&s->idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(&s->conn_idr, id); if (con) { - total++; spin_unlock_bh(&s->idr_lock); tipc_close_conn(con); spin_lock_bh(&s->idr_lock); -- 2.1.4
[PATCH net v1 1/1] tipc: allocate user memory with GFP_KERNEL flag
Until now, we allocate memory always with GFP_ATOMIC flag. When the system is under memory pressure and a user tries to send, the send fails due to low memory. However, the user application can wait for free memory if we allocate it using GFP_KERNEL flag. In this commit, we use allocate memory with GFP_KERNEL for all user allocation. Reported-by: Rune Torgersen Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/discover.c | 4 ++-- net/tipc/link.c | 2 +- net/tipc/msg.c| 16 net/tipc/msg.h| 2 +- net/tipc/name_distr.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 6b109a808d4c..02462d67d191 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rskb = tipc_buf_acquire(MAX_H_SIZE); + rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!rskb) return; tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); @@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - req->buf = tipc_buf_acquire(MAX_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/link.c b/net/tipc/link.c index b758ca8b2f79..b0f8646e0631 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1384,7 +1384,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); msg_set_size(&tnlhdr, pktlen + INT_H_SIZE); - tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a22be502f1bd..ab02d0742476 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -58,12 
+58,12 @@ static unsigned int align(unsigned int i) * NOTE: Headroom is reserved to allow prepending of a data link header. * There may also be unrequested tailroom present at the buffer's end. */ -struct sk_buff *tipc_buf_acquire(u32 size) +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + skb = alloc_skb_fclone(buf_size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, struct tipc_msg *msg; struct sk_buff *buf; - buf = tipc_buf_acquire(hdr_sz + data_sz); + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); if (unlikely(!buf)) return NULL; @@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { - skb = tipc_buf_acquire(msz); + skb = tipc_buf_acquire(msz, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; skb_orphan(skb); @@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, msg_set_importance(&pkthdr, msg_importance(mhdr)); /* Prepare first fragment */ - skb = tipc_buf_acquire(pktmax); + skb = tipc_buf_acquire(pktmax, GFP_KERNEL); if (!skb) return -ENOMEM; skb_orphan(skb); @@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, pktsz = drem + INT_H_SIZE; else pktsz = pktmax; - skb = tipc_buf_acquire(pktsz); + skb = tipc_buf_acquire(pktsz, GFP_KERNEL); if (!skb) { rc = -ENOMEM; goto error; @@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, if (msz > (max / 2)) return false; - _skb = tipc_buf_acquire(max); + _skb = tipc_buf_acquire(max, GFP_ATOMIC); if (!_skb) return false; @@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Never return SHORT header; expand by replacing buffer if necessary */ if (msg_short(hdr)) { - *skb = tipc_buf_
[PATCH net-next v2 13/16] tipc: create TIPC_DISCONNECTING as a new sk_state
In this commit, we create a new tipc socket state TIPC_DISCONNECTING in sk_state. TIPC_DISCONNECTING is replacing the socket connection status update using SS_DISCONNECTING. TIPC_DISCONNECTING is set for connection oriented sockets at: - tipc_shutdown() - connection probe timeout - when we receive an error message on the connection. There is no functional change in this commit. Signed-off-by: Parthasarathy Bhuvaragan --- v2: set TIPC_DISCONNECTING to TCP_CLOSE_WAIT. --- net/tipc/socket.c | 39 --- 1 file changed, 24 insertions(+), 15 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b14dd2549980..a48c0c0676cf 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -54,6 +54,7 @@ enum { TIPC_LISTEN = TCP_LISTEN, TIPC_ESTABLISHED = TCP_ESTABLISHED, TIPC_OPEN = TCP_CLOSE, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, }; /** @@ -362,10 +363,14 @@ static int tipc_set_sk_state(struct sock *sk, int state) break; case TIPC_ESTABLISHED: if (oldstate == SS_CONNECTING || - oldstate == SS_UNCONNECTED || oldsk_state == TIPC_OPEN) res = 0; break; + case TIPC_DISCONNECTING: + if (oldstate == SS_CONNECTING || + oldsk_state == TIPC_ESTABLISHED) + res = 0; + break; } if (!res) @@ -621,13 +626,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sock->sk); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { if ((sock->state != SS_CONNECTED) && - ((peer != 2) || (sock->state != SS_DISCONNECTING))) + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = tsk_peer_node(tsk); @@ -693,6 +699,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, (!skb_queue_empty(&sk->sk_receive_queue))) mask |= (POLLIN | 
POLLRDNORM); break; + case TIPC_DISCONNECTING: + mask = (POLLIN | POLLRDNORM | POLLHUP); + break; case TIPC_LISTEN: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); @@ -1028,7 +1037,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else if (sock->state != SS_CONNECTED) return -ENOTCONN; @@ -1098,7 +1107,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) return -EMSGSIZE; if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else return -ENOTCONN; @@ -1626,7 +1635,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); /* Let timer expire on it's own */ tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); @@ -1641,13 +1650,13 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; return true; } if (unlikely(!msg_isdata(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; return true; }
[PATCH net-next v2 10/16] tipc: create TIPC_LISTEN as a new sk_state
Until now, tipc maintains the socket state in sock->state variable. This is used to maintain generic socket states, but in tipc we overload it and save tipc socket states like TIPC_LISTEN. Other protocols like TCP, UDP store protocol specific states in sk->sk_state instead. In this commit, we : - declare a new tipc state TIPC_LISTEN, that replaces SS_LISTEN - Create a new function tipc_set_state(), to update sk->sk_state. - TIPC_LISTEN state is maintained in sk->sk_state. - replace references to SS_LISTEN with TIPC_LISTEN. There is no functional change in this commit. Signed-off-by: Parthasarathy Bhuvaragan --- v2: set TIPC_LISTEN value to TCP_LISTEN to permit the usage of generic sk_* helpers as suggested by Eric Dumazet. --- net/tipc/socket.c | 62 ++- 1 file changed, 43 insertions(+), 19 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a8c10764f2f6..ce7d9be8833c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -44,8 +44,6 @@ #include "bcast.h" #include "netlink.h" -#define SS_LISTENING -1 /* socket is listening */ - #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 @@ -54,6 +52,10 @@ #define TIPC_MAX_PORT 0x #define TIPC_MIN_PORT 1 +enum { + TIPC_LISTEN = TCP_LISTEN, +}; + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -337,6 +339,31 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) return false; } +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_socket->state; + int res = -EINVAL; + + switch (state) { + case TIPC_LISTEN: + if (oldstate == SS_UNCONNECTED) + res = 0; + break; + } + + if (!res) + sk->sk_state = state; + + return res; +} + 
/** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -666,15 +693,22 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->link_cong) - mask |= POLLOUT; + switch (sk->sk_state) { + case TIPC_LISTEN: + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= (POLLIN | POLLRDNORM); + break; + default: + if (!tsk->link_cong) + mask |= POLLOUT; + break; + } break; case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: - case SS_LISTENING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; @@ -925,7 +959,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) return -EINVAL; } if (!is_connectionless) { - if (sock->state == SS_LISTENING) + if (sk->sk_state == TIPC_LISTEN) return -EPIPE; if (sock->state != SS_UNCONNECTED) return -EISCONN; @@ -1651,7 +1685,6 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) msg_set_dest_droppable(hdr, 1); return false; - case SS_LISTENING: case SS_UNCONNECTED: /* Accept only SYN message */ @@ -2026,15 +2059,9 @@ static int tipc_listen(struct socket *sock, int len) int res; lock_sock(sk); - - if (sock->state != SS_UNCONNECTED) - res = -EINVAL; - else { - sock->state = SS_LISTENING; - res = 0; - } - + res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); + return res; } @@ -2060,9 +2087,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = -EINVAL; - if (sock->state != SS_LISTENING) - break; err = -EAGAIN; if (!timeo) break; @@ -
[PATCH net-next v2 11/16] tipc: create TIPC_ESTABLISHED as a new sk_state
Until now, tipc maintains probing state for connected sockets in tsk->probing_state variable. In this commit, we express this information as socket states and thus remove the variable. We set probe_unacked flag when a probe is sent out and reset it if we receive a reply. Instead of the probing state TIPC_CONN_OK, we create a new state TIPC_ESTABLISHED. There is no functional change in this commit. Signed-off-by: Parthasarathy Bhuvaragan --- v2: remove TIPC_PROBING state and replace it with probe_unacked flag. --- net/tipc/socket.c | 18 +++--- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ce7d9be8833c..9215e2144b6a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -47,13 +47,12 @@ #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 #define TIPC_MAX_PORT 0x #define TIPC_MIN_PORT 1 enum { TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, }; /** @@ -88,9 +87,9 @@ struct tipc_sock { struct list_head sock_list; struct list_head publications; u32 pub_count; - u32 probing_state; uint conn_timeout; atomic_t dupl_rcvcnt; + bool probe_unacked; bool link_cong; u16 snt_unacked; u16 snd_win; @@ -356,6 +355,11 @@ static int tipc_set_sk_state(struct sock *sk, int state) if (oldstate == SS_UNCONNECTED) res = 0; break; + case TIPC_ESTABLISHED: + if (oldstate == SS_CONNECTING || + oldstate == SS_UNCONNECTED) + res = 0; + break; } if (!res) @@ -858,7 +862,7 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; - tsk->probing_state = TIPC_CONN_OK; + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); @@ -1198,8 +1202,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, 
SHORT_H_SIZE); - tsk->probing_state = TIPC_CONN_OK; sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -2263,7 +2267,7 @@ static void tipc_sk_timeout(unsigned long data) peer_port = tsk_peer_port(tsk); peer_node = tsk_peer_node(tsk); - if (tsk->probing_state == TIPC_CONN_PROBING) { + if (tsk->probe_unacked) { if (!sock_owned_by_user(sk)) { sk->sk_socket->state = SS_DISCONNECTING; tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), @@ -2281,7 +2285,7 @@ static void tipc_sk_timeout(unsigned long data) skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, peer_port, tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; + tsk->probe_unacked = true; sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) -- 2.1.4
[PATCH net-next v2 16/16] tipc: remove SS_CONNECTED sock state
In this commit, we replace references to sock->state SS_CONNECTED with sk_state TIPC_ESTABLISHED. Finally, the sock->state is no longer explicitly used by tipc. The FSM below is for various types of connection oriented sockets. Stream Server Listening Socket: +---+ +-+ | TIPC_OPEN |-->| TIPC_LISTEN | +---+ +-+ Stream Server Data Socket: +---+ +--+ | TIPC_OPEN |-->| TIPC_ESTABLISHED | +---+ +--+ ^ | | | | v ++ | TIPC_DISCONNECTING | ++ Stream Socket Client: +---+ +-+ | TIPC_OPEN |-->| TIPC_CONNECTING |--+ +---+ +-+ | || || v| +--+ | | TIPC_ESTABLISHED | | +--+ | ^ | | | | | | v | ++ | | TIPC_DISCONNECTING |<--+ ++ Signed-off-by: Parthasarathy Bhuvaragan --- v2: adapt to the v2 versions of earlier patches. --- net/tipc/socket.c | 86 --- 1 file changed, 38 insertions(+), 48 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 074f4d546828..149396366e80 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -294,7 +294,7 @@ static void tsk_rej_rx_queue(struct sock *sk) static bool tipc_sk_connected(struct sock *sk) { - return sk->sk_socket->state == SS_CONNECTED; + return sk->sk_state == TIPC_ESTABLISHED; } /* tipc_sk_type_connectionless - check if the socket is datagram socket @@ -639,7 +639,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, memset(addr, 0, sizeof(*addr)); if (peer) { - if ((sock->state != SS_CONNECTED) && + if ((!tipc_sk_connected(sk)) && ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); @@ -690,29 +690,26 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - if ((int)sock->state == SS_CONNECTED) { + switch (sk->sk_state) { + case TIPC_ESTABLISHED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; + /* fall thru' */ + case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); - } else { - switch 
(sk->sk_state) { - case TIPC_OPEN: - if (!tsk->link_cong) - mask |= POLLOUT; - if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) - mask |= (POLLIN | POLLRDNORM); - break; - case TIPC_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); - break; - case TIPC_LISTEN: - case TIPC_CONNECTING: - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); - break; - } + break; + case TIPC_OPEN: + if (!tsk->link_cong) + mask |= POLLOUT; + if (tipc_sk_type_connectionless(sk) && + (!skb_queue_empty(&sk->sk_receive_queue))) + mask |= (POLLIN | POLLRDNORM); + break; + case TIPC_DISCONNECTING: + mask = (POLLIN | POLLRDNORM | POLLHUP); + break; } return mask; @@ -1045,7 +1042,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) return err; if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; - else if (sock->state != SS_CONNECTED) + else if (!tipc_sk_connected(sk)) return -ENOTCONN; if (!*timeo_p) return -EAGAIN;
[PATCH net-next v2 12/16] tipc: create TIPC_OPEN as a new sk_state
In this commit, we create a new tipc socket state TIPC_OPEN in sk_state. We primarily replace the SS_UNCONNECTED sock->state with TIPC_OPEN. Signed-off-by: Parthasarathy Bhuvaragan --- v2: TIPC_OPEN is set to the default sk_state TCP_CLOSE. --- net/tipc/socket.c | 97 --- 1 file changed, 43 insertions(+), 54 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 9215e2144b6a..b14dd2549980 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -53,6 +53,7 @@ enum { TIPC_LISTEN = TCP_LISTEN, TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_OPEN = TCP_CLOSE, }; /** @@ -348,16 +349,21 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_set_sk_state(struct sock *sk, int state) { int oldstate = sk->sk_socket->state; + int oldsk_state = sk->sk_state; int res = -EINVAL; switch (state) { + case TIPC_OPEN: + res = 0; + break; case TIPC_LISTEN: - if (oldstate == SS_UNCONNECTED) + if (oldsk_state == TIPC_OPEN) res = 0; break; case TIPC_ESTABLISHED: if (oldstate == SS_CONNECTING || - oldstate == SS_UNCONNECTED) + oldstate == SS_UNCONNECTED || + oldsk_state == TIPC_OPEN) res = 0; break; } @@ -423,8 +429,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, /* Finish initializing socket data structures */ sock->ops = ops; - sock->state = SS_UNCONNECTED; sock_init_data(sock, sk); + tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; @@ -448,6 +454,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); } + return 0; } @@ -652,28 +659,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * exits. TCP and other protocols seem to rely on higher level poll routines * to handle any preventable race conditions, so TIPC will do the same ... 
* - * TIPC sets the returned events as follows: - * - * socket stateflags set - * - - * unconnected no read flags - * POLLOUT if port is not congested - * - * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue - * no write flags - * - * connected POLLIN/POLLRDNORM if data in rx queue - * POLLOUT if port is not congested - * - * disconnecting POLLIN/POLLRDNORM/POLLHUP - * no write flags - * - * listening POLLIN if SYN in rx queue - * no write flags - * - * ready POLLIN/POLLRDNORM if data in rx queue - * [connectionless]POLLOUT (since port cannot be congested) - * * IMPORTANT: The fact that a read or write operation is indicated does NOT * imply that the operation will succeed, merely that it should be performed * and will not block. @@ -687,27 +672,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - if (tipc_sk_type_connectionless(sk)) { - if (!tsk->link_cong) - mask |= POLLOUT; - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); - return mask; - } - switch ((int)sock->state) { - case SS_UNCONNECTED: - switch (sk->sk_state) { - case TIPC_LISTEN: - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); - break; - default: - if (!tsk->link_cong) - mask |= POLLOUT; - break; - } - break; case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; @@ -719,6 +684,20 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, case SS_DISCONNECTING: mask = (POLLIN | POLLRDNORM | POLLHUP); break; + default: + switch (sk->sk_state) { + case TIPC_OPEN: + if (!tsk->link_cong) + mask |= POLLOUT; + if (tipc_sk_type_connectionless(sk) && + (!skb_queue_empty(&am
[PATCH net-next v2 15/16] tipc: create TIPC_CONNECTING as a new sk_state
In this commit, we create a new tipc socket state TIPC_CONNECTING by primarily replacing the SS_CONNECTING with TIPC_CONNECTING. There is no functional change in this commit. Signed-off-by: Parthasarathy Bhuvaragan --- v2: set TIPC_CONNECTING to TCP_SYN_SENT. --- net/tipc/socket.c | 60 ++- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index e732b1fe7eab..074f4d546828 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -55,6 +55,7 @@ enum { TIPC_ESTABLISHED = TCP_ESTABLISHED, TIPC_OPEN = TCP_CLOSE, TIPC_DISCONNECTING = TCP_CLOSE_WAIT, + TIPC_CONNECTING = TCP_SYN_SENT, }; /** @@ -349,7 +350,6 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) */ static int tipc_set_sk_state(struct sock *sk, int state) { - int oldstate = sk->sk_socket->state; int oldsk_state = sk->sk_state; int res = -EINVAL; @@ -358,16 +358,17 @@ static int tipc_set_sk_state(struct sock *sk, int state) res = 0; break; case TIPC_LISTEN: + case TIPC_CONNECTING: if (oldsk_state == TIPC_OPEN) res = 0; break; case TIPC_ESTABLISHED: - if (oldstate == SS_CONNECTING || + if (oldsk_state == TIPC_CONNECTING || oldsk_state == TIPC_OPEN) res = 0; break; case TIPC_DISCONNECTING: - if (oldstate == SS_CONNECTING || + if (oldsk_state == TIPC_CONNECTING || oldsk_state == TIPC_ESTABLISHED) res = 0; break; @@ -689,16 +690,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, if (sk->sk_shutdown == SHUTDOWN_MASK) mask |= POLLHUP; - switch ((int)sock->state) { - case SS_CONNECTED: + if ((int)sock->state == SS_CONNECTED) { if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; - /* fall thru' */ - case SS_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); - break; - default: + } else { switch (sk->sk_state) { case TIPC_OPEN: if (!tsk->link_cong) @@ -711,6 +708,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, mask = (POLLIN | POLLRDNORM | POLLHUP); 
break; case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; @@ -1014,7 +1012,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { if (!is_connectionless) - sock->state = SS_CONNECTING; + tipc_set_sk_state(sk, TIPC_CONNECTING); return dsz; } if (rc == -ELINKCONG) { @@ -1650,9 +1648,10 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) sk->sk_state_change(sk); } return true; + } - case SS_CONNECTING: - + switch (sk->sk_state) { + case TIPC_CONNECTING: /* Accept only ACK or NACK message */ if (unlikely(!msg_connected(hdr))) return false; @@ -1684,9 +1683,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); return false; - } - switch (sk->sk_state) { case TIPC_OPEN: case TIPC_DISCONNECTING: break; @@ -1955,7 +1952,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - done = sk_wait_event(sk, timeo_p, sock->state != SS_CONNECTING); + done = sk_wait_event(sk, timeo_p, +sk->sk_state != TIPC_CONNECTING); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -1978,7 +1976,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, struct sockaddr_tipc *dst = (struct sockaddr_tipc *)dest;
[PATCH net-next v2 09/16] tipc: remove socket state SS_READY
Until now, tipc socket state SS_READY declares that the socket is a connectionless socket. In this commit, we remove the state SS_READY and replace it with a condition which returns true for datagram / connectionless sockets. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 49 +++-- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1b1aa941cd06..a8c10764f2f6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -45,7 +45,6 @@ #include "netlink.h" #define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ @@ -294,6 +293,16 @@ static bool tipc_sk_connected(struct sock *sk) return sk->sk_socket->state == SS_CONNECTED; } +/* tipc_sk_type_connectionless - check if the socket is datagram socket + * @sk: socket + * + * Returns true if connection less, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -345,7 +354,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, { struct tipc_net *tn; const struct proto_ops *ops; - socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; @@ -357,16 +365,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, switch (sock->type) { case SOCK_STREAM: ops = &stream_ops; - state = SS_UNCONNECTED; break; case SOCK_SEQPACKET: ops = &packet_ops; - state = SS_UNCONNECTED; break; case SOCK_DGRAM: case SOCK_RDM: ops = &msg_ops; - state = SS_READY; break; default: return -EPROTOTYPE; @@ -387,7 +392,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, 
/* Finish initializing socket data structures */ sock->ops = ops; - sock->state = state; + sock->state = SS_UNCONNECTED; sock_init_data(sock, sk); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); @@ -407,7 +412,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); tsk->rcv_win = tsk->snd_win; - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); @@ -651,12 +656,19 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); + if (tipc_sk_type_connectionless(sk)) { + if (!tsk->link_cong) + mask |= POLLOUT; + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= (POLLIN | POLLRDNORM); + return mask; + } + switch ((int)sock->state) { case SS_UNCONNECTED: if (!tsk->link_cong) mask |= POLLOUT; break; - case SS_READY: case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; @@ -893,6 +905,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head pktchain; + bool is_connectionless = tipc_sk_type_connectionless(sk); struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -903,7 +916,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (sock->state == SS_READY && tsk->peer.family == AF_TIPC) + if (is_connectionless && tsk->peer.family == AF_TIPC) dest = &tsk->peer; else return -EDESTADDRREQ; @@ -911,7 +924,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) dest->family != AF_TIPC) {
[PATCH net-next v2 14/16] tipc: remove SS_DISCONNECTING state
In this commit, we replace the references to SS_DISCONNECTING with the combination of sk_state TIPC_DISCONNECTING and flags set in sk_shutdown. We introduce a new function __tipc_shutdown(), which provides the common code required by tipc_release() and tipc_shutdown(). Signed-off-by: Parthasarathy Bhuvaragan --- v2: introduce __tipc_shutdown() to avoid code duplication. replace the TIPC_CLOSING state in v1 with sk_shutdown flag. --- net/tipc/socket.c | 132 +- 1 file changed, 52 insertions(+), 80 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index a48c0c0676cf..e732b1fe7eab 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -442,6 +442,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, } msg_set_origport(msg, tsk->portid); setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + sk->sk_shutdown = 0; sk->sk_backlog_rcv = tipc_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; @@ -470,6 +471,44 @@ static void tipc_sk_callback(struct rcu_head *head) sock_put(&tsk->sk); } +/* Caller should hold socket lock for the socket. */ +static void __tipc_shutdown(struct socket *sock, int error) +{ + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct net *net = sock_net(sk); + u32 dnode = tsk_peer_node(tsk); + struct sk_buff *skb; + + /* Reject all unreceived messages, except on an active connection +* (which disconnects locally & sends a 'FIN+' to peer). 
+*/ + while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { + kfree_skb(skb); + } else { + if (!tipc_sk_type_connectionless(sk) && + sk->sk_state != TIPC_DISCONNECTING) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, dnode, tsk->portid); + } + tipc_sk_respond(sk, skb, error); + } + } + if (sk->sk_state != TIPC_DISCONNECTING) { + skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, + TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, + tsk_own_node(tsk), tsk_peer_port(tsk), + tsk->portid, error); + if (skb) + tipc_node_xmit_skb(net, skb, dnode, tsk->portid); + if (!tipc_sk_type_connectionless(sk)) { + tipc_node_remove_conn(net, dnode, tsk->portid); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + } + } +} + /** * tipc_release - destroy a TIPC socket * @sock: socket to destroy @@ -489,10 +528,7 @@ static void tipc_sk_callback(struct rcu_head *head) static int tipc_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net; struct tipc_sock *tsk; - struct sk_buff *skb; - u32 dnode; /* * Exit if socket isn't fully initialized (occurs when a failed accept() @@ -501,46 +537,16 @@ static int tipc_release(struct socket *sock) if (sk == NULL) return 0; - net = sock_net(sk); tsk = tipc_sk(sk); lock_sock(sk); - /* -* Reject all unreceived messages, except on an active connection -* (which disconnects locally & sends a 'FIN+' to peer) -*/ - dnode = tsk_peer_node(tsk); - while (sock->state != SS_DISCONNECTING) { - skb = __skb_dequeue(&sk->sk_receive_queue); - if (skb == NULL) - break; - if (TIPC_SKB_CB(skb)->bytes_read) - kfree_skb(skb); - else { - if ((sock->state == SS_CONNECTING) || - (sock->state == SS_CONNECTED)) { - sock->state = SS_DISCONNECTING; - tipc_node_remove_conn(net, dnode, tsk->portid); - } - tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); - } - } - + __tipc_shutdown(sock, TIPC_ERR_NO_PORT); + sk->sk_shutdown = SHUTDOWN_MASK; tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); 
tipc_sk_remove(tsk); - if (tipc_sk_connected(sk)) { - skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, - TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, - tsk_own_node(tsk), tsk_peer_port(tsk), -
[PATCH net-next v2 08/16] tipc: remove probing_intv from tipc_sock
Until now, probing_intv is a variable in struct tipc_sock but is always set to a constant CONN_PROBING_INTERVAL. The socket connection is probed based on this value. In this commit, we remove this variable and setup the socket timer based on the constant CONN_PROBING_INTERVAL. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7b6a1847cf8a..1b1aa941cd06 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -67,7 +67,6 @@ * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion @@ -89,7 +88,6 @@ struct tipc_sock { struct list_head publications; u32 pub_count; u32 probing_state; - unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; @@ -1153,9 +1151,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -2240,13 +2237,15 @@ static void tipc_sk_timeout(unsigned long data) sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); } - } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, 
tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + bh_unlock_sock(sk); + goto exit; } + + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); + tsk->probing_state = TIPC_CONN_PROBING; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); -- 2.1.4
[PATCH net-next v2 06/16] tipc: remove tsk->connected for connectionless sockets
Until now, for connectionless sockets the peer information during connect is stored in tsk->peer and a connection state is set in tsk->connected. This is redundant. In this commit, for connectionless sockets we update: - __tipc_sendmsg(), when the destination is NULL the peer existence is determined by tsk->peer.family, instead of tsk->connected. - tipc_connect(), remove set/unset of tsk->connected. Hence tsk->connected is no longer used for connectionless sockets. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 0546556d3517..524abe47560d 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -902,7 +902,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (tsk->connected && sock->state == SS_READY) + if (sock->state == SS_READY && tsk->peer.family == AF_TIPC) dest = &tsk->peer; else return -EDESTADDRREQ; @@ -1939,12 +1939,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (sock->state == SS_READY) { if (dst->family == AF_UNSPEC) { memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); - tsk->connected = 0; } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { memcpy(&tsk->peer, dest, destlen); - tsk->connected = 1; } goto exit; } -- 2.1.4
[PATCH net-next v2 07/16] tipc: remove tsk->connected from tipc_sock
Until now, we determine if a socket is connected or not based on tsk->connected, which is set once when the probing state is set to TIPC_CONN_OK. It is unset when the sock->state is updated from SS_CONNECTED to any other state. In this commit, we remove connected variable from tipc_sock and derive socket connection status from the following condition: sock->state == SS_CONNECTED => tsk->connected There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 36 +++- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 524abe47560d..7b6a1847cf8a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -58,7 +58,6 @@ /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API - * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names @@ -80,7 +79,6 @@ */ struct tipc_sock { struct sock sk; - int connected; u32 conn_type; u32 conn_instance; int published; @@ -293,6 +291,11 @@ static void tsk_rej_rx_queue(struct sock *sk) tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } +static bool tipc_sk_connected(struct sock *sk) +{ + return sk->sk_socket->state == SS_CONNECTED; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -300,12 +303,13 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { - struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; - 
if (unlikely(!tsk->connected)) + if (unlikely(!tipc_sk_connected(sk))) return false; if (unlikely(msg_origport(msg) != peer_port)) @@ -470,7 +474,6 @@ static int tipc_release(struct socket *sock) if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; - tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); @@ -480,7 +483,7 @@ static int tipc_release(struct socket *sock) tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); - if (tsk->connected) { + if (tipc_sk_connected(sk)) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), @@ -1010,7 +1013,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && !tsk_conn_cong(tsk)) || -!tsk->connected); + !tipc_sk_connected(sk)); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -1152,7 +1155,6 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; - tsk->connected = 1; sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); @@ -1261,13 +1263,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); - if (!tsk->connected) + if (!tipc_sk_connected(sk)) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_
[PATCH net-next v2 00/16] tipc: socket layer improvements
The following issues with the current socket layer hinder socket diagnostics implementation, which led to this patch series. 1. tipc socket state is derived from multiple variables like sock->state, tsk->probing_state and tsk->connected. This style forces us to export multiple attributes to the user space, which has to be backward compatible. 2. Abuse of sock->state cannot be exported to user-space without requiring tipc specific hacks in the user-space. - For connectionless (CL) sockets sock->state is overloaded to tipc state SS_READY. - For connection oriented (CO) listening socket sock->state is overloaded to tipc state SS_LISTENING. This series is split into four: 1. Bug fixes in patch #1,2,3. 2. Minor cleanups in patch#4-5. 3. Express all tipc states using a single variable in patch#6-8. 4. Migrate the new tipc states to sk->sk_state in patch#9-16. The figures below represent the FSM after this series: Stream Server Listening Socket: +---+ +-+ | TIPC_OPEN |-->| TIPC_LISTEN | +---+ +-+ Stream Server Data Socket: +---+ +--+ | TIPC_OPEN |-->| TIPC_ESTABLISHED | +---+ +--+ ^ | | | | v ++ | TIPC_DISCONNECTING | ++ Stream Socket Client: +---+ +-+ | TIPC_OPEN |-->| TIPC_CONNECTING |--+ +---+ +-+ | || || v| +--+ | | TIPC_ESTABLISHED | | +--+ | ^ | | | | | | v | ++ | | TIPC_DISCONNECTING |<--+ ++ NOTE: This is just a base refactoring required for socket diagnostics. TIPC socket diagnostics support will be introduced in a later series. v2: - remove extra cast and parenthesis as suggested by David S. Miller in #4. - map new tipc state values to tcp states to address Eric Dumazet's concern, thus allow the usage of generic sk_* helpers. This is done in patch#10-15. - remove TIPC_PROBING state and replace it with probe_unacked flag in #11. - replace the TIPC_CLOSING state in v1 with sk_shutdown flag in #14. - introduce __tipc_shutdown() to avoid code duplication in #14. 
Parthasarathy Bhuvaragan (16): tipc: return early for non-blocking sockets at link congestion tipc: wakeup sleeping users at disconnect tipc: set kern=0 in sk_alloc() during tipc_accept() tipc: rename struct tipc_skb_cb member handle to bytes_read tipc: rename tsk->remote to tsk->peer for consistent naming tipc: remove tsk->connected for connectionless sockets tipc: remove tsk->connected from tipc_sock tipc: remove probing_intv from tipc_sock tipc: remove socket state SS_READY tipc: create TIPC_LISTEN as a new sk_state tipc: create TIPC_ESTABLISHED as a new sk_state tipc: create TIPC_OPEN as a new sk_state tipc: create TIPC_DISCONNECTING as a new sk_state tipc: remove SS_DISCONNECTING state tipc: create TIPC_CONNECTING as a new sk_state tipc: remove SS_CONNECTED sock state net/tipc/msg.h| 2 +- net/tipc/socket.c | 478 +++--- 2 files changed, 242 insertions(+), 238 deletions(-) -- 2.1.4
[PATCH net-next v2 05/16] tipc: rename tsk->remote to tsk->peer for consistent naming
Until now, the peer information for connect is stored in tsk->remote but the rest of the code uses the name peer for peer/remote. In this commit, we rename tsk->remote to tsk->peer to align with the naming convention followed in the rest of the code. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index c543ae6cbf65..0546556d3517 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -65,7 +65,6 @@ * @max_pkt: maximum packet size "hint" used when building messages sent by port * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -75,7 +74,7 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer - * @remote: 'connected' peer for dgram/rdm + * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ @@ -101,7 +100,7 @@ struct tipc_sock { u16 peer_caps; u16 rcv_unacked; u16 rcv_win; - struct sockaddr_tipc remote; + struct sockaddr_tipc peer; struct rhash_head node; struct rcu_head rcu; }; @@ -904,7 +903,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) return -EMSGSIZE; if (unlikely(!dest)) { if (tsk->connected && sock->state == SS_READY) - dest = &tsk->remote; + dest = &tsk->peer; else return -EDESTADDRREQ; } else if (unlikely(m->msg_namelen < sizeof(*dest)) || @@ -1939,12 +1938,12 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, /* DGRAM/RDM 
connect(), just save the destaddr */ if (sock->state == SS_READY) { if (dst->family == AF_UNSPEC) { - memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); tsk->connected = 0; } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { - memcpy(&tsk->remote, dest, destlen); + memcpy(&tsk->peer, dest, destlen); tsk->connected = 1; } goto exit; -- 2.1.4
[PATCH net-next v2 03/16] tipc: set kern=0 in sk_alloc() during tipc_accept()
Until now, tipc_accept() calls sk_alloc() with kern=1. This is incorrect as the data socket's owner is the user application. Thus for these accepted data sockets the network namespace refcount is skipped. In this commit, we fix this by setting kern=0. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cd01deb1da9c..82aec2eb8497 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2093,7 +2093,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); -- 2.1.4
[PATCH net-next v2 01/16] tipc: return early for non-blocking sockets at link congestion
Until now, in stream/mcast send() we pass the message to the link layer even when the link is congested and add the socket to the link's wakeup queue. This is unnecessary for non-blocking sockets. If a socket is set to non-blocking and sends multicast with zero back off time while receiving EAGAIN, we exhaust the memory. In this commit, we return immediately at stream/mcast send() for non-blocking sockets. Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..adf3e6ecf61e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -697,6 +697,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, uint mtu; int rc; + if (!timeo && tsk->link_cong) + return -ELINKCONG; + msg_set_type(mhdr, TIPC_MCAST_MSG); msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); msg_set_destport(mhdr, 0); @@ -1072,6 +1075,9 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (!timeo && tsk->link_cong) + return -ELINKCONG; + dnode = tsk_peer_node(tsk); skb_queue_head_init(&pktchain); -- 2.1.4
[PATCH net-next v2 04/16] tipc: rename struct tipc_skb_cb member handle to bytes_read
In this commit, we rename handle to bytes_read indicating the purpose of the member. Signed-off-by: Parthasarathy Bhuvaragan --- v2: Remove unnecessary cast and parenthesis as suggested by David S. Miller. --- net/tipc/msg.h| 2 +- net/tipc/socket.c | 18 ++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 50a739860d37..8d408612ffa4 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -95,7 +95,7 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - void *handle; + u32 bytes_read; struct sk_buff *tail; bool validated; bool wakeup_pending; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 82aec2eb8497..c543ae6cbf65 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -465,7 +465,7 @@ static int tipc_release(struct socket *sock) skb = __skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) break; - if (TIPC_SKB_CB(skb)->handle != NULL) + if (TIPC_SKB_CB(skb)->bytes_read) kfree_skb(skb); else { if ((sock->state == SS_CONNECTING) || @@ -1435,7 +1435,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, struct tipc_msg *msg; long timeo; unsigned int sz; - int sz_to_copy, target, needed; + int target; int sz_copied = 0; u32 err; int res = 0, hlen; @@ -1483,11 +1483,13 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, /* Capture message data (if valid) & compute return value (always) */ if (!err) { - u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + u32 offset = TIPC_SKB_CB(buf)->bytes_read; + u32 needed; + int sz_to_copy; sz -= offset; needed = (buf_len - sz_copied); - sz_to_copy = (sz <= needed) ? 
sz : needed; + sz_to_copy = min(sz, needed); res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) @@ -1497,8 +1499,8 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = - (void *)(unsigned long)(offset + sz_to_copy); + TIPC_SKB_CB(buf)->bytes_read = + offset + sz_to_copy; goto exit; } } else { @@ -1742,7 +1744,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Enqueue message */ - TIPC_SKB_CB(skb)->handle = NULL; + TIPC_SKB_CB(skb)->bytes_read = 0; __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -2177,7 +2179,7 @@ static int tipc_shutdown(struct socket *sock, int how) /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ skb = __skb_dequeue(&sk->sk_receive_queue); if (skb) { - if (TIPC_SKB_CB(skb)->handle != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { kfree_skb(skb); goto restart; } -- 2.1.4
[PATCH net-next v2 02/16] tipc: wakeup sleeping users at disconnect
Until now, in filter_connect() when we terminate a connection due to an error message from the peer, we set the socket state to DISCONNECTING. The socket is notified about this broken connection using EPIPE when a user tries to send a message. However, if a socket was waiting on a poll() while the connection is being terminated, we fail to wake up that socket. In this commit, we wake up sleeping sockets at connection termination. Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index adf3e6ecf61e..cd01deb1da9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1599,6 +1599,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /* Let timer expire on it's own */ tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); + sk->sk_state_change(sk); } return true; -- 2.1.4
Re: [PATCH net-next v1 13/16] tipc: create TIPC_DISCONNECTING as a new sk_state
On 10/27/2016 05:03 PM, Eric Dumazet wrote: On Thu, 2016-10-27 at 16:22 +0200, Parthasarathy Bhuvaragan wrote: In this commit, we create a new tipc socket state TIPC_DISCONNECTING in sk_state. TIPC_DISCONNECTING is replacing the socket connection status update using SS_DISCONNECTING. TIPC_DISCONNECTING is set for connection oriented sockets at: - tipc_shutdown() - connection probe timeout - when we receive an error message on the connection. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 1 + net/tipc/socket.c | 39 +++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index ae45de5e0d93..f5c03fdfc8a8 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -184,6 +184,7 @@ enum { TIPC_PROBING, TIPC_ESTABLISHED, TIPC_OPEN, + TIPC_DISCONNECTING, }; Note that all these TIPC socket states might conflict with sk_fullsock(), sk_listener(), inet6_sk(), ip_skb_dst_mtu(), sk_const_to_full_sk(), skb_to_full_sk() helpers. So there is definitely a high risk. Eric, I can map the tipc state to a corresponding tcp state like the patch below. This resembles the way it's done in l2tp and sctp. Do you see any issue with that? diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index f2d9294d0920..e697f809549e 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -180,15 +180,17 @@ struct tipc_event { * Definitions for the TIPC protocol sk_state field. 
*/ enum { - TIPC_LISTEN = 1, - TIPC_PROBING, - TIPC_ESTABLISHED, - TIPC_OPEN, - TIPC_DISCONNECTING, - TIPC_CLOSING, - TIPC_CONNECTING, + TIPC_LISTEN = TCP_LISTEN, + TIPC_ESTABLISHED = TCP_ESTABLISHED, + TIPC_CLOSE = TCP_CLOSE, + TIPC_CONNECTING = TCP_SYN_SENT, + TIPC_DISCONNECTING = TCP_CLOSE_WAIT, }; I will delete TIPC_OPEN and TIPC_PROBING; the former can be set to TIPC_CLOSE like it's done in tcp and the latter can be replaced with a probes_out counter. /Partha
[PATCH net-next v1 10/16] tipc: create TIPC_LISTEN as a new sk_state
Until now, tipc maintains the socket state in sock->state variable. This is used to maintain generic socket states, but in tipc we overload it and save tipc socket states like TIPC_LISTEN. Other protocols like TCP, UDP store protocol specific states in sk->sk_state instead. In this commit, we : - declare a new tipc state TIPC_LISTEN, that replaces SS_LISTEN - Create a new function tipc_set_state(), to update sk->sk_state. - TIPC_LISTEN state is maintained in sk->sk_state. - replace references to SS_LISTEN with TIPC_LISTEN. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 7 ++ net/tipc/socket.c | 59 --- 2 files changed, 47 insertions(+), 19 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index bf049e8fe31b..8a107085f268 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -177,6 +177,13 @@ struct tipc_event { }; /* + * Definitions for the TIPC protocol sk_state field. 
+ */ +enum { + TIPC_LISTEN = 1, +}; + +/* * Socket API */ diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6c71951b7d0c..7c9c97363e81 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -44,8 +44,6 @@ #include "bcast.h" #include "netlink.h" -#define SS_LISTENING -1 /* socket is listening */ - #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 @@ -337,6 +335,32 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) return false; } +/* tipc_set_sk_state - set the sk_state of the socket + * @sk: socket + * + * Caller must hold socket lock + * + * Returns 0 on success, errno otherwise + */ +static int tipc_set_sk_state(struct sock *sk, int state) +{ + int oldstate = sk->sk_socket->state; + int res = -EINVAL; + + switch (state) { + case TIPC_LISTEN: + if (oldstate == SS_UNCONNECTED) + res = 0; + break; + } + + if (res) + return res; + + sk->sk_state = state; + return 0; +} + /** * tipc_sk_create - create a TIPC socket * @net: network namespace (must be default network) @@ -666,15 +690,22 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch ((int)sock->state) { case SS_UNCONNECTED: - if (!tsk->link_cong) - mask |= POLLOUT; + switch (sk->sk_state) { + case TIPC_LISTEN: + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= (POLLIN | POLLRDNORM); + break; + default: + if (!tsk->link_cong) + mask |= POLLOUT; + break; + } break; case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; /* fall thru' */ case SS_CONNECTING: - case SS_LISTENING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; @@ -925,7 +956,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) return -EINVAL; } if (!is_connectionless) { - if (sock->state == SS_LISTENING) + if (sk->sk_state == TIPC_LISTEN) return -EPIPE; if (sock->state != SS_UNCONNECTED) 
return -EISCONN; @@ -1651,7 +1682,6 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) msg_set_dest_droppable(hdr, 1); return false; - case SS_LISTENING: case SS_UNCONNECTED: /* Accept only SYN message */ @@ -2026,15 +2056,9 @@ static int tipc_listen(struct socket *sock, int len) int res; lock_sock(sk); - - if (sock->state != SS_UNCONNECTED) - res = -EINVAL; - else { - sock->state = SS_LISTENING; - res = 0; - } - + res = tipc_set_sk_state(sk, TIPC_LISTEN); release_sock(sk); + return res; } @@ -2060,9 +2084,6 @@ static int tipc_wait_for_accept(struct socket *sock, long timeo) err = 0; if (!skb_queue_empty(&sk->sk_receive_queue)) break; - err = -EINVAL; - if (sock->state != SS_LISTENING) - break; err = -EAGAIN; if (
[PATCH net-next v1 01/16] tipc: return early for non-blocking sockets at link congestion
Until now, in stream/mcast send() we pass the message to the link layer even when the link is congested and add the socket to the link's wakeup queue. This is unnecessary for non-blocking sockets. If a socket is set to non-blocking and sends multicast with zero back off time while receiving EAGAIN, we exhaust the memory. In this commit, we return immediately at stream/mcast send() for non-blocking sockets. Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 6 ++ 1 file changed, 6 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f9f5f3c3dab5..adf3e6ecf61e 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -697,6 +697,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, uint mtu; int rc; + if (!timeo && tsk->link_cong) + return -ELINKCONG; + msg_set_type(mhdr, TIPC_MCAST_MSG); msg_set_lookup_scope(mhdr, TIPC_CLUSTER_SCOPE); msg_set_destport(mhdr, 0); @@ -1072,6 +1075,9 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) } timeo = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); + if (!timeo && tsk->link_cong) + return -ELINKCONG; + dnode = tsk_peer_node(tsk); skb_queue_head_init(&pktchain); -- 2.1.4
[PATCH net-next v1 04/16] tipc: rename struct tipc_skb_cb member handle to bytes_read
In this commit, we rename handle to bytes_read indicating the purpose of the member. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/msg.h| 2 +- net/tipc/socket.c | 18 ++ 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c3832cdf2278..a2aa01b442ef 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -95,7 +95,7 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 struct tipc_skb_cb { - void *handle; + u32 bytes_read; struct sk_buff *tail; bool validated; bool wakeup_pending; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 82aec2eb8497..6e3774a7c831 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -465,7 +465,7 @@ static int tipc_release(struct socket *sock) skb = __skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) break; - if (TIPC_SKB_CB(skb)->handle != NULL) + if (TIPC_SKB_CB(skb)->bytes_read) kfree_skb(skb); else { if ((sock->state == SS_CONNECTING) || @@ -1435,7 +1435,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, struct tipc_msg *msg; long timeo; unsigned int sz; - int sz_to_copy, target, needed; + int target; int sz_copied = 0; u32 err; int res = 0, hlen; @@ -1483,11 +1483,13 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, /* Capture message data (if valid) & compute return value (always) */ if (!err) { - u32 offset = (u32)(unsigned long)(TIPC_SKB_CB(buf)->handle); + u32 offset = (u32)(TIPC_SKB_CB(buf)->bytes_read); + u32 needed; + int sz_to_copy; sz -= offset; needed = (buf_len - sz_copied); - sz_to_copy = (sz <= needed) ? 
sz : needed; + sz_to_copy = min(sz, needed); res = skb_copy_datagram_msg(buf, hlen + offset, m, sz_to_copy); if (res) @@ -1497,8 +1499,8 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (sz_to_copy < sz) { if (!(flags & MSG_PEEK)) - TIPC_SKB_CB(buf)->handle = - (void *)(unsigned long)(offset + sz_to_copy); + TIPC_SKB_CB(buf)->bytes_read = + offset + sz_to_copy; goto exit; } } else { @@ -1742,7 +1744,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb, } /* Enqueue message */ - TIPC_SKB_CB(skb)->handle = NULL; + TIPC_SKB_CB(skb)->bytes_read = 0; __skb_queue_tail(&sk->sk_receive_queue, skb); skb_set_owner_r(skb, sk); @@ -2177,7 +2179,7 @@ static int tipc_shutdown(struct socket *sock, int how) /* Disconnect and send a 'FIN+' or 'FIN-' message to peer */ skb = __skb_dequeue(&sk->sk_receive_queue); if (skb) { - if (TIPC_SKB_CB(skb)->handle != NULL) { + if (TIPC_SKB_CB(skb)->bytes_read) { kfree_skb(skb); goto restart; } -- 2.1.4
[PATCH net-next v1 00/16] tipc: socket layer improvements
The following issues with the current socket layer hinder socket diagnostics implementation, which led to this patch series. 1. tipc socket state is derived from multiple variables like sock->state, tsk->probing_state and tsk->connected. This style forces us to export multiple attributes to the user space, which has to be backward compatible. 2. Abuse of sock->state cannot be exported to user-space without requiring tipc specific hacks in the user-space. - For connectionless (CL) sockets sock->state is overloaded to tipc state SS_READY. - For connection oriented (CO) listening socket sock->state is overloaded to tipc state SS_LISTEN. This series is split into four: 1. Bug fixes in patch #1,2,3. 2. Minor cleanups in patch#4-5. 3. Express all tipc states using a single variable in patch#6-8. 4. Migrate the new tipc states to sk->sk_state in patch#9-16. The figures below represent the FSM after this series: For connectionless sockets: +---+ +--+ | TIPC_OPEN |-->| TIPC_CLOSING | +---+ +--+ Stream Server Listening Socket: +---+ +-+ | TIPC_OPEN |-->| TIPC_LISTEN | +---+ +-+ | +--+| | TIPC_CLOSING |<---+ +--+ Stream Server Data Socket: +---+ +--+ | TIPC_OPEN |-->| TIPC_ESTABLISHED |<---+ +---+ +--+| ^ || | | |+--+ | v +--+ | TIPC_PROBING | +--+ | | v +--+++ | TIPC_CLOSING |<---| TIPC_DISCONNECTING | +--+++ Stream Socket Client: +---+ +-+ | TIPC_OPEN |-->| TIPC_CONNECTING | +---+ +-+ | | v +--+ | TIPC_ESTABLISHED |<---+ +--+| ^ || | | |+-+ | v +--+ | TIPC_PROBING | +--+ | | v +--+++ | TIPC_CLOSING |<---| TIPC_DISCONNECTING | +--+++ NOTE: This is just a base refactoring required for socket diagnostics. TIPC socket diagnostics support will be introduced in a later series. 
Parthasarathy Bhuvaragan (16): tipc: return early for non-blocking sockets at link congestion tipc: wakeup sleeping users at disconnect tipc: set kern=0 in sk_alloc() during tipc_accept() tipc: rename struct tipc_skb_cb member handle to bytes_read tipc: rename tsk->remote to tsk->peer for consistent naming tipc: remove tsk->connected for connectionless sockets tipc: remove tsk->connected from tipc_sock tipc: remove probing_intv from tipc_sock tipc: remove socket state SS_READY tipc: create TIPC_LISTEN as a new sk_state tipc: create TIPC_PROBING/TIPC_ESTABLISHED as new sk_states tipc: create TIPC_OPEN as a new sk_state tipc: create TIPC_DISCONNECTING as a new sk_state tipc: create TIPC_CLOSING as a new sk_state tipc: create TIPC_CONNECTING as a new sk_state tipc: remove SS_CONNECTED sock state include/uapi/linux/tipc.h | 13 ++ net/tipc/msg.h| 2 +- net/tipc/socket.c | 385 +- 3 files changed, 222 insertions(+), 178 deletions(-) -- 2.1.4
[PATCH net-next v1 06/16] tipc: remove tsk->connected for connectionless sockets
Until now, for connectionless sockets the peer information during connect is stored in tsk->peer and a connection state is set in tsk->connected. This is redundant. In this commit, for connectionless sockets we update: - __tipc_sendmsg(), when the destination is NULL the peer existence is determined by tsk->peer.family, instead of tsk->connected. - tipc_connect(), remove set/unset of tsk->connected. Hence tsk->connected is no longer used for connectionless sockets. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8bc9c32c41b5..ebe000888667 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -902,7 +902,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (tsk->connected && sock->state == SS_READY) + if (sock->state == SS_READY && tsk->peer.family == AF_TIPC) dest = &tsk->peer; else return -EDESTADDRREQ; @@ -1939,12 +1939,10 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, if (sock->state == SS_READY) { if (dst->family == AF_UNSPEC) { memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); - tsk->connected = 0; } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { memcpy(&tsk->peer, dest, destlen); - tsk->connected = 1; } goto exit; } -- 2.1.4
[PATCH net-next v1 09/16] tipc: remove socket state SS_READY
Until now, tipc socket state SS_READY declares that the socket is a connectionless socket. In this commit, we remove the state SS_READY and replace it with a condition which returns true for datagram / connectionless sockets. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 49 +++-- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 82e06488de08..6c71951b7d0c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -45,7 +45,6 @@ #include "netlink.h" #define SS_LISTENING -1 /* socket is listening */ -#define SS_READY -2 /* socket is connectionless */ #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ @@ -294,6 +293,16 @@ static bool tipc_sk_connected(struct sock *sk) return sk->sk_socket->state == SS_CONNECTED; } +/* tipc_sk_type_connectionless - check if the socket is datagram socket + * @sk: socket + * + * Returns true if connection less, false otherwise + */ +static bool tipc_sk_type_connectionless(struct sock *sk) +{ + return sk->sk_type == SOCK_RDM || sk->sk_type == SOCK_DGRAM; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -345,7 +354,6 @@ static int tipc_sk_create(struct net *net, struct socket *sock, { struct tipc_net *tn; const struct proto_ops *ops; - socket_state state; struct sock *sk; struct tipc_sock *tsk; struct tipc_msg *msg; @@ -357,16 +365,13 @@ static int tipc_sk_create(struct net *net, struct socket *sock, switch (sock->type) { case SOCK_STREAM: ops = &stream_ops; - state = SS_UNCONNECTED; break; case SOCK_SEQPACKET: ops = &packet_ops; - state = SS_UNCONNECTED; break; case SOCK_DGRAM: case SOCK_RDM: ops = &msg_ops; - state = SS_READY; break; default: return -EPROTOTYPE; @@ -387,7 +392,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, 
/* Finish initializing socket data structures */ sock->ops = ops; - sock->state = state; + sock->state = SS_UNCONNECTED; sock_init_data(sock, sk); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); @@ -407,7 +412,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, tsk->snd_win = tsk_adv_blocks(RCVBUF_MIN); tsk->rcv_win = tsk->snd_win; - if (sock->state == SS_READY) { + if (tipc_sk_type_connectionless(sk)) { tsk_set_unreturnable(tsk, true); if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); @@ -651,12 +656,19 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); + if (tipc_sk_type_connectionless(sk)) { + if (!tsk->link_cong) + mask |= POLLOUT; + if (!skb_queue_empty(&sk->sk_receive_queue)) + mask |= (POLLIN | POLLRDNORM); + return mask; + } + switch ((int)sock->state) { case SS_UNCONNECTED: if (!tsk->link_cong) mask |= POLLOUT; break; - case SS_READY: case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; @@ -893,6 +905,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; struct sk_buff_head pktchain; + bool is_connectionless = tipc_sk_type_connectionless(sk); struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -903,7 +916,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) if (dsz > TIPC_MAX_USER_MSG_SIZE) return -EMSGSIZE; if (unlikely(!dest)) { - if (sock->state == SS_READY && tsk->peer.family == AF_TIPC) + if (is_connectionless && tsk->peer.family == AF_TIPC) dest = &tsk->peer; else return -EDESTADDRREQ; @@ -911,7 +924,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) dest->family != AF_TIPC) {
[PATCH net-next v1 12/16] tipc: create TIPC_OPEN as a new sk_state
In this commit, we create a new tipc socket state TIPC_OPEN in sk_state. We primarily replace the SS_UNCONNECTED sock->state with TIPC_OPEN. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 1 + net/tipc/socket.c | 94 --- 2 files changed, 41 insertions(+), 54 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 189bfed0363c..ae45de5e0d93 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -183,6 +183,7 @@ enum { TIPC_LISTEN = 1, TIPC_PROBING, TIPC_ESTABLISHED, + TIPC_OPEN, }; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 04d388c063a6..da7f7e8244f2 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -347,8 +347,11 @@ static int tipc_set_sk_state(struct sock *sk, int state) int res = -EINVAL; switch (state) { + case TIPC_OPEN: + res = 0; + break; case TIPC_LISTEN: - if (oldstate == SS_UNCONNECTED) + if (oldsk_state == TIPC_OPEN) res = 0; break; case TIPC_PROBING: @@ -359,7 +362,7 @@ static int tipc_set_sk_state(struct sock *sk, int state) if (oldsk_state == TIPC_PROBING || oldsk_state == TIPC_ESTABLISHED || oldstate == SS_CONNECTING || - oldstate == SS_UNCONNECTED) + oldsk_state == TIPC_OPEN) res = 0; break; } @@ -426,8 +429,8 @@ static int tipc_sk_create(struct net *net, struct socket *sock, /* Finish initializing socket data structures */ sock->ops = ops; - sock->state = SS_UNCONNECTED; sock_init_data(sock, sk); + tipc_set_sk_state(sk, TIPC_OPEN); if (tipc_sk_insert(tsk)) { pr_warn("Socket create failed; port number exhausted\n"); return -EINVAL; @@ -451,6 +454,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock, if (sock->type == SOCK_DGRAM) tsk_set_unreliable(tsk, true); } + return 0; } @@ -655,28 +659,6 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, * exits. 
TCP and other protocols seem to rely on higher level poll routines * to handle any preventable race conditions, so TIPC will do the same ... * - * TIPC sets the returned events as follows: - * - * socket stateflags set - * - - * unconnected no read flags - * POLLOUT if port is not congested - * - * connecting POLLIN/POLLRDNORM if ACK/NACK in rx queue - * no write flags - * - * connected POLLIN/POLLRDNORM if data in rx queue - * POLLOUT if port is not congested - * - * disconnecting POLLIN/POLLRDNORM/POLLHUP - * no write flags - * - * listening POLLIN if SYN in rx queue - * no write flags - * - * ready POLLIN/POLLRDNORM if data in rx queue - * [connectionless]POLLOUT (since port cannot be congested) - * * IMPORTANT: The fact that a read or write operation is indicated does NOT * imply that the operation will succeed, merely that it should be performed * and will not block. @@ -690,27 +672,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - if (tipc_sk_type_connectionless(sk)) { - if (!tsk->link_cong) - mask |= POLLOUT; - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); - return mask; - } - switch ((int)sock->state) { - case SS_UNCONNECTED: - switch (sk->sk_state) { - case TIPC_LISTEN: - if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); - break; - default: - if (!tsk->link_cong) - mask |= POLLOUT; - break; - } - break; case SS_CONNECTED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; @@ -722,6 +684,20 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, case SS_DISCONNECTING: mask = (POLLIN | POLLRDNORM | POLLHUP); break; + default: + switch (sk->sk_state) { + case TIPC_OPEN: + if (!tsk->link_cong) + mask |= PO
[PATCH net-next v1 02/16] tipc: wakeup sleeping users at disconnect
Until now, in filter_connect() when we terminate a connection due to an error message from the peer, we set the socket state to DISCONNECTING. The socket is notified about this broken connection using EPIPE when a user tries to send a message. However, if a socket was waiting on a poll() while the connection is being terminated, we fail to wake up that socket. In this commit, we wake up sleeping sockets at connection termination. Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index adf3e6ecf61e..cd01deb1da9c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1599,6 +1599,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /* Let timer expire on it's own */ tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); + sk->sk_state_change(sk); } return true; -- 2.1.4
[PATCH net-next v1 14/16] tipc: create TIPC_CLOSING as a new sk_state
In this commit, we create a new tipc socket state TIPC_CLOSING in sk_state. We primarily replace all the remaining references to SS_DISCONNECTING sock->state with TIPC_CLOSING. Thus the FSM for sockets will be as follows: For connectionless sockets: +---+ +--+ | TIPC_OPEN |-->| TIPC_CLOSING | +---+ +--+ For connection oriented sockets: +---+ +--+ +--+ | TIPC_OPEN |-->| TIPC_ (1..N) |->| TIPC_CLOSING | +---+ +--+ +--+ There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 1 + net/tipc/socket.c | 19 --- 2 files changed, 9 insertions(+), 11 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index f5c03fdfc8a8..a45ebf273e76 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -185,6 +185,7 @@ enum { TIPC_ESTABLISHED, TIPC_OPEN, TIPC_DISCONNECTING, + TIPC_CLOSING, }; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6fef087c4aac..cddf9bca9a90 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -348,6 +348,7 @@ static int tipc_set_sk_state(struct sock *sk, int state) switch (state) { case TIPC_OPEN: + case TIPC_CLOSING: res = 0; break; case TIPC_LISTEN: @@ -508,7 +509,7 @@ static int tipc_release(struct socket *sock) * (which disconnects locally & sends a 'FIN+' to peer) */ dnode = tsk_peer_node(tsk); - while (sock->state != SS_DISCONNECTING) { + while (sk->sk_state != TIPC_CLOSING) { skb = __skb_dequeue(&sk->sk_receive_queue); if (skb == NULL) break; @@ -517,7 +518,7 @@ static int tipc_release(struct socket *sock) else { if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_CLOSING); tipc_node_remove_conn(net, dnode, tsk->portid); } tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); @@ -538,7 +539,7 @@ static int tipc_release(struct socket *sock) } /* Reject any messages that accumulated in backlog queue */ - sock->state = 
SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_CLOSING); release_sock(sk); call_rcu(&tsk->rcu, tipc_sk_callback); @@ -685,9 +686,6 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; - case SS_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); - break; default: switch (sk->sk_state) { case TIPC_OPEN: @@ -697,6 +695,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, (!skb_queue_empty(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); break; + case TIPC_CLOSING: case TIPC_DISCONNECTING: mask = (POLLIN | POLLRDNORM | POLLHUP); break; @@ -881,7 +880,7 @@ static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_CLOSING) return -EPIPE; if (!*timeo_p) return -EAGAIN; @@ -1334,7 +1333,7 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); if (timeo && skb_queue_empty(&sk->sk_receive_queue)) { - if (sock->state == SS_DISCONNECTING) { + if (sk->sk_state == TIPC_CLOSING) { err = -ENOTCONN; break; } @@ -1675,13 +1674,11 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); return false; - - case SS_DISCONNECTING: - break; } switch (sk->sk_state) { case TIPC_OPEN: + case TIPC_CLOSING: case TIPC_DISCONNECTING: break; case TIPC_LISTEN: -- 2.1.4
[PATCH net-next v1 11/16] tipc: create TIPC_PROBING/TIPC_ESTABLISHED as new sk_states
Until now, tipc maintains the probing state for connected sockets in the tsk->probing_state variable. In this commit, we express this information as socket states and thus remove the variable. The sk_state is set to TIPC_PROBING instead of setting probing_state to TIPC_CONN_PROBING. Similarly sk_state is set to TIPC_ESTABLISHED instead of TIPC_CONN_OK. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 2 ++ net/tipc/socket.c | 23 +-- 2 files changed, 19 insertions(+), 6 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 8a107085f268..189bfed0363c 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -181,6 +181,8 @@ struct tipc_event { */ enum { TIPC_LISTEN = 1, + TIPC_PROBING, + TIPC_ESTABLISHED, }; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7c9c97363e81..04d388c063a6 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -47,8 +47,6 @@ #define CONN_TIMEOUT_DEFAULT 8000/* default connect timeout = 8s */ #define CONN_PROBING_INTERVAL msecs_to_jiffies(360) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 -#define TIPC_CONN_OK 0 -#define TIPC_CONN_PROBING 1 #define TIPC_MAX_PORT 0x #define TIPC_MIN_PORT 1 @@ -345,6 +343,7 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) static int tipc_set_sk_state(struct sock *sk, int state) { int oldstate = sk->sk_socket->state; + int oldsk_state = sk->sk_state; int res = -EINVAL; switch (state) { @@ -352,6 +351,17 @@ static int tipc_set_sk_state(struct sock *sk, int state) if (oldstate == SS_UNCONNECTED) res = 0; break; + case TIPC_PROBING: + if (oldsk_state == TIPC_ESTABLISHED) + res = 0; + break; + case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_PROBING || + oldsk_state == TIPC_ESTABLISHED || + oldstate == SS_CONNECTING || + oldstate == SS_UNCONNECTED) + res = 0; + break; } if (res) @@ -855,7 +865,8 @@ static void tipc_sk_proto_rcv(struct
tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; - tsk->probing_state = TIPC_CONN_OK; + if (tipc_set_sk_state(sk, TIPC_ESTABLISHED)) + goto exit; if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); @@ -1195,8 +1206,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_state = TIPC_CONN_OK; sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -2260,7 +2271,7 @@ static void tipc_sk_timeout(unsigned long data) peer_port = tsk_peer_port(tsk); peer_node = tsk_peer_node(tsk); - if (tsk->probing_state == TIPC_CONN_PROBING) { + if (sk->sk_state == TIPC_PROBING) { if (!sock_owned_by_user(sk)) { sk->sk_socket->state = SS_DISCONNECTING; tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), @@ -2278,7 +2289,7 @@ static void tipc_sk_timeout(unsigned long data) skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, peer_node, own_node, peer_port, tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; + tipc_set_sk_state(sk, TIPC_PROBING); sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) -- 2.1.4
[PATCH net-next v1 16/16] tipc: remove SS_CONNECTED sock state
In this commit, we remove the state SS_CONNECTED and replace it with the function tipc_sk_state_connected() wherever possible. A socket with sk_state TIPC_ESTABLISHED or TIPC_PROBING replaces the socket state SS_CONNECTED. After these changes, the sock->state is no longer explicitly used by tipc. The FSM below is for various types of connection oriented sockets. Stream Server Listening Socket: +---+ +-+ | TIPC_OPEN |-->| TIPC_LISTEN | +---+ +-+ | +--+| | TIPC_CLOSING |<---+ +--+ Stream Server Data Socket: +---+ +--+ | TIPC_OPEN |-->| TIPC_ESTABLISHED |<---+ +---+ +--+| ^ || | | |+--+ | v +--+ | TIPC_PROBING | +--+ | | v +--+++ | TIPC_CLOSING |<---| TIPC_DISCONNECTING | +--+++ Stream Socket Client: +---+ +-+ | TIPC_OPEN |-->| TIPC_CONNECTING | +---+ +-+ | | v +--+ | TIPC_ESTABLISHED |<---+ +--+| ^ || | | |+-+ | v +--+ | TIPC_PROBING | +--+ | | v +--+++ | TIPC_CLOSING |<---| TIPC_DISCONNECTING | +--+++ Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 100 +- 1 file changed, 47 insertions(+), 53 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7702acf0bfab..2a095fa6bff7 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -286,7 +286,7 @@ static void tsk_rej_rx_queue(struct sock *sk) static bool tipc_sk_connected(struct sock *sk) { - return sk->sk_socket->state == SS_CONNECTED; + return sk->sk_state == TIPC_ESTABLISHED || sk->sk_state == TIPC_PROBING; } /* tipc_sk_type_connectionless - check if the socket is datagram socket @@ -517,7 +517,7 @@ static int tipc_release(struct socket *sock) kfree_skb(skb); else { if ((sk->sk_state == TIPC_CONNECTING) || - (sock->state == SS_CONNECTED)) { + tipc_sk_connected(sk)) { tipc_set_sk_state(sk, TIPC_CLOSING); tipc_node_remove_conn(net, dnode, tsk->portid); } @@ -631,7 +631,7 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, memset(addr, 0, sizeof(*addr)); if (peer) { - if ((sock->state != SS_CONNECTED) && + if 
((!tipc_sk_connected(sk)) && ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); @@ -677,30 +677,28 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - if ((int)sock->state == SS_CONNECTED) { + switch (sk->sk_state) { + case TIPC_PROBING: + case TIPC_ESTABLISHED: if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; + /* fall thru' */ + case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); - } else { - switch (sk->sk_state) { - case TIPC_OPEN: - if (!tsk->link_cong) - mask |= POLLOUT; - if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) - mask |= (POLLIN | POLLRDNORM); - break; - case TIPC_CLOSING: - case TIPC_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); - break; - case TIPC_LISTEN: - case TIPC_CONNE
[PATCH net-next v1 15/16] tipc: create TIPC_CONNECTING as a new sk_state
In this commit, we create a new tipc socket state TIPC_CONNECTING by primarily replacing the SS_CONNECTING with TIPC_CONNECTING. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 1 + net/tipc/socket.c | 61 ++- 2 files changed, 29 insertions(+), 33 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index a45ebf273e76..f2d9294d0920 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -186,6 +186,7 @@ enum { TIPC_OPEN, TIPC_DISCONNECTING, TIPC_CLOSING, + TIPC_CONNECTING, }; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cddf9bca9a90..7702acf0bfab 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -342,7 +342,6 @@ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) */ static int tipc_set_sk_state(struct sock *sk, int state) { - int oldstate = sk->sk_socket->state; int oldsk_state = sk->sk_state; int res = -EINVAL; @@ -352,6 +351,7 @@ static int tipc_set_sk_state(struct sock *sk, int state) res = 0; break; case TIPC_LISTEN: + case TIPC_CONNECTING: if (oldsk_state == TIPC_OPEN) res = 0; break; @@ -366,7 +366,7 @@ static int tipc_set_sk_state(struct sock *sk, int state) case TIPC_DISCONNECTING: if (oldsk_state == TIPC_PROBING || oldsk_state == TIPC_ESTABLISHED || - oldstate == SS_CONNECTING) + oldsk_state == TIPC_CONNECTING) res = 0; break; } @@ -516,7 +516,7 @@ static int tipc_release(struct socket *sock) if (TIPC_SKB_CB(skb)->bytes_read) kfree_skb(skb); else { - if ((sock->state == SS_CONNECTING) || + if ((sk->sk_state == TIPC_CONNECTING) || (sock->state == SS_CONNECTED)) { tipc_set_sk_state(sk, TIPC_CLOSING); tipc_node_remove_conn(net, dnode, tsk->portid); @@ -677,16 +677,12 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, sock_poll_wait(file, sk_sleep(sk), wait); - switch ((int)sock->state) { - case SS_CONNECTED: + if ((int)sock->state == SS_CONNECTED) 
{ if (!tsk->link_cong && !tsk_conn_cong(tsk)) mask |= POLLOUT; - /* fall thru' */ - case SS_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); - break; - default: + } else { switch (sk->sk_state) { case TIPC_OPEN: if (!tsk->link_cong) @@ -700,6 +696,7 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, mask = (POLLIN | POLLRDNORM | POLLHUP); break; case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); break; @@ -1004,7 +1001,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) rc = tipc_node_xmit(net, &pktchain, dnode, tsk->portid); if (likely(!rc)) { if (!is_connectionless) - sock->state = SS_CONNECTING; + tipc_set_sk_state(sk, TIPC_CONNECTING); return dsz; } if (rc == -ELINKCONG) { @@ -1640,9 +1637,10 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) sk->sk_state_change(sk); } return true; + } - case SS_CONNECTING: - + switch (sk->sk_state) { + case TIPC_CONNECTING: /* Accept only ACK or NACK message */ if (unlikely(!msg_connected(hdr))) return false; @@ -1674,9 +1672,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); return false; - } - switch (sk->sk_state) { case TIPC_OPEN: case TIPC_CLOSING: case TIPC_DISCONNECTING: @@ -1946,7 +1942,8 @@ static int tipc_wait_for_connect(struct socket *sock, long *timeo_p) return sock_intr_errno(*timeo_p);
[PATCH net-next v1 08/16] tipc: remove probing_intv from tipc_sock
Until now, probing_intv is a variable in struct tipc_sock but is always set to a constant CONN_PROBING_INTERVAL. The socket connection is probed based on this value. In this commit, we remove this variable and setup the socket timer based on the constant CONN_PROBING_INTERVAL. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 19 +-- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 8299964cd373..82e06488de08 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -67,7 +67,6 @@ * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: - * @probing_intv: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @link_cong: non-zero if owner must sleep because of link congestion @@ -89,7 +88,6 @@ struct tipc_sock { struct list_head publications; u32 pub_count; u32 probing_state; - unsigned long probing_intv; uint conn_timeout; atomic_t dupl_rcvcnt; bool link_cong; @@ -1153,9 +1151,8 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); tsk->peer_caps = tipc_node_get_capabilities(net, peer_node); @@ -2240,13 +2237,15 @@ static void tipc_sk_timeout(unsigned long data) sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); } - } else { - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, 
tsk->portid, TIPC_OK); - tsk->probing_state = TIPC_CONN_PROBING; - sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); + bh_unlock_sock(sk); + goto exit; } + + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, + INT_H_SIZE, 0, peer_node, own_node, + peer_port, tsk->portid, TIPC_OK); + tsk->probing_state = TIPC_CONN_PROBING; + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); bh_unlock_sock(sk); if (skb) tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); -- 2.1.4
[PATCH net-next v1 13/16] tipc: create TIPC_DISCONNECTING as a new sk_state
In this commit, we create a new tipc socket state TIPC_DISCONNECTING in sk_state. TIPC_DISCONNECTING is replacing the socket connection status update using SS_DISCONNECTING. TIPC_DISCONNECTING is set for connection oriented sockets at: - tipc_shutdown() - connection probe timeout - when we receive an error message on the connection. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 1 + net/tipc/socket.c | 39 +++ 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index ae45de5e0d93..f5c03fdfc8a8 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -184,6 +184,7 @@ enum { TIPC_PROBING, TIPC_ESTABLISHED, TIPC_OPEN, + TIPC_DISCONNECTING, }; /* diff --git a/net/tipc/socket.c b/net/tipc/socket.c index da7f7e8244f2..6fef087c4aac 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -359,10 +359,13 @@ static int tipc_set_sk_state(struct sock *sk, int state) res = 0; break; case TIPC_ESTABLISHED: + if (oldsk_state == TIPC_OPEN) + res = 0; + /* fall thru' */ + case TIPC_DISCONNECTING: if (oldsk_state == TIPC_PROBING || oldsk_state == TIPC_ESTABLISHED || - oldstate == SS_CONNECTING || - oldsk_state == TIPC_OPEN) + oldstate == SS_CONNECTING) res = 0; break; } @@ -621,13 +624,14 @@ static int tipc_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer) { struct sockaddr_tipc *addr = (struct sockaddr_tipc *)uaddr; - struct tipc_sock *tsk = tipc_sk(sock->sk); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); struct tipc_net *tn = net_generic(sock_net(sock->sk), tipc_net_id); memset(addr, 0, sizeof(*addr)); if (peer) { if ((sock->state != SS_CONNECTED) && - ((peer != 2) || (sock->state != SS_DISCONNECTING))) + ((peer != 2) || (sk->sk_state != TIPC_DISCONNECTING))) return -ENOTCONN; addr->addr.id.ref = tsk_peer_port(tsk); addr->addr.id.node = 
tsk_peer_node(tsk); @@ -693,6 +697,9 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, (!skb_queue_empty(&sk->sk_receive_queue))) mask |= (POLLIN | POLLRDNORM); break; + case TIPC_DISCONNECTING: + mask = (POLLIN | POLLRDNORM | POLLHUP); + break; case TIPC_LISTEN: if (!skb_queue_empty(&sk->sk_receive_queue)) mask |= (POLLIN | POLLRDNORM); @@ -1029,7 +1036,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) int err = sock_error(sk); if (err) return err; - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else if (sock->state != SS_CONNECTED) return -ENOTCONN; @@ -1099,7 +1106,7 @@ static int __tipc_send_stream(struct socket *sock, struct msghdr *m, size_t dsz) return -EMSGSIZE; if (unlikely(sock->state != SS_CONNECTED)) { - if (sock->state == SS_DISCONNECTING) + if (sk->sk_state == TIPC_DISCONNECTING) return -EPIPE; else return -ENOTCONN; @@ -1627,7 +1634,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); /* Let timer expire on it's own */ tipc_node_remove_conn(net, tsk_peer_node(tsk), tsk->portid); @@ -1642,13 +1649,13 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return false; if (unlikely(msg_errcode(hdr))) { - sock->state = SS_DISCONNECTING; + tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; return true; } if (unlikely(!msg_isdata(hdr))) { -
[PATCH net-next v1 07/16] tipc: remove tsk->connected from tipc_sock
Until now, we determine if a socket is connected or not based on tsk->connected, which is set once when the probing state is set to TIPC_CONN_OK. It is unset when the sock->state is updated from SS_CONNECTED to any other state. In this commit, we remove connected variable from tipc_sock and derive socket connection status from the following condition: sock->state == SS_CONNECTED => tsk->connected There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 36 +++- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index ebe000888667..8299964cd373 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -58,7 +58,6 @@ /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API - * @connected: non-zero if port is currently connected to a peer port * @conn_type: TIPC type used when connection was established * @conn_instance: TIPC instance used when connection was established * @published: non-zero if port has one or more associated names @@ -80,7 +79,6 @@ */ struct tipc_sock { struct sock sk; - int connected; u32 conn_type; u32 conn_instance; int published; @@ -293,6 +291,11 @@ static void tsk_rej_rx_queue(struct sock *sk) tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); } +static bool tipc_sk_connected(struct sock *sk) +{ + return sk->sk_socket->state == SS_CONNECTED; +} + /* tsk_peer_msg - verify if message was sent by connected port's peer * * Handles cases where the node's network address has changed from @@ -300,12 +303,13 @@ static void tsk_rej_rx_queue(struct sock *sk) */ static bool tsk_peer_msg(struct tipc_sock *tsk, struct tipc_msg *msg) { - struct tipc_net *tn = net_generic(sock_net(&tsk->sk), tipc_net_id); + struct sock *sk = &tsk->sk; + struct tipc_net *tn = net_generic(sock_net(sk), tipc_net_id); u32 peer_port = tsk_peer_port(tsk); u32 orig_node; u32 peer_node; - 
if (unlikely(!tsk->connected)) + if (unlikely(!tipc_sk_connected(sk))) return false; if (unlikely(msg_origport(msg) != peer_port)) @@ -470,7 +474,6 @@ static int tipc_release(struct socket *sock) if ((sock->state == SS_CONNECTING) || (sock->state == SS_CONNECTED)) { sock->state = SS_DISCONNECTING; - tsk->connected = 0; tipc_node_remove_conn(net, dnode, tsk->portid); } tipc_sk_respond(sk, skb, TIPC_ERR_NO_PORT); @@ -480,7 +483,7 @@ static int tipc_release(struct socket *sock) tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); - if (tsk->connected) { + if (tipc_sk_connected(sk)) { skb = tipc_msg_create(TIPC_CRITICAL_IMPORTANCE, TIPC_CONN_MSG, SHORT_H_SIZE, 0, dnode, tsk_own_node(tsk), tsk_peer_port(tsk), @@ -1010,7 +1013,7 @@ static int tipc_wait_for_sndpkt(struct socket *sock, long *timeo_p) done = sk_wait_event(sk, timeo_p, (!tsk->link_cong && !tsk_conn_cong(tsk)) || -!tsk->connected); + !tipc_sk_connected(sk)); finish_wait(sk_sleep(sk), &wait); } while (!done); return 0; @@ -1152,7 +1155,6 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, tsk->probing_intv = CONN_PROBING_INTERVAL; tsk->probing_state = TIPC_CONN_OK; - tsk->connected = 1; sk_reset_timer(sk, &sk->sk_timer, jiffies + tsk->probing_intv); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); @@ -1261,13 +1263,14 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct tipc_msg *msg, static void tipc_sk_send_ack(struct tipc_sock *tsk) { - struct net *net = sock_net(&tsk->sk); + struct sock *sk = &tsk->sk; + struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); - if (!tsk->connected) + if (!tipc_sk_connected(sk)) return; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_
[PATCH net-next v1 05/16] tipc: rename tsk->remote to tsk->peer for consistent naming
Until now, the peer information for connect is stored in tsk->remote but the rest of the code uses the name peer for peer/remote. In this commit, we rename tsk->remote to tsk->peer to align with the naming convention followed in the rest of the code. There is no functional change in this commit. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 11 +-- 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6e3774a7c831..8bc9c32c41b5 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -65,7 +65,6 @@ * @max_pkt: maximum packet size "hint" used when building messages sent by port * @portid: unique port identity in TIPC socket hash table * @phdr: preformatted message header used when sending messages - * @port_list: adjacent ports in TIPC's global list of ports * @publications: list of publications for port * @pub_count: total # of publications port has made during its lifetime * @probing_state: @@ -75,7 +74,7 @@ * @link_cong: non-zero if owner must sleep because of link congestion * @sent_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer - * @remote: 'connected' peer for dgram/rdm + * @peer: 'connected' peer for dgram/rdm * @node: hash table node * @rcu: rcu struct for tipc_sock */ @@ -101,7 +100,7 @@ struct tipc_sock { u16 peer_caps; u16 rcv_unacked; u16 rcv_win; - struct sockaddr_tipc remote; + struct sockaddr_tipc peer; struct rhash_head node; struct rcu_head rcu; }; @@ -904,7 +903,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) return -EMSGSIZE; if (unlikely(!dest)) { if (tsk->connected && sock->state == SS_READY) - dest = &tsk->remote; + dest = &tsk->peer; else return -EDESTADDRREQ; } else if (unlikely(m->msg_namelen < sizeof(*dest)) || @@ -1939,12 +1938,12 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, /* DGRAM/RDM
connect(), just save the destaddr */ if (sock->state == SS_READY) { if (dst->family == AF_UNSPEC) { - memset(&tsk->remote, 0, sizeof(struct sockaddr_tipc)); + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); tsk->connected = 0; } else if (destlen != sizeof(struct sockaddr_tipc)) { res = -EINVAL; } else { - memcpy(&tsk->remote, dest, destlen); + memcpy(&tsk->peer, dest, destlen); tsk->connected = 1; } goto exit; -- 2.1.4
[PATCH net-next v1 03/16] tipc: set kern=0 in sk_alloc() during tipc_accept()
Until now, tipc_accept() calls sk_alloc() with kern=1. This is incorrect as the data socket's owner is the user application. Thus for these accepted data sockets the network namespace refcount is skipped. In this commit, we fix this by setting kern=0. Acked-by: Ying Xue Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cd01deb1da9c..82aec2eb8497 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2093,7 +2093,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags) buf = skb_peek(&sk->sk_receive_queue); - res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 1); + res = tipc_sk_create(sock_net(sock->sk), new_sock, 0, 0); if (res) goto exit; security_sk_clone(sock->sk, new_sock->sk); -- 2.1.4
[PATCH iproute2 net-next v1 0/7] tipc: updates for neighbour monitor
We add configuration support for the new link monitoring attributes. Parthasarathy Bhuvaragan (7): tipc: remove dead code tipc: add link monitor set threshold tipc: add link monitor get threshold tipc: add link monitor summary tipc: refactor bearer to facilitate link monitor tipc: add link monitor list tipc: update man page for link monitor man/man8/tipc-link.8 | 104 + tipc/bearer.c| 75 ++ tipc/bearer.h| 4 + tipc/link.c | 408 ++- 4 files changed, 556 insertions(+), 35 deletions(-) -- 2.1.4
[PATCH iproute2 net-next v1 1/7] tipc: remove dead code
Remove the unreachable "return 0;" that follows "return msg_doit(...)" in cmd_link_set_prop(), and drop a duplicate blank line before cmd_link_get_prop(). Signed-off-by: Parthasarathy Bhuvaragan --- tipc/link.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tipc/link.c b/tipc/link.c index 061b1c534389..8bdc98224d39 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -90,7 +90,6 @@ static int link_get_cb(const struct nlmsghdr *nlh, void *data) return MNL_CB_OK; } - static int cmd_link_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, void *data) { @@ -475,8 +474,6 @@ static int cmd_link_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, mnl_attr_nest_end(nlh, attrs); return msg_doit(nlh, link_get_cb, &prop); - - return 0; } static int cmd_link_set(struct nlmsghdr *nlh, const struct cmd *cmd, -- 2.1.4
[PATCH iproute2 net-next v1 6/7] tipc: add link monitor list
In this commit, we list the monitor attributes. By default it lists the attributes for all bearers, otherwise the specified bearer. A sample usage is shown below: $ tipc link monitor list bearer eth:data0 node status monitored generation applied_node_status [non_applied_node:status] 1.1.1 up direct16 UU [] 1.1.2 up direct16 UU [] 1.1.3 up direct16 UU [] bearer eth:data1 node status monitored generation applied_node_status [non_applied_node:status] 1.1.1 up direct2 UU [] 1.1.2 up direct3 UU [] 1.1.3 up direct3 UU [] $ tipc link monitor list media eth device data0 bearer eth:data0 node status monitored generation applied_node_status [non_applied_node:status] 1.1.1 up direct16 UU [] 1.1.2 up direct16 UU [] 1.1.3 up direct16 UU [] $ tipc link monitor list -h Usage: tipc monitor list [ media MEDIA ARGS...] MEDIA udp - User Datagram Protocol ib- Infiniband eth - Ethernet Acked-by: Jon Maloy Tested-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- tipc/link.c | 237 1 file changed, 237 insertions(+) diff --git a/tipc/link.c b/tipc/link.c index df93409f2173..0b5c0491a35f 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -22,6 +22,7 @@ #include "cmdl.h" #include "msg.h" #include "link.h" +#include "bearer.h" static int link_list_cb(const struct nlmsghdr *nlh, void *data) { @@ -558,6 +559,240 @@ static int cmd_link_mon_summary(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_dumpit(nlh, link_mon_summary_cb, NULL); } +#define STATUS_WIDTH 7 +#define MAX_NODE_WIDTH 14 /* 255.4095.4095 */ +#define MAX_DOM_GEN_WIDTH 11 /* 65535 */ +#define DIRECTLY_MON_WIDTH 10 + +#define APPL_NODE_STATUS_WIDTH 5 + +static int map_get(uint64_t up_map, int i) +{ + return (up_map & (1 << i)) >> i; +} + +/* print the applied members, since we know the the members + * are listed in ascending order, we print only the state */ +static void link_mon_print_applied(uint16_t applied, uint64_t up_map) +{ + int i; + char state; + + for (i = 0; i < applied; i++) { + /* print the delimiter for every 
-n- entry */ + if (i && !(i % APPL_NODE_STATUS_WIDTH)) + printf(","); + + state = map_get(up_map, i) ? 'U' : 'D'; + printf("%c", state); + } +} + +/* print the non applied members, since we dont know + * the members, we print them along with the state */ +static void link_mon_print_non_applied(uint16_t applied, uint16_t member_cnt, + uint64_t up_map, uint32_t *members) +{ + int i; + char state; + + printf(" ["); + for (i = applied; i < member_cnt; i++) { + char addr_str[16]; + + /* print the delimiter for every entry */ + if (i != applied) + printf(","); + + sprintf(addr_str, "%u.%u.%u:", tipc_zone(members[i]), + tipc_cluster(members[i]), tipc_node(members[i])); + state = map_get(up_map, i) ? 'U' : 'D'; + printf("%s%c", addr_str, state); + } + printf("]"); +} + +static void link_mon_print_peer_state(const uint32_t addr, const char *status, + const char *monitored, + const uint32_t dom_gen) +{ + char addr_str[16]; + + sprintf(addr_str, "%u.%u.%u", tipc_zone(addr), tipc_cluster(addr), + tipc_node(addr)); + + printf("%-*s", MAX_NODE_WIDTH, addr_str); + printf("%-*s", STATUS_WIDTH, status); + printf("%-*s", DIRECTLY_MON_WIDTH, monitored); + printf("%-*u", MAX_DOM_GEN_WIDTH, dom_gen); +} + +static int link_mon_peer_list_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *attrs[TIPC_NLA_MON_PEER_MAX + 1] = {}; + struct nlattr *info[TIPC_NLA_MAX + 1] = {}; + uint16_t member_cnt; + uint32_t applied; + uint32_t dom_gen; + uint64_t up_map; + char status[16]; + char monitored[16]; + + mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info); + if (!info[TIPC_NLA_MON_PEER]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(info[TIPC_NLA_MON_PEER], parse_attrs, attrs); + + (attrs[TIPC_NLA_MON_PEER_LOCAL] || attrs[TIPC_NLA_MON_PEER_HEAD]) ? +
[PATCH iproute2 net-next v1 5/7] tipc: refactor bearer to facilitate link monitor
In this commit, we: 1. Export print_bearer_media() 2. Move the bearer name handling from nl_add_bearer_name() into a new function cmd_get_unique_bearer_name(). These exported functions will be used by link monitor used in subsequent commits. Signed-off-by: Parthasarathy Bhuvaragan --- tipc/bearer.c | 75 +++ tipc/bearer.h | 4 2 files changed, 48 insertions(+), 31 deletions(-) diff --git a/tipc/bearer.c b/tipc/bearer.c index 8729dad4a060..810344f672af 100644 --- a/tipc/bearer.c +++ b/tipc/bearer.c @@ -45,7 +45,7 @@ static void _print_bearer_opts(void) " window- Bearer link window\n"); } -static void _print_bearer_media(void) +void print_bearer_media(void) { fprintf(stderr, "\nMEDIA\n" @@ -192,14 +192,28 @@ static int nl_add_udp_enable_opts(struct nlmsghdr *nlh, struct opt *opts, } static int nl_add_bearer_name(struct nlmsghdr *nlh, const struct cmd *cmd, - struct cmdl *cmdl, struct opt *opts, - struct tipc_sup_media sup_media[]) + struct cmdl *cmdl, struct opt *opts, + const struct tipc_sup_media *sup_media) +{ + char bname[TIPC_MAX_BEARER_NAME]; + int err; + + if ((err = cmd_get_unique_bearer_name(cmd, cmdl, opts, bname, sup_media))) + return err; + + mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, bname); + return 0; +} + +int cmd_get_unique_bearer_name(const struct cmd *cmd, struct cmdl *cmdl, + struct opt *opts, char *bname, + const struct tipc_sup_media *sup_media) { - char id[TIPC_MAX_BEARER_NAME]; char *media; char *identifier; struct opt *opt; - struct tipc_sup_media *entry; + const struct tipc_sup_media *entry; + if (!(opt = get_opt(opts, "media"))) { if (help_flag) @@ -219,13 +233,12 @@ static int nl_add_bearer_name(struct nlmsghdr *nlh, const struct cmd *cmd, (entry->help)(cmdl, media); else fprintf(stderr, "error, missing bearer %s\n", - entry->identifier); + entry->identifier); return -EINVAL; } identifier = opt->val; - snprintf(id, sizeof(id), "%s:%s", media, identifier); - mnl_attr_put_strz(nlh, TIPC_NLA_BEARER_NAME, id); + snprintf(bname, 
TIPC_MAX_BEARER_NAME, "%s:%s", media, identifier); return 0; } @@ -270,13 +283,13 @@ static int udp_bearer_add(struct nlmsghdr *nlh, struct opt *opts, if ((err = getaddrinfo(ip, remport, &hints, &addr))) { fprintf(stderr, "UDP address error: %s\n", - gai_strerror(err)); + gai_strerror(err)); freeaddrinfo(addr); return err; } mnl_attr_put(nlh, TIPC_NLA_UDP_REMOTE, addr->ai_addrlen, - addr->ai_addr); +addr->ai_addr); freeaddrinfo(addr); } else { fprintf(stderr, "error, missing remoteip\n"); @@ -302,7 +315,7 @@ static int cmd_bearer_add_media(struct nlmsghdr *nlh, const struct cmd *cmd, { "media", OPT_KEYVAL, NULL }, { NULL } }; - struct tipc_sup_media sup_media[] = { + const struct tipc_sup_media sup_media[] = { { "udp","name", cmd_bearer_add_udp_help}, { NULL, }, }; @@ -366,7 +379,7 @@ static void cmd_bearer_enable_help(struct cmdl *cmdl) " domain DOMAIN - Discovery domain\n" " priority PRIORITY - Bearer priority\n", cmdl->argv[0]); - _print_bearer_media(); + print_bearer_media(); } static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, @@ -389,9 +402,9 @@ static int cmd_bearer_enable(struct nlmsghdr *nlh, const struct cmd *cmd, { NULL } }; struct tipc_sup_media sup_media[] = { - { "udp","name", cmd_bearer_enable_udp_help}, - { "eth","device", cmd_bearer_enable_l2_help }, - { "ib", "device", cmd_bearer_enable_l2_help }, + { "udp","name", cmd_bearer_enable_udp_help}, +
[PATCH iproute2 net-next v1 3/7] tipc: add link monitor get threshold
The command prints the monitor activation threshold. A sample usage is shown below: $ tipc link monitor get threshold 32 $ tipc link monitor get -h Usage: tipc monitor get PPROPERTY PROPERTIES threshold - Get monitor activation threshold Acked-by: Jon Maloy Tested-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- tipc/link.c | 56 +++- 1 file changed, 55 insertions(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index 3469cd302469..3f0c32106772 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -534,12 +534,65 @@ static int cmd_link_mon_set(struct nlmsghdr *nlh, const struct cmd *cmd, return run_cmd(nlh, cmd, cmds, cmdl, NULL); } +static void cmd_link_mon_get_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s monitor get PPROPERTY \n\n" + "PROPERTIES\n" + " threshold - Get monitor activation threshold\n", + cmdl->argv[0]); +} + +static int link_mon_get_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *info[TIPC_NLA_MAX + 1] = {}; + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1] = {}; + + mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info); + if (!info[TIPC_NLA_MON]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(info[TIPC_NLA_MON], parse_attrs, attrs); + if (!attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD]) + return MNL_CB_ERROR; + + printf("%u\n", + mnl_attr_get_u32(attrs[TIPC_NLA_MON_ACTIVATION_THRESHOLD])); + + return MNL_CB_OK; +} + +static int cmd_link_mon_get_prop(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + + if (!(nlh = msg_init(buf, TIPC_NL_MON_GET))) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + return msg_doit(nlh,link_mon_get_cb,NULL); +} + +static int cmd_link_mon_get(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + const struct cmd cmds[] = { + { "threshold", cmd_link_mon_get_prop, NULL}, + { NULL } + }; + + return run_cmd(nlh, cmd, 
cmds, cmdl, NULL); +} + static void cmd_link_mon_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s montior COMMAND [ARGS] ...\n\n" "COMMANDS\n" - " set - Set monitor properties\n", + " set - Set monitor properties\n" + " get - Get monitor properties\n", cmdl->argv[0]); } @@ -548,6 +601,7 @@ static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl { const struct cmd cmds[] = { { "set",cmd_link_mon_set, cmd_link_mon_set_help }, + { "get",cmd_link_mon_get, cmd_link_mon_get_help }, { NULL } }; -- 2.1.4
[PATCH iproute2 net-next v1 7/7] tipc: update man page for link monitor
Add description for the new link monitor commands. Signed-off-by: Parthasarathy Bhuvaragan --- man/man8/tipc-link.8 | 104 +++ 1 file changed, 104 insertions(+) diff --git a/man/man8/tipc-link.8 b/man/man8/tipc-link.8 index 2ee03a0bd96e..fee283e5cfff 100644 --- a/man/man8/tipc-link.8 +++ b/man/man8/tipc-link.8 @@ -39,6 +39,29 @@ tipc-link \- show links or modify link properties .B tipc link list .br +.ti -8 +.B tipc link monitor set +.RB "{ " "threshold" " } " + +.ti -8 +.B tipc link monitor get +.RB "{ " "threshold" " } " + +.ti -8 +.B tipc link monitor summary +.br + +.ti -8 +.B tipc link monitor list +.br +.RB "[ " "media " " { " eth " | " ib " } " device +.IR "DEVICE" " ]" +.RB "|" +.br +.RB "[ " "media udp name" +.IR NAME " ]" +.br + .SH OPTIONS Options (flags) that can be passed anywhere in the command chain. .TP @@ -204,6 +227,87 @@ The link window controls how many unacknowledged messages a link endpoint can have in its transmit queue before TIPC's congestion control mechanism is activated. +.SS Monitor properties + +.TP +.B threshold +.br +The threshold specifies the cluster size exceeding which the link monitoring +algorithm will switch from "full-mesh" to "overlapping-ring". +If set to 0 the overlapping-ring monitoring is always on and if set to a +value larger than anticipated cluster size the overlapping-ring is disabled. +The default value is 32. + +.SS Monitor information + +.TP +.B table_generation +.br +Represents the event count in a node's local monitoring list. It steps every +time something changes in the local monitor list, including changes in the +local domain. + +.TP +.B cluster_size +.br +Represents the current count of cluster members. + +.TP +.B algorithm +.br +The current supervision algorithm used for neighbour monitoring for the bearer. +Possible values are full-mesh or overlapping-ring. + +.TP +.B status +.br +The node status derived by the local node. +Possible status are up or down. 
+ +.TP +.B monitored +.br +Represent the type of monitoring chosen by the local node. +Possible values are direct or indirect. + +.TP +.B generation +.br +Represents the domain generation which is the event count in a node's local +domain. Every time something changes (peer add/remove/up/down) the domain +generation is stepped and a new version of node record is sent to inform +the neighbors about this change. The domain generation helps the receiver +of a domain record to know if it should ignore or process the record. + +.TP +.B applied_node_status +.br +The node status reported by the peer node for the succeeding peers in +the node list. The Node list is a circular list of ascending addresses +starting with the local node. +Possible status are: U or D. The status U implies up and D down. + +.TP +.B [non_applied_node:status] +.br +Represents the nodes and their status as reported by the peer node. +These nodes were not applied to the monitoring list for this peer node. +They are usually transient and occur during the cluster startup phase +or network reconfiguration. +Possible status are: U or D. The status U implies up and D down. + +.SH EXAMPLES +.PP +tipc link monitor list +.RS 4 +Shows the link monitoring information for cluster members on device data0. +.RE +.PP +tipc link monitor summary +.RS 4 +The monitor summary command prints the basic attributes. +.RE + .SH EXIT STATUS Exit status is 0 if command was successful or a positive integer upon failure. -- 2.1.4
[PATCH iproute2 net-next v1 4/7] tipc: add link monitor summary
The monitor summary command prints the basic attributes specific to the local node. A sample usage is shown below: $ tipc link monitor summary bearer eth:data0 table_generation 15 cluster_size 8 algorithm overlapping-ring bearer eth:data1 table_generation 15 cluster_size 8 algorithm overlapping-ring $ tipc link monitor summary -h Usage: tipc monitor summary Acked-by: Jon Maloy Tested-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- tipc/link.c | 47 ++- 1 file changed, 46 insertions(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index 3f0c32106772..df93409f2173 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -515,6 +515,49 @@ static int cmd_link_mon_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, return msg_doit(nlh, NULL, NULL); } +static int link_mon_summary_cb(const struct nlmsghdr *nlh, void *data) +{ + struct genlmsghdr *genl = mnl_nlmsg_get_payload(nlh); + struct nlattr *info[TIPC_NLA_MAX + 1] = {}; + struct nlattr *attrs[TIPC_NLA_MON_MAX + 1] = {}; + + mnl_attr_parse(nlh, sizeof(*genl), parse_attrs, info); + if (!info[TIPC_NLA_MON]) + return MNL_CB_ERROR; + + mnl_attr_parse_nested(info[TIPC_NLA_MON], parse_attrs, attrs); + + printf("\nbearer %s\n", + mnl_attr_get_str(attrs[TIPC_NLA_MON_BEARER_NAME])); + + printf("table_generation %u\n", + mnl_attr_get_u32(attrs[TIPC_NLA_MON_LISTGEN])); + printf("cluster_size %u\n", + mnl_attr_get_u32(attrs[TIPC_NLA_MON_PEERCNT])); + printf("algorithm %s\n", + attrs[TIPC_NLA_MON_ACTIVE] ? 
"overlapping-ring" : "full-mesh"); + + return MNL_CB_OK; +} + +static int cmd_link_mon_summary(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + char buf[MNL_SOCKET_BUFFER_SIZE]; + + if (help_flag) { + fprintf(stderr, "Usage: %s monitor summary\n", cmdl->argv[0]); + return -EINVAL; + } + + if (!(nlh = msg_init(buf, TIPC_NL_MON_GET))) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + + return msg_dumpit(nlh, link_mon_summary_cb, NULL); +} + static void cmd_link_mon_set_help(struct cmdl *cmdl) { fprintf(stderr, "Usage: %s monitor set PPROPERTY\n\n" @@ -592,7 +635,8 @@ static void cmd_link_mon_help(struct cmdl *cmdl) "Usage: %s montior COMMAND [ARGS] ...\n\n" "COMMANDS\n" " set - Set monitor properties\n" - " get - Get monitor properties\n", + " get - Get monitor properties\n" + " summary - Show local node monitor summary\n", cmdl->argv[0]); } @@ -602,6 +646,7 @@ static int cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl const struct cmd cmds[] = { { "set",cmd_link_mon_set, cmd_link_mon_set_help }, { "get",cmd_link_mon_get, cmd_link_mon_get_help }, + { "summary",cmd_link_mon_summary, NULL }, { NULL } }; -- 2.1.4
[PATCH iproute2 net-next v1 2/7] tipc: add link monitor set threshold
The command sets the activation threshold for the new cluster ring supervision. A sample usage is shown below: $ tipc link monitor set threshold 4 $ tipc link monitor set -h Usage: tipc monitor set PPROPERTY PROPERTIES threshold SIZE - Set activation threshold for monitor Acked-by: Jon Maloy Tested-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- tipc/link.c | 69 - 1 file changed, 68 insertions(+), 1 deletion(-) diff --git a/tipc/link.c b/tipc/link.c index 8bdc98224d39..3469cd302469 100644 --- a/tipc/link.c +++ b/tipc/link.c @@ -489,6 +489,71 @@ static int cmd_link_set(struct nlmsghdr *nlh, const struct cmd *cmd, return run_cmd(nlh, cmd, cmds, cmdl, NULL); } +static int cmd_link_mon_set_prop(struct nlmsghdr *nlh, const struct cmd *cmd, +struct cmdl *cmdl, void *data) +{ + int size; + char buf[MNL_SOCKET_BUFFER_SIZE]; + struct nlattr *attrs; + + if (cmdl->argc != cmdl->optind + 1) { + fprintf(stderr, "error, missing value\n"); + return -EINVAL; + } + size = atoi(shift_cmdl(cmdl)); + + if (!(nlh = msg_init(buf, TIPC_NL_MON_SET))) { + fprintf(stderr, "error, message initialisation failed\n"); + return -1; + } + attrs = mnl_attr_nest_start(nlh, TIPC_NLA_MON); + + mnl_attr_put_u32(nlh, TIPC_NLA_MON_ACTIVATION_THRESHOLD, size); + + mnl_attr_nest_end(nlh, attrs); + + return msg_doit(nlh, NULL, NULL); +} + +static void cmd_link_mon_set_help(struct cmdl *cmdl) +{ + fprintf(stderr, "Usage: %s monitor set PPROPERTY\n\n" + "PROPERTIES\n" + " threshold SIZE- Set monitor activation threshold\n", + cmdl->argv[0]); +} + +static int cmd_link_mon_set(struct nlmsghdr *nlh, const struct cmd *cmd, + struct cmdl *cmdl, void *data) +{ + const struct cmd cmds[] = { + { "threshold", cmd_link_mon_set_prop, NULL }, + { NULL } + }; + + return run_cmd(nlh, cmd, cmds, cmdl, NULL); +} + +static void cmd_link_mon_help(struct cmdl *cmdl) +{ + fprintf(stderr, + "Usage: %s montior COMMAND [ARGS] ...\n\n" + "COMMANDS\n" + " set - Set monitor properties\n", + cmdl->argv[0]); +} + +static int 
cmd_link_mon(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, + void *data) +{ + const struct cmd cmds[] = { + { "set",cmd_link_mon_set, cmd_link_mon_set_help }, + { NULL } + }; + + return run_cmd(nlh, cmd, cmds, cmdl, NULL); +} + void cmd_link_help(struct cmdl *cmdl) { fprintf(stderr, @@ -498,7 +563,8 @@ void cmd_link_help(struct cmdl *cmdl) " list - List links\n" " get - Get various link properties\n" " set - Set various link properties\n" - " statistics- Show or reset statistics\n", + " statistics- Show or reset statistics\n" + " monitor - Show or set link supervision\n", cmdl->argv[0]); } @@ -510,6 +576,7 @@ int cmd_link(struct nlmsghdr *nlh, const struct cmd *cmd, struct cmdl *cmdl, { "list", cmd_link_list, NULL }, { "set",cmd_link_set, cmd_link_set_help }, { "statistics", cmd_link_stat, cmd_link_stat_help }, + { "monitor",cmd_link_mon, cmd_link_mon_help }, { NULL } }; -- 2.1.4
[PATCH net v1] tipc: fix random link resets while adding a second bearer
In a dual bearer configuration, if the second tipc link becomes active while the first link still has pending nametable "bulk" updates, it randomly leads to a reset of the second link. When a link is established, the function named_distribute() fills the skb based on the node mtu (which allows room for TUNNEL_PROTOCOL) with a NAME_DISTRIBUTOR message for each PUBLICATION. However, the function named_distribute() allocates the buffer by increasing the node mtu by INT_H_SIZE (to insert NAME_DISTRIBUTOR). This consumes the space allocated for TUNNEL_PROTOCOL. When establishing the second link, the link shall tunnel all the messages in the first link queue including the "bulk" update. As the size of the NAME_DISTRIBUTOR messages, when tunnelled, exceeds the link mtu, the transmission fails (-EMSGSIZE). Thus, the synch point based on the message count of the tunnel packets is never reached, leading to a link timeout. In this commit, we adjust the size of the name distributor messages so that they can be tunnelled. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/name_distr.c | 8 +--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 6b626a64b517..a04fe9be1c60 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -62,6 +62,8 @@ static void publ_to_item(struct distr_item *i, struct publication *p) /** * named_prepare_buf - allocate & initialize a publication message + * + * The buffer returned is of size INT_H_SIZE + payload size */ static struct sk_buff *named_prepare_buf(struct net *net, u32 type, u32 size, u32 dest) @@ -141,9 +143,9 @@ static void named_distribute(struct net *net, struct sk_buff_head *list, struct publication *publ; struct sk_buff *skb = NULL; struct distr_item *item = NULL; - uint msg_dsz = (tipc_node_get_mtu(net, dnode, 0) / ITEM_SIZE) * - ITEM_SIZE; - uint msg_rem = msg_dsz; + u32 msg_dsz = ((tipc_node_get_mtu(net, dnode, 0) - INT_H_SIZE) / + ITEM_SIZE) * ITEM_SIZE; + u32 
msg_rem = msg_dsz; list_for_each_entry(publ, pls, local_list) { /* Prepare next buffer: */ -- 2.1.4
[PATCH net v1 1/1] tipc: fix variable dereference before NULL check
In commit cf6f7e1d5109 ("tipc: dump monitor attributes"), I dereferenced a pointer before checking whether it is valid. This is reported by the static checker Smatch as: net/tipc/monitor.c:733 tipc_nl_add_monitor_peer() warn: variable dereferenced before check 'mon' (see line 731) In this commit, we check for a valid monitor before proceeding with any other operation. Fixes: cf6f7e1d5109 ("tipc: dump monitor attributes") Reported-by: Dan Carpenter Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/monitor.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index b62caa1c770c..ed97a5876ebe 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -728,12 +728,13 @@ int tipc_nl_add_monitor_peer(struct net *net, struct tipc_nl_msg *msg, u32 bearer_id, u32 *prev_node) { struct tipc_monitor *mon = tipc_monitor(net, bearer_id); - struct tipc_peer *peer = mon->self; + struct tipc_peer *peer; if (!mon) return -EINVAL; read_lock_bh(&mon->lock); + peer = mon->self; do { if (*prev_node) { if (peer->addr == *prev_node) -- 2.1.4
[PATCH net-next v2 2/5] tipc: make cluster size threshold for monitoring configurable
In this commit, we introduce support to configure the minimum threshold to activate the new link monitoring algorithm. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc_netlink.h | 11 +++ net/tipc/monitor.c| 12 net/tipc/monitor.h| 1 + net/tipc/netlink.c| 15 +-- net/tipc/netlink.h| 1 + net/tipc/node.c | 27 +++ net/tipc/node.h | 1 + 7 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d4c8f142ba63..d387b65a0d97 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -56,6 +56,7 @@ enum { TIPC_NL_NET_GET, TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, + TIPC_NL_MON_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -72,6 +73,7 @@ enum { TIPC_NLA_NODE, /* nest */ TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE,/* nest */ + TIPC_NLA_MON, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -166,6 +168,15 @@ enum { TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 }; +/* Monitor info */ +enum { + TIPC_NLA_MON_UNSPEC, + TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + + __TIPC_NLA_MON_MAX, + TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 0d489e81fcca..3892d05b8b45 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -649,3 +649,15 @@ void tipc_mon_delete(struct net *net, int bearer_id) kfree(self); kfree(mon); } + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 598459cbed5d..91f5dd09432b 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -69,5 +69,6 @@ void tipc_mon_get_state(struct net *net, u32 addr, int bearer_id); void 
tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 56935df2167a..1e43ac0200ed 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -52,7 +52,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_MEDIA]= { .type = NLA_NESTED, }, [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, [TIPC_NLA_NET] = { .type = NLA_NESTED, }, - [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, }; const struct nla_policy @@ -61,6 +62,11 @@ tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } }; +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_SOCK_ADDR]= { .type = NLA_U32 }, @@ -214,7 +220,12 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd= TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, .policy = tipc_nl_policy, - } + }, + { + .cmd= TIPC_NL_MON_SET, + .doit = tipc_nl_node_set_monitor, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index ed1dbcb4afbd..4ba0ad422110 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -55,6 +55,7 @@ extern const struct nla_policy tipc_nl_prop_policy[]; extern const struct nla_policy tipc_nl_bearer_policy[]; extern const struct nla_policy tipc_nl_media_policy[]; extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; int 
tipc_netlink_start(void); int tipc_netlink_compat_start(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index 95cc78b51532..0fc531d0f709 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1928,3 +1928,30 @@ out: return sk
[PATCH net-next v2 0/5] tipc: netlink updates for neighbour monitor
This series contains the updates to configure and read the attributes for neighbour monitor. v2: rebase on top of net-next Parthasarathy Bhuvaragan (5): tipc: introduce constants for tipc address validation tipc: make cluster size threshold for monitoring configurable tipc: get monitor threshold for the cluster tipc: add a function to get the bearer name tipc: dump monitor attributes include/uapi/linux/tipc.h | 30 ++- include/uapi/linux/tipc_netlink.h | 37 + net/tipc/addr.h | 5 +- net/tipc/bearer.c | 25 +- net/tipc/bearer.h | 1 + net/tipc/monitor.c| 152 +++ net/tipc/monitor.h| 9 +++ net/tipc/netlink.c| 27 ++- net/tipc/netlink.h| 1 + net/tipc/node.c | 165 ++ net/tipc/node.h | 5 ++ 11 files changed, 445 insertions(+), 12 deletions(-) -- 2.1.4
[PATCH net-next v2 5/5] tipc: dump monitor attributes
In this commit, we dump the monitor attributes when queried. The link monitor attributes are separated into two kinds: 1. general attributes per bearer 2. specific attributes per node/peer This style resembles the socket attributes and the nametable publications per socket. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc_netlink.h | 25 +++ net/tipc/monitor.c| 133 ++ net/tipc/monitor.h| 6 ++ net/tipc/netlink.c| 7 ++ net/tipc/node.c | 86 net/tipc/node.h | 3 + 6 files changed, 260 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d07c6ec76062..5f3f6d09fb79 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -58,6 +58,7 @@ enum { TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, TIPC_NL_MON_GET, + TIPC_NL_MON_PEER_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -75,6 +76,7 @@ enum { TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE,/* nest */ TIPC_NLA_MON, /* nest */ + TIPC_NLA_MON_PEER, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -173,6 +175,11 @@ enum { enum { TIPC_NLA_MON_UNSPEC, TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + TIPC_NLA_MON_REF, /* u32 */ + TIPC_NLA_MON_ACTIVE,/* flag */ + TIPC_NLA_MON_BEARER_NAME, /* string */ + TIPC_NLA_MON_PEERCNT, /* u32 */ + TIPC_NLA_MON_LISTGEN, /* u32 */ __TIPC_NLA_MON_MAX, TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 @@ -194,6 +201,24 @@ enum { TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 }; +/* Monitor peer info */ +enum { + TIPC_NLA_MON_PEER_UNSPEC, + + TIPC_NLA_MON_PEER_ADDR, /* u32 */ + TIPC_NLA_MON_PEER_DOMGEN, /* u32 */ + TIPC_NLA_MON_PEER_APPLIED, /* u32 */ + TIPC_NLA_MON_PEER_UPMAP,/* u64 */ + TIPC_NLA_MON_PEER_MEMBERS, /* tlv */ + TIPC_NLA_MON_PEER_UP, /* flag */ + TIPC_NLA_MON_PEER_HEAD, /* flag */ + TIPC_NLA_MON_PEER_LOCAL,/* flag */ + TIPC_NLA_MON_PEER_PAD, /* flag */ + + __TIPC_NLA_MON_PEER_MAX, + TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1 +}; + /* 
Nest, connection info */ enum { TIPC_NLA_CON_UNSPEC, diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 3579126e2ac8..be70a57c1ff9 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -33,9 +33,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include "core.h" #include "addr.h" #include "monitor.h" +#include "bearer.h" #define MAX_MON_DOMAIN 64 #define MON_TIMEOUT 12 @@ -668,3 +670,134 @@ int tipc_nl_monitor_get_threshold(struct net *net) return tn->mon_threshold; } + +int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) +
[PATCH net-next v2 4/5] tipc: add a function to get the bearer name
Introduce a new function to get the bearer name from its id. This is used in a subsequent commit. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/bearer.c | 21 + net/tipc/bearer.h | 1 + 2 files changed, 22 insertions(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 65b0998a9bab..65b1bbf133bd 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -171,6 +171,27 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. + */ +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; + + strcpy(name, b->name); + return 0; +} + void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index f1e6db5e6345..43757f1f9cb3 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -197,6 +197,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); void tipc_bearer_reset_all(struct net *net); int tipc_bearer_setup(void); -- 2.1.4
[PATCH net-next v2 1/5] tipc: introduce constants for tipc address validation
In this commit, we introduce defines for tipc address size, offset and mask specification for Zone.Cluster.Node. There is no functional change in this commit. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc.h | 30 ++ net/tipc/addr.h | 5 + net/tipc/bearer.c | 4 ++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 6f71b9b41595..bf049e8fe31b 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -60,26 +60,48 @@ struct tipc_name_seq { __u32 upper; }; +/* TIPC Address Size, Offset, Mask specification for Z.C.N + */ +#define TIPC_NODE_BITS 12 +#define TIPC_CLUSTER_BITS 12 +#define TIPC_ZONE_BITS 8 + +#define TIPC_NODE_OFFSET0 +#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +#define TIPC_ZONE_OFFSET(TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) + +#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) + +#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) + +#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) + static inline __u32 tipc_addr(unsigned int zone, unsigned int cluster, unsigned int node) { - return (zone << 24) | (cluster << 12) | node; + return (zone << TIPC_ZONE_OFFSET) | + (cluster << TIPC_CLUSTER_OFFSET) | + node; } static inline unsigned int tipc_zone(__u32 addr) { - return addr >> 24; + return addr >> TIPC_ZONE_OFFSET; } static inline unsigned int tipc_cluster(__u32 addr) { - return (addr >> 12) & 0xfff; + return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; } static inline unsigned int tipc_node(__u32 addr) { - return addr & 0xfff; + return addr & TIPC_NODE_MASK; } /* diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 64f4004a6fac..bebb347803ce 100644 --- 
a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -43,9 +43,6 @@ #include #include "core.h" -#define TIPC_ZONE_MASK 0xff00u -#define TIPC_CLUSTER_MASK 0xf000u - static inline u32 tipc_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -60,7 +57,7 @@ static inline u32 tipc_zone_mask(u32 addr) static inline u32 tipc_cluster_mask(u32 addr) { - return addr & TIPC_CLUSTER_MASK; + return addr & TIPC_ZONE_CLUSTER_MASK; } u32 tipc_own_addr(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 4131d5a86f55..65b0998a9bab 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -225,7 +225,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (tipc_addr_domain_valid(disc_domain) && (disc_domain != tn->own_addr)) { if (tipc_in_scope(disc_domain, tn->own_addr)) { - disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; + disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ @@ -832,7 +832,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tn->own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; -- 2.1.4
[PATCH net-next v2 3/5] tipc: get monitor threshold for the cluster
In this commit, we add support to fetch the configured cluster monitoring threshold. Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/monitor.c| 7 ++ net/tipc/monitor.h| 2 ++ net/tipc/netlink.c| 5 net/tipc/node.c | 52 +++ net/tipc/node.h | 1 + 6 files changed, 68 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d387b65a0d97..d07c6ec76062 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -57,6 +57,7 @@ enum { TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, + TIPC_NL_MON_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 3892d05b8b45..3579126e2ac8 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -661,3 +661,10 @@ int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) return 0; } + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 91f5dd09432b..aedf62c60bd3 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -70,5 +70,7 @@ void tipc_mon_get_state(struct net *net, u32 addr, void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); + extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 1e43ac0200ed..2cfc5f7c6380 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -226,6 +226,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_node_set_monitor, .policy = tipc_nl_policy, }, + { + .cmd= TIPC_NL_MON_GET, + .doit = tipc_nl_node_get_monitor, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git 
a/net/tipc/node.c b/net/tipc/node.c index 0fc531d0f709..2a7e74753f9f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1955,3 +1955,55 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) return 0; } + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + val = tipc_nl_monitor_get_threshold(net); + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 65aa12ede8a5..216f053b817f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -79,4 +79,5 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); #endif -- 2.1.4
[PATCH net-next v1 0/5] tipc: netlink updates for neighbour monitor
This series contains the updates to configure and read the attributes for neighbour monitor. Parthasarathy Bhuvaragan (5): tipc: introduce constants for tipc address validation tipc: make cluster size threshold for monitoring configurable tipc: get monitor threshold for the cluster tipc: add a function to get the bearer name tipc: dump monitor attributes include/uapi/linux/tipc.h | 30 ++- include/uapi/linux/tipc_netlink.h | 37 + net/tipc/addr.h | 5 +- net/tipc/bearer.c | 25 +- net/tipc/bearer.h | 1 + net/tipc/monitor.c| 152 +++ net/tipc/monitor.h| 9 +++ net/tipc/netlink.c| 27 ++- net/tipc/netlink.h| 1 + net/tipc/node.c | 165 ++ net/tipc/node.h | 5 ++ 11 files changed, 445 insertions(+), 12 deletions(-) -- 2.1.4
[PATCH net-next v1 1/5] tipc: introduce constants for tipc address validation
In this commit, we introduce defines for tipc address size, offset and mask specification for Zone.Cluster.Node. There is no functional change in this commit. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- include/uapi/linux/tipc.h | 30 ++ net/tipc/addr.h | 5 + net/tipc/bearer.c | 4 ++-- 3 files changed, 29 insertions(+), 10 deletions(-) diff --git a/include/uapi/linux/tipc.h b/include/uapi/linux/tipc.h index 6f71b9b41595..bf049e8fe31b 100644 --- a/include/uapi/linux/tipc.h +++ b/include/uapi/linux/tipc.h @@ -60,26 +60,48 @@ struct tipc_name_seq { __u32 upper; }; +/* TIPC Address Size, Offset, Mask specification for Z.C.N + */ +#define TIPC_NODE_BITS 12 +#define TIPC_CLUSTER_BITS 12 +#define TIPC_ZONE_BITS 8 + +#define TIPC_NODE_OFFSET0 +#define TIPC_CLUSTER_OFFSET TIPC_NODE_BITS +#define TIPC_ZONE_OFFSET(TIPC_CLUSTER_OFFSET + TIPC_CLUSTER_BITS) + +#define TIPC_NODE_SIZE ((1UL << TIPC_NODE_BITS) - 1) +#define TIPC_CLUSTER_SIZE ((1UL << TIPC_CLUSTER_BITS) - 1) +#define TIPC_ZONE_SIZE ((1UL << TIPC_ZONE_BITS) - 1) + +#define TIPC_NODE_MASK (TIPC_NODE_SIZE << TIPC_NODE_OFFSET) +#define TIPC_CLUSTER_MASK (TIPC_CLUSTER_SIZE << TIPC_CLUSTER_OFFSET) +#define TIPC_ZONE_MASK (TIPC_ZONE_SIZE << TIPC_ZONE_OFFSET) + +#define TIPC_ZONE_CLUSTER_MASK (TIPC_ZONE_MASK | TIPC_CLUSTER_MASK) + static inline __u32 tipc_addr(unsigned int zone, unsigned int cluster, unsigned int node) { - return (zone << 24) | (cluster << 12) | node; + return (zone << TIPC_ZONE_OFFSET) | + (cluster << TIPC_CLUSTER_OFFSET) | + node; } static inline unsigned int tipc_zone(__u32 addr) { - return addr >> 24; + return addr >> TIPC_ZONE_OFFSET; } static inline unsigned int tipc_cluster(__u32 addr) { - return (addr >> 12) & 0xfff; + return (addr & TIPC_CLUSTER_MASK) >> TIPC_CLUSTER_OFFSET; } static inline unsigned int tipc_node(__u32 addr) { - return addr & 0xfff; + return addr & TIPC_NODE_MASK; } /* diff --git a/net/tipc/addr.h b/net/tipc/addr.h index 64f4004a6fac..bebb347803ce 100644 --- 
a/net/tipc/addr.h +++ b/net/tipc/addr.h @@ -43,9 +43,6 @@ #include #include "core.h" -#define TIPC_ZONE_MASK 0xff00u -#define TIPC_CLUSTER_MASK 0xf000u - static inline u32 tipc_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -60,7 +57,7 @@ static inline u32 tipc_zone_mask(u32 addr) static inline u32 tipc_cluster_mask(u32 addr) { - return addr & TIPC_CLUSTER_MASK; + return addr & TIPC_ZONE_CLUSTER_MASK; } u32 tipc_own_addr(struct net *net); diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 8584cc48654c..e87731c8998c 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -225,7 +225,7 @@ static int tipc_enable_bearer(struct net *net, const char *name, if (tipc_addr_domain_valid(disc_domain) && (disc_domain != tn->own_addr)) { if (tipc_in_scope(disc_domain, tn->own_addr)) { - disc_domain = tn->own_addr & TIPC_CLUSTER_MASK; + disc_domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; res = 0; /* accept any node in own cluster */ } else if (in_own_cluster_exact(net, disc_domain)) res = 0; /* accept specified node in own cluster */ @@ -817,7 +817,7 @@ int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) u32 prio; prio = TIPC_MEDIA_LINK_PRI; - domain = tn->own_addr & TIPC_CLUSTER_MASK; + domain = tn->own_addr & TIPC_ZONE_CLUSTER_MASK; if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; -- 2.1.4
[PATCH net-next v1 4/5] tipc: add a function to get the bearer name
Introduce a new function to get the bearer name from its id. This is used in a subsequent commit. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- net/tipc/bearer.c | 21 + net/tipc/bearer.h | 1 + 2 files changed, 22 insertions(+) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index e87731c8998c..6b28f4e27c55 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -171,6 +171,27 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } +/* tipc_bearer_get_name - get the bearer name from its id. + * @net: network namespace + * @name: a pointer to the buffer where the name will be stored. + * @bearer_id: the id to get the name from. + */ +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id) +{ + struct tipc_net *tn = tipc_net(net); + struct tipc_bearer *b; + + if (bearer_id >= MAX_BEARERS) + return -EINVAL; + + b = rtnl_dereference(tn->bearer_list[bearer_id]); + if (!b) + return -EINVAL; + + strcpy(name, b->name); + return 0; +} + void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 0d337c7b6fad..ddaf51856414 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -197,6 +197,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); +int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); -- 2.1.4
[PATCH net-next v1 5/5] tipc: dump monitor attributes
In this commit, we dump the monitor attributes when queried. The link monitor attributes are separated into two kinds: 1. general attributes per bearer 2. specific attributes per node/peer This style resembles the socket attributes and the nametable publications per socket. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- include/uapi/linux/tipc_netlink.h | 25 +++ net/tipc/monitor.c| 133 ++ net/tipc/monitor.h| 6 ++ net/tipc/netlink.c| 7 ++ net/tipc/node.c | 86 net/tipc/node.h | 3 + 6 files changed, 260 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d07c6ec76062..5f3f6d09fb79 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -58,6 +58,7 @@ enum { TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, TIPC_NL_MON_GET, + TIPC_NL_MON_PEER_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -75,6 +76,7 @@ enum { TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE,/* nest */ TIPC_NLA_MON, /* nest */ + TIPC_NLA_MON_PEER, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -173,6 +175,11 @@ enum { enum { TIPC_NLA_MON_UNSPEC, TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + TIPC_NLA_MON_REF, /* u32 */ + TIPC_NLA_MON_ACTIVE,/* flag */ + TIPC_NLA_MON_BEARER_NAME, /* string */ + TIPC_NLA_MON_PEERCNT, /* u32 */ + TIPC_NLA_MON_LISTGEN, /* u32 */ __TIPC_NLA_MON_MAX, TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 @@ -194,6 +201,24 @@ enum { TIPC_NLA_PUBL_MAX = __TIPC_NLA_PUBL_MAX - 1 }; +/* Monitor peer info */ +enum { + TIPC_NLA_MON_PEER_UNSPEC, + + TIPC_NLA_MON_PEER_ADDR, /* u32 */ + TIPC_NLA_MON_PEER_DOMGEN, /* u32 */ + TIPC_NLA_MON_PEER_APPLIED, /* u32 */ + TIPC_NLA_MON_PEER_UPMAP,/* u64 */ + TIPC_NLA_MON_PEER_MEMBERS, /* tlv */ + TIPC_NLA_MON_PEER_UP, /* flag */ + TIPC_NLA_MON_PEER_HEAD, /* flag */ + TIPC_NLA_MON_PEER_LOCAL,/* flag */ + TIPC_NLA_MON_PEER_PAD, /* flag */ + + __TIPC_NLA_MON_PEER_MAX, + TIPC_NLA_MON_PEER_MAX = __TIPC_NLA_MON_PEER_MAX - 1 +}; + /* 
Nest, connection info */ enum { TIPC_NLA_CON_UNSPEC, diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 3579126e2ac8..be70a57c1ff9 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -33,9 +33,11 @@ * POSSIBILITY OF SUCH DAMAGE. */ +#include #include "core.h" #include "addr.h" #include "monitor.h" +#include "bearer.h" #define MAX_MON_DOMAIN 64 #define MON_TIMEOUT 12 @@ -668,3 +670,134 @@ int tipc_nl_monitor_get_threshold(struct net *net) return tn->mon_threshold; } + +int __tipc_nl_add_monitor_peer(struct tipc_peer *peer, struct tipc_nl_msg *msg) +{ + struct tipc_mon_domain *dom = peer->domain; + struct nlattr *attrs; + void *hdr; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + NLM_F_MULTI, TIPC_NL_MON_PEER_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON_PEER); + if (!attrs) + goto msg_full; + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_ADDR, peer->addr)) + goto attr_msg_full; + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_APPLIED, peer->applied)) + goto attr_msg_full; + + if (peer->is_up) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_UP)) + goto attr_msg_full; + if (peer->is_local) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_LOCAL)) + goto attr_msg_full; + if (peer->is_head) + if (nla_put_flag(msg->skb, TIPC_NLA_MON_PEER_HEAD)) + goto attr_msg_full; + + if (dom) { + if (nla_put_u32(msg->skb, TIPC_NLA_MON_PEER_DOMGEN, dom->gen)) + goto attr_msg_full; + if (nla_put_u64_64bit(msg->skb, TIPC_NLA_MON_PEER_UPMAP, + dom->up_map, TIPC_NLA_MON_PEER_PAD)) + goto attr_msg_full; + if (nla_put(msg->skb, TIPC_NLA_MON_PEER_MEMBERS, + dom->member_cnt * sizeof(u32), &dom->members)) +
[PATCH net-next v1 3/5] tipc: get monitor threshold for the cluster
In this commit, we add support to fetch the configured cluster monitoring threshold. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- include/uapi/linux/tipc_netlink.h | 1 + net/tipc/monitor.c| 7 ++ net/tipc/monitor.h| 2 ++ net/tipc/netlink.c| 5 net/tipc/node.c | 52 +++ net/tipc/node.h | 1 + 6 files changed, 68 insertions(+) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d387b65a0d97..d07c6ec76062 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -57,6 +57,7 @@ enum { TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, TIPC_NL_MON_SET, + TIPC_NL_MON_GET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 3892d05b8b45..3579126e2ac8 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -661,3 +661,10 @@ int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) return 0; } + +int tipc_nl_monitor_get_threshold(struct net *net) +{ + struct tipc_net *tn = tipc_net(net); + + return tn->mon_threshold; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 91f5dd09432b..aedf62c60bd3 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -70,5 +70,7 @@ void tipc_mon_get_state(struct net *net, u32 addr, void tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); +int tipc_nl_monitor_get_threshold(struct net *net); + extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 1e43ac0200ed..2cfc5f7c6380 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -226,6 +226,11 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .doit = tipc_nl_node_set_monitor, .policy = tipc_nl_policy, }, + { + .cmd= TIPC_NL_MON_GET, + .doit = tipc_nl_node_get_monitor, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git 
a/net/tipc/node.c b/net/tipc/node.c index fa524d73b031..4a817daa9f6f 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1948,3 +1948,55 @@ int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info) return 0; } + +static int __tipc_nl_add_monitor_prop(struct net *net, struct tipc_nl_msg *msg) +{ + struct nlattr *attrs; + void *hdr; + u32 val; + + hdr = genlmsg_put(msg->skb, msg->portid, msg->seq, &tipc_genl_family, + 0, TIPC_NL_MON_GET); + if (!hdr) + return -EMSGSIZE; + + attrs = nla_nest_start(msg->skb, TIPC_NLA_MON); + if (!attrs) + goto msg_full; + + val = tipc_nl_monitor_get_threshold(net); + + if (nla_put_u32(msg->skb, TIPC_NLA_MON_ACTIVATION_THRESHOLD, val)) + goto attr_msg_full; + + nla_nest_end(msg->skb, attrs); + genlmsg_end(msg->skb, hdr); + + return 0; + +attr_msg_full: + nla_nest_cancel(msg->skb, attrs); +msg_full: + genlmsg_cancel(msg->skb, hdr); + + return -EMSGSIZE; +} + +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info) +{ + struct net *net = sock_net(skb->sk); + struct tipc_nl_msg msg; + int err; + + msg.skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + msg.portid = info->snd_portid; + msg.seq = info->snd_seq; + + err = __tipc_nl_add_monitor_prop(net, &msg); + if (err) { + nlmsg_free(msg.skb); + return err; + } + + return genlmsg_reply(msg.skb, info); +} diff --git a/net/tipc/node.h b/net/tipc/node.h index 65aa12ede8a5..216f053b817f 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -79,4 +79,5 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_link(struct sk_buff *skb, struct genl_info *info); int tipc_nl_node_set_monitor(struct sk_buff *skb, struct genl_info *info); +int tipc_nl_node_get_monitor(struct sk_buff *skb, struct genl_info *info); #endif -- 2.1.4
[PATCH net-next v1 2/5] tipc: make cluster size threshold for monitoring configurable
In this commit, we introduce support to configure the minimum threshold to activate the new link monitoring algorithm. Signed-off-by: Parthasarathy Bhuvaragan Reviewed-by: Jon Maloy --- include/uapi/linux/tipc_netlink.h | 11 +++ net/tipc/monitor.c| 12 net/tipc/monitor.h| 1 + net/tipc/netlink.c| 15 +-- net/tipc/netlink.h| 1 + net/tipc/node.c | 27 +++ net/tipc/node.h | 1 + 7 files changed, 66 insertions(+), 2 deletions(-) diff --git a/include/uapi/linux/tipc_netlink.h b/include/uapi/linux/tipc_netlink.h index d4c8f142ba63..d387b65a0d97 100644 --- a/include/uapi/linux/tipc_netlink.h +++ b/include/uapi/linux/tipc_netlink.h @@ -56,6 +56,7 @@ enum { TIPC_NL_NET_GET, TIPC_NL_NET_SET, TIPC_NL_NAME_TABLE_GET, + TIPC_NL_MON_SET, __TIPC_NL_CMD_MAX, TIPC_NL_CMD_MAX = __TIPC_NL_CMD_MAX - 1 @@ -72,6 +73,7 @@ enum { TIPC_NLA_NODE, /* nest */ TIPC_NLA_NET, /* nest */ TIPC_NLA_NAME_TABLE,/* nest */ + TIPC_NLA_MON, /* nest */ __TIPC_NLA_MAX, TIPC_NLA_MAX = __TIPC_NLA_MAX - 1 @@ -166,6 +168,15 @@ enum { TIPC_NLA_NAME_TABLE_MAX = __TIPC_NLA_NAME_TABLE_MAX - 1 }; +/* Monitor info */ +enum { + TIPC_NLA_MON_UNSPEC, + TIPC_NLA_MON_ACTIVATION_THRESHOLD, /* u32 */ + + __TIPC_NLA_MON_MAX, + TIPC_NLA_MON_MAX = __TIPC_NLA_MON_MAX - 1 +}; + /* Publication info */ enum { TIPC_NLA_PUBL_UNSPEC, diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 0d489e81fcca..3892d05b8b45 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -649,3 +649,15 @@ void tipc_mon_delete(struct net *net, int bearer_id) kfree(self); kfree(mon); } + +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size) +{ + struct tipc_net *tn = tipc_net(net); + + if (cluster_size > TIPC_CLUSTER_SIZE) + return -EINVAL; + + tn->mon_threshold = cluster_size; + + return 0; +} diff --git a/net/tipc/monitor.h b/net/tipc/monitor.h index 598459cbed5d..91f5dd09432b 100644 --- a/net/tipc/monitor.h +++ b/net/tipc/monitor.h @@ -69,5 +69,6 @@ void tipc_mon_get_state(struct net *net, u32 addr, int bearer_id); void 
tipc_mon_remove_peer(struct net *net, u32 addr, int bearer_id); +int tipc_nl_monitor_set_threshold(struct net *net, u32 cluster_size); extern const int tipc_max_domain_size; #endif diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 56935df2167a..1e43ac0200ed 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -52,7 +52,8 @@ static const struct nla_policy tipc_nl_policy[TIPC_NLA_MAX + 1] = { [TIPC_NLA_MEDIA]= { .type = NLA_NESTED, }, [TIPC_NLA_NODE] = { .type = NLA_NESTED, }, [TIPC_NLA_NET] = { .type = NLA_NESTED, }, - [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, } + [TIPC_NLA_NAME_TABLE] = { .type = NLA_NESTED, }, + [TIPC_NLA_MON] = { .type = NLA_NESTED, }, }; const struct nla_policy @@ -61,6 +62,11 @@ tipc_nl_name_table_policy[TIPC_NLA_NAME_TABLE_MAX + 1] = { [TIPC_NLA_NAME_TABLE_PUBL] = { .type = NLA_NESTED } }; +const struct nla_policy tipc_nl_monitor_policy[TIPC_NLA_MON_MAX + 1] = { + [TIPC_NLA_MON_UNSPEC] = { .type = NLA_UNSPEC }, + [TIPC_NLA_MON_ACTIVATION_THRESHOLD] = { .type = NLA_U32 }, +}; + const struct nla_policy tipc_nl_sock_policy[TIPC_NLA_SOCK_MAX + 1] = { [TIPC_NLA_SOCK_UNSPEC] = { .type = NLA_UNSPEC }, [TIPC_NLA_SOCK_ADDR]= { .type = NLA_U32 }, @@ -214,7 +220,12 @@ static const struct genl_ops tipc_genl_v2_ops[] = { .cmd= TIPC_NL_NAME_TABLE_GET, .dumpit = tipc_nl_name_table_dump, .policy = tipc_nl_policy, - } + }, + { + .cmd= TIPC_NL_MON_SET, + .doit = tipc_nl_node_set_monitor, + .policy = tipc_nl_policy, + }, }; int tipc_nlmsg_parse(const struct nlmsghdr *nlh, struct nlattr ***attr) diff --git a/net/tipc/netlink.h b/net/tipc/netlink.h index ed1dbcb4afbd..4ba0ad422110 100644 --- a/net/tipc/netlink.h +++ b/net/tipc/netlink.h @@ -55,6 +55,7 @@ extern const struct nla_policy tipc_nl_prop_policy[]; extern const struct nla_policy tipc_nl_bearer_policy[]; extern const struct nla_policy tipc_nl_media_policy[]; extern const struct nla_policy tipc_nl_udp_policy[]; +extern const struct nla_policy tipc_nl_monitor_policy[]; int 
tipc_netlink_start(void); int tipc_netlink_compat_start(void); diff --git a/net/tipc/node.c b/net/tipc/node.c index a3fc0a3f4077..fa524d73b031 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1921,3 +1921,30 @@ out: return sk
[PATCH net-next v1 1/1] tipc: fix stale links after re-enabling bearer
Commit 42b18f605fea ("tipc: refactor function tipc_link_timeout()") introduced a bug which prevents sending of probe messages during the link synchronization phase. This leads to hanging links if the bearer is disabled/enabled after links are up. In this commit, we send the probe messages correctly. Fixes: 42b18f605fea ("tipc: refactor function tipc_link_timeout()") Acked-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/link.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 2e28a7d7e802..7059c94f33c5 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -721,8 +721,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) mtyp = STATE_MSG; state = bc_acked != bc_snt; probe = l->silent_intv_cnt; - if (probe) - l->silent_intv_cnt++; + l->silent_intv_cnt++; break; case LINK_RESET: setup = l->rst_cnt++ <= 4; -- 2.1.4
[PATCH net-next v1 1/1] tipc: fix a race condition leading to subscriber refcnt bug
Until now, the requests sent to the topology server are queued to a workqueue by the generic server framework. These messages are processed by worker threads and trigger the registered callbacks. To reduce latency on uniprocessor systems, explicit rescheduling is performed using cond_resched() after MAX_RECV_MSG_COUNT(25) messages. This implementation on SMP systems leads to a subscriber refcnt error as described below: When a worker thread yields by calling cond_resched() in an SMP system, a new worker is created on another CPU to process the pending workitem. Sometimes the sleeping thread wakes up before the new thread finishes execution. This breaks the assumption on ordering and being single threaded. The fault is more frequent when MAX_RECV_MSG_COUNT is lowered. If the first thread was processing subscription create and the second thread processing close(), the close request will free the subscriber and the create request will oops as follows: [31.224137] WARNING: CPU: 2 PID: 266 at include/linux/kref.h:46 tipc_subscrb_rcv_cb+0x317/0x380 [tipc] [31.228143] CPU: 2 PID: 266 Comm: kworker/u8:1 Not tainted 4.5.0+ #97 [31.228377] Workqueue: tipc_rcv tipc_recv_work [tipc] [...] [31.228377] Call Trace: [31.228377] [] dump_stack+0x4d/0x72 [31.228377] [] __warn+0xd1/0xf0 [31.228377] [] warn_slowpath_null+0x1d/0x20 [31.228377] [] tipc_subscrb_rcv_cb+0x317/0x380 [tipc] [31.228377] [] tipc_receive_from_sock+0xd4/0x130 [tipc] [31.228377] [] tipc_recv_work+0x2b/0x50 [tipc] [31.228377] [] process_one_work+0x145/0x3d0 [31.246554] ---[ end trace c3882c9baa05a4fd ]--- [31.248327] BUG: spinlock bad magic on CPU#2, kworker/u8:1/266 [31.249119] BUG: unable to handle kernel NULL pointer dereference at 0428 [31.249323] IP: [] spin_dump+0x5c/0xe0 [31.249323] PGD 0 [31.249323] Oops: [#1] SMP In this commit, we - rename tipc_conn_shutdown() to tipc_conn_release().
- move connection release callback execution from tipc_close_conn() to a new function tipc_sock_release(), which is executed before we free the connection. Thus we release the subscriber during connection release procedure rather than connection shutdown procedure. Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Ying Xue --- net/tipc/server.c | 19 +-- net/tipc/server.h | 4 ++-- net/tipc/subscr.c | 4 ++-- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2446bfbaa309..7a0af2dc0406 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,6 +86,7 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); +static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -102,6 +103,7 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); + tipc_sock_release(con); sock_release(sock); con->sock = NULL; } @@ -184,26 +186,31 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(&sk->sk_callback_lock); } +static void tipc_sock_release(struct tipc_conn *con) +{ + struct tipc_server *s = con->server; + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); + + tipc_unregister_callbacks(con); +} + static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - if (con->conid) - s->tipc_conn_shutdown(con->conid, con->usr_data); spin_lock_bh(&s->idr_lock); idr_remove(&s->conn_idr, con->conid); s->idr_in_use--; spin_unlock_bh(&s->idr_lock); - tipc_unregister_callbacks(con); - /* We shouldn't flush pending works as we may be in the * thread. 
In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -* connection from server connection list and set -* sk->sk_user_data to 0 before releasing connection object. +* connection from server connection list. */ kernel_sock_shutdown(con->sock, SHUT_RDWR); diff --git a/net/tipc/server.h b/net/tipc/server.h index 9015faedb1b0..34f8055afa3b 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -53,7 +53,7 @@ * @send_wq: send workqueue * @max_rcvbuf_size: maximum permitted receive message length * @tipc_conn_new: callback will be called when new connection i
[PATCH net-next v1 1/1] tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain"
Reverts commit 94153e36e709e ("tipc: use existing sk_write_queue for outgoing packet chain") In the above commit, we assume that we fill & empty the socket's sk_write_queue within the same lock_sock() session. This is not true if the link is congested. During congestion, the socket lock is released while we wait for the congestion to cease. This implementation causes a nullptr exception, if the user space program has several threads accessing the same socket descriptor. Consider two threads of the same program performing the following: Thread1 Thread2 -- Enter tipc_sendmsg() Enter tipc_sendmsg() lock_sock() lock_sock() Enter tipc_link_xmit(), ret=ELINKCONG spin on socket lock.. sk_wait_event() : release_sock() grab socket lock : Enter tipc_link_xmit(), ret=0 : release_sock() Wakeup after congestion lock_sock() skb = skb_peek(pktchain); !! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; In this case, the second thread transmits the buffers belonging to both thread1 and thread2 successfully. When the first thread wakes up after the congestion, it assumes that the pktchain is intact and operates on the skb's in it, which leads to the following exception: [2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00d0 [2102.440074] IP: [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0 [2102.440074] Oops: [#1] SMP [2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1 [2102.440074] RIP: 0010:[] [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [...] [2102.440074] Call Trace: [2102.440074] [] ? schedule+0x29/0x70 [2102.440074] [] ? tipc_node_unlock+0x46/0x170 [tipc] [2102.440074] [] tipc_link_xmit+0x51/0xf0 [tipc] [2102.440074] [] tipc_send_stream+0x11e/0x4f0 [tipc] [2102.440074] [] ? __wake_up_sync+0x20/0x20 [2102.440074] [] tipc_send_packet+0x1c/0x20 [tipc] [2102.440074] [] sock_sendmsg+0xa8/0xd0 [2102.440074] [] ? release_sock+0x145/0x170 [2102.440074] [] ___sys_sendmsg+0x3d8/0x3e0 [2102.440074] [] ?
_raw_spin_unlock+0xe/0x10 [2102.440074] [] ? handle_mm_fault+0x6ca/0x9d0 [2102.440074] [] ? set_next_entity+0x85/0xa0 [2102.440074] [] ? _raw_spin_unlock_irq+0xe/0x20 [2102.440074] [] ? finish_task_switch+0x5c/0xc0 [2102.440074] [] ? __schedule+0x34c/0x950 [2102.440074] [] __sys_sendmsg+0x42/0x80 [2102.440074] [] SyS_sendmsg+0x12/0x20 [2102.440074] [] system_call_fastpath+0x16/0x1b In this commit, we maintain the skb list always in the stack. Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Ying Xue Acked-by: Jon Maloy --- net/tipc/socket.c | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 69c29050f14a..4d420bb27396 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(mhdr, seq->upper); msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + skb_queue_head_init(&pktchain); + new_mtu: mtu = tipc_bcast_get_mtu(net); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bcast_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, &pktchain); if (likely(!rc)) return dsz; @@ -704,7 +706,7 @@ new_mtu: if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; @@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head *pktchain = 
&sk->sk_write_queue; + struct sk_buff_head pktchain; struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -924,17 +
[PATCH net v1 1/1] tipc: fix nullptr crash during subscription cancel
Commit 4d5cfcba2f6e ('tipc: fix connection abort during subscription cancel') removed the check for a valid subscription before calling tipc_nametbl_subscribe(). This will lead to a nullptr exception when we process a subscription cancel request. For a cancel request, a null subscription is passed to tipc_nametbl_subscribe(), resulting in an exception. In this commit, we call tipc_nametbl_subscribe() only for a valid subscription. Fixes: 4d5cfcba2f6e ('tipc: fix connection abort during subscription cancel') Reported-by: Anders Widell Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Jon Maloy --- net/tipc/subscr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 69ee2eeef968..f9ff73a8d815 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -296,7 +296,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, if (tipc_subscrp_create(net, (struct tipc_subscr *)buf, subscrb, &sub)) return tipc_conn_terminate(tn->topsrv, subscrb->conid); - tipc_nametbl_subscribe(sub); + if (sub) + tipc_nametbl_subscribe(sub); } /* Handle one request to establish a new subscriber */ -- 2.1.4
[PATCH net v1 1/1] tipc: Revert "tipc: use existing sk_write_queue for outgoing packet chain"
Reverts commit 94153e36e709e ("tipc: use existing sk_write_queue for outgoing packet chain") In commit 94153e36e709e, we assume that we fill & empty the socket's sk_write_queue within the same lock_sock() session. This is not true if the link is congested. During congestion, the socket lock is released while we wait for the congestion to cease. This implementation causes a nullptr exception, if the user space program has several threads accessing the same socket descriptor. Consider two threads of the same program performing the following: Thread1 Thread2 -- Enter tipc_sendmsg() Enter tipc_sendmsg() lock_sock() lock_sock() Enter tipc_link_xmit(), ret=ELINKCONG spin on socket lock.. sk_wait_event() : release_sock() grab socket lock : Enter tipc_link_xmit(), ret=0 : release_sock() Wakeup after congestion lock_sock() skb = skb_peek(pktchain); !! TIPC_SKB_CB(skb)->wakeup_pending = tsk->link_cong; In this case, the second thread transmits the buffers belonging to both thread1 and thread2 successfully. When the first thread wakes up after the congestion, it assumes that the pktchain is intact and operates on the skb's in it, which leads to the following exception: [2102.439969] BUG: unable to handle kernel NULL pointer dereference at 00d0 [2102.440074] IP: [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [2102.440074] PGD 3fa3f067 PUD 3fa6b067 PMD 0 [2102.440074] Oops: [#1] SMP [2102.440074] CPU: 2 PID: 244 Comm: sender Not tainted 3.12.28 #1 [2102.440074] RIP: 0010:[] [] __tipc_link_xmit+0x2b0/0x4d0 [tipc] [...] [2102.440074] Call Trace: [2102.440074] [] ? schedule+0x29/0x70 [2102.440074] [] ? tipc_node_unlock+0x46/0x170 [tipc] [2102.440074] [] tipc_link_xmit+0x51/0xf0 [tipc] [2102.440074] [] tipc_send_stream+0x11e/0x4f0 [tipc] [2102.440074] [] ? __wake_up_sync+0x20/0x20 [2102.440074] [] tipc_send_packet+0x1c/0x20 [tipc] [2102.440074] [] sock_sendmsg+0xa8/0xd0 [2102.440074] [] ? release_sock+0x145/0x170 [2102.440074] [] ___sys_sendmsg+0x3d8/0x3e0 [2102.440074] [] ?
_raw_spin_unlock+0xe/0x10 [2102.440074] [] ? handle_mm_fault+0x6ca/0x9d0 [2102.440074] [] ? set_next_entity+0x85/0xa0 [2102.440074] [] ? _raw_spin_unlock_irq+0xe/0x20 [2102.440074] [] ? finish_task_switch+0x5c/0xc0 [2102.440074] [] ? __schedule+0x34c/0x950 [2102.440074] [] __sys_sendmsg+0x42/0x80 [2102.440074] [] SyS_sendmsg+0x12/0x20 [2102.440074] [] system_call_fastpath+0x16/0x1b In this commit, we maintain the skb list always in the stack. Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Ying Xue Acked-by: Jon Maloy --- net/tipc/socket.c | 33 +++-- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 69c29050f14a..4d420bb27396 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -673,7 +673,7 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_sock *tsk = tipc_sk(sk); struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; - struct sk_buff_head *pktchain = &sk->sk_write_queue; + struct sk_buff_head pktchain; struct iov_iter save = msg->msg_iter; uint mtu; int rc; @@ -687,14 +687,16 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, msg_set_nameupper(mhdr, seq->upper); msg_set_hdr_sz(mhdr, MCAST_H_SIZE); + skb_queue_head_init(&pktchain); + new_mtu: mtu = tipc_bcast_get_mtu(net); - rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, pktchain); + rc = tipc_msg_build(mhdr, msg, 0, dsz, mtu, &pktchain); if (unlikely(rc < 0)) return rc; do { - rc = tipc_bcast_xmit(net, pktchain); + rc = tipc_bcast_xmit(net, &pktchain); if (likely(!rc)) return dsz; @@ -704,7 +706,7 @@ new_mtu: if (!rc) continue; } - __skb_queue_purge(pktchain); + __skb_queue_purge(&pktchain); if (rc == -EMSGSIZE) { msg->msg_iter = save; goto new_mtu; @@ -863,7 +865,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz) struct net *net = sock_net(sk); struct tipc_msg *mhdr = &tsk->phdr; u32 dnode, dport; - struct sk_buff_head *pktchain = 
&sk->sk_write_queue; + struct sk_buff_head pktchain; struct sk_buff *skb; struct tipc_name_seq *seq; struct iov_iter save; @@ -924,17 +
[PATCH net-next v2 07/10] tipc: hold subscriber->lock for tipc_nametbl_subscribe()
Until now, while creating a subscription the subscriber lock protects only the subscriber's subscription list and not the nametable. The call to tipc_nametbl_subscribe() is outside the lock. However, at subscription timeout and cancel both the subscriber's subscription list and the nametable are protected by the subscriber lock. This asymmetric locking mechanism leads to the following problem: In an SMP system, the timer can fire on another core before the create request is complete. When the timer thread calls tipc_nametbl_unsubscribe() before the create thread calls tipc_nametbl_subscribe(), we get a nullptr exception. This can be simulated by creating a subscription with timeout=0 and sometimes the timeout occurs before the create request is complete. The following is the oops: [57.569661] BUG: unable to handle kernel NULL pointer dereference at (null) [57.577498] IP: [] tipc_nametbl_unsubscribe+0x8a/0x120 [tipc] [57.584820] PGD 0 [57.586834] Oops: 0002 [#1] SMP [57.685506] CPU: 14 PID: 10077 Comm: kworker/u40:1 Tainted: P OENX 3.12.48-52.27.1. 9688.1.PTF-default #1 [57.703637] Workqueue: tipc_rcv tipc_recv_work [tipc] [57.708697] task: 88064c7f00c0 ti: 880629ef4000 task.ti: 880629ef4000 [57.716181] RIP: 0010:[] [] tipc_nametbl_unsubscribe+0x8a/ 0x120 [tipc] [...]
[57.812327] Call Trace: [57.814806] [] tipc_subscrp_delete+0x37/0x90 [tipc] [57.821357] [] tipc_subscrp_timeout+0x3f/0x70 [tipc] [57.827982] [] call_timer_fn+0x31/0x100 [57.833490] [] run_timer_softirq+0x1f9/0x2b0 [57.839414] [] __do_softirq+0xe5/0x230 [57.844827] [] call_softirq+0x1c/0x30 [57.850150] [] do_softirq+0x55/0x90 [57.855285] [] irq_exit+0x95/0xa0 [57.860290] [] smp_apic_timer_interrupt+0x45/0x60 [57.866644] [] apic_timer_interrupt+0x6d/0x80 [57.872686] [] tipc_subscrb_rcv_cb+0x2a5/0x3f0 [tipc] [57.879425] [] tipc_receive_from_sock+0x9f/0x100 [tipc] [57.886324] [] tipc_recv_work+0x26/0x60 [tipc] [57.892463] [] process_one_work+0x172/0x420 [57.898309] [] worker_thread+0x11a/0x3c0 [57.903871] [] kthread+0xb4/0xc0 [57.908751] [] ret_from_fork+0x58/0x90 In this commit, we do the following at subscription creation: 1. set the subscription's subscriber pointer before performing tipc_nametbl_subscribe(), as this value is required further in the call chain ex: by tipc_subscrp_send_event(). 2. move tipc_nametbl_subscribe() under the scope of subscriber lock Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 24d2c8128bac..e4ebbc161e42 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -284,13 +284,13 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); + sub->subscriber = subscriber; + tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); - sub->subscriber = subscriber; timeout = htohl(sub->evt.s.timeout, swap); if (!mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout))) tipc_subscrb_get(subscriber); - tipc_nametbl_subscribe(sub); } /* Handle one termination request for the subscriber */ -- 2.1.4
[PATCH net-next v2 03/10] tipc: remove struct tipc_name_seq from struct tipc_subscription
Until now, struct tipc_subscriber has duplicate fields for type, upper and lower (as member of struct tipc_name_seq) at: 1. as member seq in struct tipc_subscription 2. as member seq in struct tipc_subscr, which is contained in struct tipc_event The former structure contains the type, upper and lower values in network byte order and the later contains the intact copy of the request. The struct tipc_subscription contains a field swap to determine if request needs network byte order conversion. Thus by using swap, we can convert the request when required instead of duplicating it. In this commit, 1. we remove the references to these elements as members of struct tipc_subscription and replace them with elements from struct tipc_subscr. 2. provide new functions to convert the user request into network byte order. Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/name_table.c | 14 ++ net/tipc/subscr.c | 33 +++-- net/tipc/subscr.h | 6 -- 3 files changed, 37 insertions(+), 16 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 91fce70291a8..777b979b8463 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -418,6 +418,9 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, struct tipc_subscription *s) { struct sub_seq *sseq = nseq->sseqs; + struct tipc_name_seq ns; + + tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); list_add(&s->nameseq_list, &nseq->subscriptions); @@ -425,7 +428,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, return; while (sseq != &nseq->sseqs[nseq->first_free]) { - if (tipc_subscrp_check_overlap(s, sseq->lower, sseq->upper)) { + if (tipc_subscrp_check_overlap(&ns, sseq->lower, sseq->upper)) { struct publication *crs; struct name_info *info = sseq->info; int must_report = 1; @@ -722,9 +725,10 @@ int tipc_nametbl_withdraw(struct net *net, u32 type, u32 lower, u32 ref, void tipc_nametbl_subscribe(struct tipc_subscription *s) { struct tipc_net 
*tn = net_generic(s->net, tipc_net_id); - u32 type = s->seq.type; + u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); int index = hash(type); struct name_seq *seq; + struct tipc_name_seq ns; spin_lock_bh(&tn->nametbl_lock); seq = nametbl_find_seq(s->net, type); @@ -735,8 +739,9 @@ void tipc_nametbl_subscribe(struct tipc_subscription *s) tipc_nameseq_subscribe(seq, s); spin_unlock_bh(&seq->lock); } else { + tipc_subscrp_convert_seq(&s->evt.s.seq, s->swap, &ns); pr_warn("Failed to create subscription for {%u,%u,%u}\n", - s->seq.type, s->seq.lower, s->seq.upper); + ns.type, ns.lower, ns.upper); } spin_unlock_bh(&tn->nametbl_lock); } @@ -748,9 +753,10 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) { struct tipc_net *tn = net_generic(s->net, tipc_net_id); struct name_seq *seq; + u32 type = tipc_subscrp_convert_seq_type(s->evt.s.seq.type, s->swap); spin_lock_bh(&tn->nametbl_lock); - seq = nametbl_find_seq(s->net, s->seq.type); + seq = nametbl_find_seq(s->net, type); if (seq != NULL) { spin_lock_bh(&seq->lock); list_del_init(&s->nameseq_list); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index fb8406573f30..702a81d8dbb6 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -92,23 +92,39 @@ static void tipc_subscrp_send_event(struct tipc_subscription *sub, * * Returns 1 if there is overlap, otherwise 0. 
*/ -int tipc_subscrp_check_overlap(struct tipc_subscription *sub, u32 found_lower, +int tipc_subscrp_check_overlap(struct tipc_name_seq *seq, u32 found_lower, u32 found_upper) { - if (found_lower < sub->seq.lower) - found_lower = sub->seq.lower; - if (found_upper > sub->seq.upper) - found_upper = sub->seq.upper; + if (found_lower < seq->lower) + found_lower = seq->lower; + if (found_upper > seq->upper) + found_upper = seq->upper; if (found_lower > found_upper) return 0; return 1; } +u32 tipc_subscrp_convert_seq_type(u32 type, int swap) +{ + return htohl(type, swap); +} + +void tipc_subscrp_convert_seq(struct tipc_name_seq *in, int swap, + struct tipc_name_seq *out) +{ + out-
[PATCH net-next v2 08/10] tipc: protect tipc_subscrb_get() with subscriber spin lock
Until now, during subscription creation the mod_timer() & tipc_subscrb_get() are called after releasing the subscriber spin lock. In an SMP system when performing a subscription creation, if the subscription timeout occurs simultaneously (the timer is scheduled to run on another CPU) then the timer thread might decrement the subscriber's refcount before the create thread increments the refcount. This can be simulated by creating a subscription with timeout=0 and sometimes the timeout occurs before the create request is complete. This leads to the following message: [30.702949] BUG: spinlock bad magic on CPU#1, kworker/u8:3/87 [30.703834] general protection fault: [#1] SMP [30.704826] CPU: 1 PID: 87 Comm: kworker/u8:3 Not tainted 4.4.0-rc8+ #18 [30.704826] Workqueue: tipc_rcv tipc_recv_work [tipc] [30.704826] task: 88003f878600 ti: 88003fae task.ti: 88003fae [30.704826] RIP: 0010:[] [] spin_dump+0x5c/0xe0 [...] [30.704826] Call Trace: [30.704826] [] spin_bug+0x26/0x30 [30.704826] [] do_raw_spin_lock+0xe5/0x120 [30.704826] [] _raw_spin_lock_bh+0x19/0x20 [30.704826] [] tipc_subscrb_rcv_cb+0x1d0/0x330 [tipc] [30.704826] [] tipc_receive_from_sock+0xc1/0x150 [tipc] [30.704826] [] tipc_recv_work+0x3f/0x80 [tipc] [30.704826] [] process_one_work+0x149/0x3c0 [30.704826] [] worker_thread+0x66/0x460 [30.704826] [] ? process_one_work+0x3c0/0x3c0 [30.704826] [] ? process_one_work+0x3c0/0x3c0 [30.704826] [] kthread+0xed/0x110 [30.704826] [] ? kthread_create_on_node+0x190/0x190 [30.704826] [] ret_from_fork+0x3f/0x70 In this commit, 1. we remove the check for the return code for mod_timer() 2. we protect tipc_subscrb_get() using the subscriber spin lock. We increment the subscriber's refcount as soon as we add the subscription to the subscriber's subscription list.
Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index e4ebbc161e42..7d226ecb0490 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -284,13 +284,13 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); + tipc_subscrb_get(subscriber); sub->subscriber = subscriber; tipc_nametbl_subscribe(sub); spin_unlock_bh(&subscriber->lock); timeout = htohl(sub->evt.s.timeout, swap); - if (!mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout))) - tipc_subscrb_get(subscriber); + mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); } /* Handle one termination request for the subscriber */ -- 2.1.4
[PATCH net-next v2 06/10] tipc: fix connection abort when receiving invalid cancel request
Until now, the subscriber's endianness for a subscription create/cancel request is determined as: swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)) The checks are performed only for port/service subscriptions. The swap calculation is incorrect if the filter in the subscription cancellation request is set to TIPC_SUB_CANCEL (it's a malformed cancel request, as the corresponding subscription create filter is missing). Thus, the check for whether the request is a cancellation fails and the request is treated as a subscription create request. The subscription creation fails as the request is illegal, which terminates this connection. In this commit we determine the endianness by including TIPC_SUB_CANCEL, which will set swap correctly and the request is processed as a cancellation request. Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 531227208ae2..24d2c8128bac 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -309,7 +309,8 @@ static void tipc_subscrb_rcv_cb(struct net *net, int conid, int swap; /* Determine subscriber's endianness */ - swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE)); + swap = !(s->filter & (TIPC_SUB_PORTS | TIPC_SUB_SERVICE | + TIPC_SUB_CANCEL)); /* Detect & process a subscription cancellation request */ if (s->filter & htohl(TIPC_SUB_CANCEL, swap)) { -- 2.1.4
[PATCH net-next v2 10/10] tipc: use alloc_ordered_workqueue() instead of WQ_UNBOUND w/ max_active = 1
Until now, tipc_rcv and tipc_send workqueues in the server are allocated with parameters WQ_UNBOUND & max_active = 1. These parameters make the call equivalent to alloc_ordered_workqueue(). The latter form is more explicit and can inherit future ordered_workqueue changes. In this commit we replace alloc_workqueue() with the more readable alloc_ordered_workqueue(). Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/server.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 922e04a43396..2446bfbaa309 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -571,13 +571,13 @@ static void tipc_work_stop(struct tipc_server *s) static int tipc_work_start(struct tipc_server *s) { - s->rcv_wq = alloc_workqueue("tipc_rcv", WQ_UNBOUND, 1); + s->rcv_wq = alloc_ordered_workqueue("tipc_rcv", 0); if (!s->rcv_wq) { pr_err("can't start tipc receive workqueue\n"); return -ENOMEM; } - s->send_wq = alloc_workqueue("tipc_send", WQ_UNBOUND, 1); + s->send_wq = alloc_ordered_workqueue("tipc_send", 0); if (!s->send_wq) { pr_err("can't start tipc send workqueue\n"); destroy_workqueue(s->rcv_wq); -- 2.1.4
[PATCH net-next v2 09/10] tipc: donot create timers if subscription timeout = TIPC_WAIT_FOREVER
Until now, we create timers even for the subscription requests with timeout = TIPC_WAIT_FOREVER. This can be improved by avoiding timer creation when the timeout is set to TIPC_WAIT_FOREVER. In this commit, we introduce a check to create timers only when timeout != TIPC_WAIT_FOREVER. Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 7d226ecb0490..22963cafd5ed 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -188,12 +188,14 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); /* Destroy any existing subscriptions for subscriber */ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, subscrp_list) { - if (del_timer(&sub->timer)) { + timeout = htohl(sub->evt.s.timeout, sub->swap); + if ((timeout == TIPC_WAIT_FOREVER) || del_timer(&sub->timer)) { tipc_subscrp_delete(sub); tipc_subscrb_put(subscriber); } @@ -217,13 +219,16 @@ static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); /* Find first matching subscription, exit if not found */ list_for_each_entry_safe(sub, temp, &subscriber->subscrp_list, subscrp_list) { if (!memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) { - if (del_timer(&sub->timer)) { + timeout = htohl(sub->evt.s.timeout, sub->swap); + if ((timeout == TIPC_WAIT_FOREVER) || + del_timer(&sub->timer)) { tipc_subscrp_delete(sub); tipc_subscrb_put(subscriber); } @@ -267,7 +272,6 @@ static struct tipc_subscription *tipc_subscrp_create(struct net *net, sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); - setup_timer(&sub->timer, 
tipc_subscrp_timeout, (unsigned long)sub); return sub; } @@ -290,6 +294,10 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, spin_unlock_bh(&subscriber->lock); timeout = htohl(sub->evt.s.timeout, swap); + if (timeout == TIPC_WAIT_FOREVER) + return; + + setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout)); } -- 2.1.4
[PATCH net-next v2 02/10] tipc: remove filter and timeout elements from struct tipc_subscription
Until now, struct tipc_subscription has duplicate timeout and filter attributes present: 1. directly as members of struct tipc_subscription 2. in struct tipc_subscr, which is contained in struct tipc_event In this commit, we remove the references to these elements as members of struct tipc_subscription and replace them with elements from struct tipc_subscr. Acked-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/subscr.c | 15 --- net/tipc/subscr.h | 5 - 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 991ac81b3920..fb8406573f30 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -110,7 +110,8 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { if (!tipc_subscrp_check_overlap(sub, found_lower, found_upper)) return; - if (!must && !(sub->filter & TIPC_SUB_PORTS)) + if (!must && + !(htohl(sub->evt.s.filter, sub->swap) & TIPC_SUB_PORTS)) return; tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, @@ -222,6 +223,7 @@ static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_subscription *sub; + u32 timeout, filter; int swap; /* Determine subscriber's endianness */ @@ -253,10 +255,8 @@ static int tipc_subscrp_create(struct net *net, struct tipc_subscr *s, sub->seq.type = htohl(s->seq.type, swap); sub->seq.lower = htohl(s->seq.lower, swap); sub->seq.upper = htohl(s->seq.upper, swap); - sub->timeout = msecs_to_jiffies(htohl(s->timeout, swap)); - sub->filter = htohl(s->filter, swap); - if ((!(sub->filter & TIPC_SUB_PORTS) == -!(sub->filter & TIPC_SUB_SERVICE)) || + filter = htohl(s->filter, swap); + if (((filter & TIPC_SUB_PORTS) && (filter & TIPC_SUB_SERVICE)) || (sub->seq.lower > sub->seq.upper)) { pr_warn("Subscription rejected, illegal request\n"); kfree(sub); @@ -265,13 +265,14 @@ static int tipc_subscrp_create(struct net *net, struct 
tipc_subscr *s, spin_lock_bh(&subscriber->lock); list_add(&sub->subscrp_list, &subscriber->subscrp_list); spin_unlock_bh(&subscriber->lock); + sub->subscriber = subscriber; sub->swap = swap; memcpy(&sub->evt.s, s, sizeof(*s)); atomic_inc(&tn->subscription_count); setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); - sub->timeout += jiffies; - if (!mod_timer(&sub->timer, sub->timeout)) + timeout = htohl(sub->evt.s.timeout, swap); + if (!mod_timer(&sub->timer, jiffies + msecs_to_jiffies(timeout))) tipc_subscrb_get(subscriber); *sub_p = sub; return 0; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index 92ee18cc5fe6..9e69dbf05626 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -50,12 +50,9 @@ struct tipc_subscriber; * @subscriber: pointer to its subscriber * @seq: name sequence associated with subscription * @net: point to network namespace - * @timeout: duration of subscription (in ms) - * @filter: event filtering to be done for subscription * @timer: timer governing subscription duration (optional) * @nameseq_list: adjacent subscriptions in name sequence's subscription list * @subscrp_list: adjacent subscriptions in subscriber's subscription list - * @server_ref: object reference of server port associated with subscription * @swap: indicates if subscriber uses opposite endianness in its messages * @evt: template for events generated by subscription */ @@ -63,8 +60,6 @@ struct tipc_subscription { struct tipc_subscriber *subscriber; struct tipc_name_seq seq; struct net *net; - unsigned long timeout; - u32 filter; struct timer_list timer; struct list_head nameseq_list; struct list_head subscrp_list; -- 2.1.4