[tipc-discussion] [PATCH net v1] tipc: fix hanging clients using poll with EPOLLOUT flag
commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") introduced a regression for clients using non-blocking sockets. After the commit, we send EPOLLOUT event to the client even in TIPC_CONNECTING state. This causes the subsequent send() to fail with ENOTCONN, as the socket is still not in TIPC_ESTABLISHED state. In this commit, we: - improve the fix for hanging poll() by replacing sk_data_ready() with sk_state_change() to wake up all clients. - revert the faulty updates introduced by commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets"). Fixes: 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") Signed-off-by: Parthasarathy Bhuvaragan Acked-by: Jon Maloy --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b542f14ed444..2851937f6e32 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -734,11 +734,11 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: - case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= EPOLLOUT; /* fall through */ case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(>sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; break; @@ -2041,7 +2041,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (msg_data_sz(hdr)) return true; /* Empty ACK-, - wake up sleeping connect() and drop */ - sk->sk_data_ready(sk); + sk->sk_state_change(sk); msg_set_dest_droppable(hdr, 1); return false; } -- 2.21.0 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1] tipc: fix hanging clients using poll with EPOLLOUT flag
commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") introduced a regression for clients using non-blocking sockets. After the commit, we send EPOLLOUT event to the client even in TIPC_CONNECTING state. This causes the subsequent send() to fail with ENOTCONN, as the socket is still not in TIPC_ESTABLISHED state. In this commit, we: - improve the fix for hanging poll() by replacing sk_data_ready() with sk_state_change() to wake up all clients. - revert the faulty updates introduced by commit 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets"). Fixes: 517d7c79bdb398 ("tipc: fix hanging poll() for stream sockets") Signed-off-by: Parthasarathy Bhuvaragan --- net/tipc/socket.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b542f14ed444..2851937f6e32 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -734,11 +734,11 @@ static __poll_t tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: - case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= EPOLLOUT; /* fall through */ case TIPC_LISTEN: + case TIPC_CONNECTING: if (!skb_queue_empty(>sk_receive_queue)) revents |= EPOLLIN | EPOLLRDNORM; break; @@ -2041,7 +2041,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (msg_data_sz(hdr)) return true; /* Empty ACK-, - wake up sleeping connect() and drop */ - sk->sk_data_ready(sk); + sk->sk_state_change(sk); msg_set_dest_droppable(hdr, 1); return false; } -- 2.21.0 ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next] tipc:add more debug printouts
Hi Hoang, In general I feel that you should avoild using pr_debug when the information belongs to per socket. For sockets use tracepoints ( https://lwn.net/Articles/379903/) or atleast the ratelimited variants. See my comments below. On Thu, Sep 27, 2018 at 11:04 AM Hoang Le wrote: > It is useful when using dynamic debug (dyndbg) to turn on/off > debug printouts. > In this commit, we provide as much as possible debug information > that we used to troubleshoot issues in our lab. > > Signed-off-by: Hoang Le > --- > net/tipc/bearer.c | 3 +++ > net/tipc/link.c | 68 ++- > net/tipc/link.h | 4 +++ > net/tipc/node.c | 14 -- > net/tipc/socket.c | 34 ++-- > 5 files changed, 100 insertions(+), 23 deletions(-) > > diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c > index 91891041e5e1..1ad49f14c129 100644 > --- a/net/tipc/bearer.c > +++ b/net/tipc/bearer.c [partha] To get the module name prefixed in the print, you add the line below as the first line in the C file: #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt > @@ -607,6 +607,9 @@ static int tipc_l2_device_event(struct notifier_block > *nb, unsigned long evt, > if (!b) > return NOTIFY_DONE; > > + pr_debug("Bearer %s got event %lu from dev %s, status %lu, > carrier:%d\n", > +b->name, evt, dev->name, b->up, netif_carrier_ok(dev)); > + > switch (evt) { > case NETDEV_CHANGE: > if (netif_carrier_ok(dev)) > diff --git a/net/tipc/link.c b/net/tipc/link.c > index b1f0bee54eac..25bed78816d0 100644 > --- a/net/tipc/link.c > +++ b/net/tipc/link.c > @@ -542,15 +542,16 @@ bool tipc_link_bc_create(struct net *net, u32 > ownnode, u32 peer, > int tipc_link_fsm_evt(struct tipc_link *l, int evt) > { > int rc = 0; > + int state = l->state; [partha] int old_state = l->state; > > - switch (l->state) { > + switch (state) { > case LINK_RESETTING: > switch (evt) { > case LINK_PEER_RESET_EVT: > - l->state = LINK_PEER_RESET; > + state = LINK_PEER_RESET; > break; > case LINK_RESET_EVT: > - l->state = LINK_RESET; > + state = LINK_RESET; > break; > case LINK_FAILURE_EVT: > case LINK_FAILOVER_BEGIN_EVT: > @@ -565,10 +566,10 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) > case LINK_RESET: > switch (evt) { > case LINK_PEER_RESET_EVT: > - l->state = LINK_ESTABLISHING; > + state = LINK_ESTABLISHING; > break; > case LINK_FAILOVER_BEGIN_EVT: > - l->state = LINK_FAILINGOVER; > + state = LINK_FAILINGOVER; > case LINK_FAILURE_EVT: > case LINK_RESET_EVT: > case LINK_ESTABLISH_EVT: > @@ -583,7 +584,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) > case LINK_PEER_RESET: > switch (evt) { > case LINK_RESET_EVT: > - l->state = LINK_ESTABLISHING; > + state = LINK_ESTABLISHING; > break; > case LINK_PEER_RESET_EVT: > case LINK_ESTABLISH_EVT: > @@ -600,7 +601,7 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) > case LINK_FAILINGOVER: > switch (evt) { > case LINK_FAILOVER_END_EVT: > - l->state = LINK_RESET; > + state = LINK_RESET; > break; > case LINK_PEER_RESET_EVT: > case LINK_RESET_EVT: > @@ -617,13 +618,13 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) > case LINK_ESTABLISHING: > switch (evt) { > case LINK_ESTABLISH_EVT: > - l->state = LINK_ESTABLISHED; > + state = LINK_ESTABLISHED; > break; > case LINK_FAILOVER_BEGIN_EVT: > - l->state = LINK_FAILINGOVER; > + state = LINK_FAILINGOVER; > break; > case LINK_RESET_EVT: > - l->state = LINK_RESET; > + state = LINK_RESET; > break; > case LINK_FAILURE_EVT: > case LINK_PEER_RESET_EVT: > @@ -638,21 +639,21 @@ int tipc_link_fsm_evt(struct tipc_link *l, int evt) > case LINK_ESTABLISHED: > switch (evt) { > case LINK_PEER_RESET_EVT: > - l->state = LINK_PEER_RESET; > + state = LINK_PEER_RESET; > rc |= TIPC_LINK_DOWN_EVT; >
[tipc-discussion] [PATCH net v1 1/1] tipc: fix hanging poll() for stream sockets
In commit 42b531de17d2f6 ("tipc: Fix missing connection request handling"), we replaced unconditional wakeup() with condtional wakeup for clients with flags POLLIN | POLLRDNORM | POLLRDBAND. This breaks the applications which do a connect followed by poll with POLLOUT flag. These applications are not woken when the connection is ESTABLISHED and hence sleep forever. In this commit, we fix it by including the POLLOUT event for sockets in TIPC_CONNECTING state. Fixes: 42b531de17d2f6 ("tipc: Fix missing connection request handling") Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@gmail.com> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41127d0b925e..3b4084480377 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(>sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; -- 2.11.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] PATCH v1: 0001-tipc-fix-hanging-poll-for-stream-clients.patch
>From 2c23b6a88a134bd9df0acc998383a34aa1df6583 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@edgeware.tv> Date: Fri, 22 Dec 2017 15:43:23 +0100 Subject: [PATCH] tipc: fix hanging poll() for stream clients In commit 42b531de17d2f6 ("tipc: Fix missing connection request handling"), we replaced unconditional wakeup() with condtional wakeup for clients with flags POLLIN | POLLRDNORM | POLLRDBAND. This breaks the ABI for applications which do a connect followed by poll with POLLOUT flag. These applications are not woken when the connection is ESTABLISHED. In this commit, we fix it by including the POLLOUT event for sockets in TIPC_CONNECTING state. Fixes: 42b531de17d2f6 ("tipc: Fix missing connection request handling") Reported-by: pontus.skoldst...@ri.se Signed-off-by: Parthasarathy Bhuvaragan < parthasarathy.bhuvara...@edgeware.tv> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 41127d0b925e..3b4084480377 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -727,11 +727,11 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, switch (sk->sk_state) { case TIPC_ESTABLISHED: + case TIPC_CONNECTING: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: - case TIPC_CONNECTING: if (!skb_queue_empty(>sk_receive_queue)) revents |= POLLIN | POLLRDNORM; break; -- 2.11.0 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] FW: Kernel crash
Hi Rune, What kind of timeout do you use in your subscription? Is it 0 or low value (< 10ms) ?? Apart from the fixes Jon has suggested, we still have issues as having low timeouts are very racy. /Partha On Wed, Nov 29, 2017 at 3:05 PM, Rune Torgersenwrote: > (Resending as I think it got lost somewhere). > > A bug that I thought had been fixed is rearing its ugly head again in > latest Ubuntu 16.04 LTS kernel (4.4.0-97) > It is happening to me quite frequently (2-3 times a week). > > The application where this happens uses lots of short lived sockets, and > also lots of short-lived connections to the topology server. > > [151611.149711] BUG: unable to handle kernel NULL pointer dereference at > 0028 > [151611.149946] IP: [] tipc_nametbl_unsubscribe+0x72/0x100 > [tipc] > [151611.150069] PGD 0 > [151611.150104] Oops: 0002 [#1] SMP > [151611.150160] Modules linked in: tipc ip6_udp_tunnel udp_tunnel > intel_powerclamp coretemp kvm_intel gpio_ich input_leds joydev kvm > irqbypass i7core_edac edac_core serio_raw lpc_ich shpchp hpilo 8250_fintek > ipmi_ssif acpi_power_meter mac_hid lp parport ipmi_watchdog ipmi_si > ipmi_devintf ipmi_msghandler autofs4 raid10 raid456 async_raid6_recov > async_memcpy async_pq async_xor async_tx xor raid6_pq libcrc32c raid1 raid0 > multipath linear amdkfd amd_iommu_v2 radeon hid_generic i2c_algo_bit ttm > drm_kms_helper syscopyarea sysfillrect psmouse sysimgblt fb_sys_fops usbhid > drm hid pata_acpi bnx2 hpsa netxen_nic scsi_transport_sas fjes > [151611.151291] CPU: 1 PID: 14873 Comm: kworker/u64:2 Tainted: G > I 4.4.0-97-generic #120-Ubuntu > [151611.151429] Hardware name: HP ProLiant DL360 G6, BIOS P64 05/15/2010 > [151611.151547] Workqueue: tipc_rcv tipc_recv_work [tipc] > [151611.151631] task: 880213c98cc0 ti: 8802131b8000 task.ti: > 8802131b8000 > [151611.151740] RIP: 0010:[] [] > tipc_nametbl_unsubscribe+0x72/0x100 [tipc] > [151611.151889] RSP: 0018:88021f443e10 EFLAGS: 00010246 > [151611.151967] RAX: 880213d87f80 RBX: 880213d87f00 RCX: > 0020 > [151611.152071] RDX: 000e RSI: 0067 RDI: > 8802101a9638 > [151611.152176] RBP: 88021f443e30 R08: 88021f45a0c0 R09: > 880217003b00 > [151611.152280] R10: 8800da043f40 R11: 880213c98d20 R12: > 8802101a9600 > [151611.152385] R13: 8800d9fa9120 R14: 8802101a9638 R15: > 880213d87f00 > [151611.152490] FS: () GS:88021f44() > knlGS: > [151611.152631] CS: 0010 DS: ES: CR0: 8005003b > [151611.152730] CR2: 0028 CR3: 01e0a000 CR4: > 06e0 > [151611.152835] Stack: > [151611.152865] 880213d87f00 8800d9fa8000 880213c499c8 > c0468ed0 > [151611.152989] 88021f443e50 c04688bf 880213d87f00 > 880213c499c0 > [151611.153113] 88021f443e78 c0468f15 88021f44ddc0 > 880213d87f30 > [151611.153237] Call Trace: > [151611.153275] > [151611.153311] [] ? tipc_subscrb_shutdown_cb+0xc0/0xc0 > [tipc] > [151611.153422] [] tipc_subscrp_delete+0x2f/0x80 [tipc] > [151611.153523] [] tipc_subscrp_timeout+0x45/0x70 [tipc] > [151611.153624] [] call_timer_fn+0x35/0x120 > [151611.153735] [] ? tipc_subscrb_shutdown_cb+0xc0/0xc0 > [tipc] > [151611.153846] [] run_timer_softirq+0x23a/0x2f0 > [151611.153936] [] __do_softirq+0x101/0x290 > [151611.154017] [] irq_exit+0xa3/0xb0 > [151611.154091] [] smp_apic_timer_interrupt+0x42/0x50 > [151611.154185] [] apic_timer_interrupt+0x82/0x90 > [151611.154272] > [151611.154305] [] ? _raw_spin_unlock_irqrestore+ > 0x15/0x20 > [151611.154407] [] mod_timer+0x10f/0x240 > [151611.154489] [] tipc_subscrb_rcv_cb+0x1c0/0x390 > [tipc] > [151611.154591] [] tipc_receive_from_sock+0xc2/0x120 > [tipc] > [151611.154695] [] tipc_recv_work+0x2b/0x60 [tipc] > [151611.154809] [] process_one_work+0x165/0x480 > [151611.159008] [] worker_thread+0x4b/0x4c0 > [151611.163372] [] ? process_one_work+0x480/0x480 > [151611.167622] [] kthread+0xe5/0x100 > [151611.171755] [] ? kthread_create_on_node+0x1e0/0x1e0 > [151611.175858] [] ret_from_fork+0x3f/0x70 > [151611.17] [] ? kthread_create_on_node+0x1e0/0x1e0 > [151611.184004] Code: ff ff 48 85 c0 74 56 4c 8d 70 38 49 89 c4 4c 89 f7 > e8 43 92 3d c1 48 8b 8b 80 00 00 00 48 8b 93 88 00 00 00 48 8d 83 80 00 00 > 00 <48> 89 51 08 48 89 0a 48 89 83 80 00 00 00 48 89 83 88 00 00 00 > [151611.192678] RIP [] tipc_nametbl_unsubscribe+0x72/0x100 > [tipc] > [151611.196733] RSP > [151611.200739] CR2: 0028 > > > > -- > Check out the vibrant tech community on one of the world's most > engaging tech sites, Slashdot.org! http://sdm.link/slashdot > ___ > tipc-discussion mailing list > tipc-discussion@lists.sourceforge.net >
[tipc-discussion] [PATCH net v1 1/1] tipc: use only positive error codes in messages
In commit e3a77561e7d32 ("tipc: split up function tipc_msg_eval()"), we have updated the function tipc_msg_lookup_dest() to set the error codes to negative values at destination lookup failures. Thus when the function sets the error code to -TIPC_ERR_NO_NAME, its inserted into the 4 bit error field of the message header as 0xf instead of TIPC_ERR_NO_NAME (1). The value 0xf is an unknown error code. In this commit, we set only positive error code. Fixes: e3a77561e7d32 ("tipc: split up function tipc_msg_eval()") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/msg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 6ef379f004ac..121e59a1d0e7 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -551,7 +551,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return false; if (msg_errcode(msg)) return false; - *err = -TIPC_ERR_NO_NAME; + *err = TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; msg = buf_msg(skb); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 3/3] tipc: context imbalance at node read unlock
If we fail to find a valid bearer in tipc_node_get_linkname(), node_read_unlock() is called without holding the node read lock. This commit fixes this error. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index b113a52f8914..7dd22330a6b4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/3] tipc: reassign pointers after skb reallocation / linearization
In tipc_msg_reverse(), we assign skb attributes to local pointers in stack at startup. This is followed by skb_linearize() and for cloned buffers we perform skb relocation using pskb_expand_head(). Both these methods may update the skb attributes and thus making the pointers incorrect. In this commit, we fix this error by ensuring that the pointers are re-assigned after any of these skb operations. Fixes: 29042e19f2c60 ("tipc: let function tipc_msg_reverse() expand header when needed") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Reviewed-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/msg.c | 7 +-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index dcd90e6fa7c3..6ef379f004ac 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -479,13 +479,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { struct sk_buff *_skb = *skb; - struct tipc_msg *hdr = buf_msg(_skb); + struct tipc_msg *hdr; struct tipc_msg ohdr; - int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); + int dlen; if (skb_linearize(_skb)) goto exit; hdr = buf_msg(_skb); + dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); if (msg_dest_droppable(hdr)) goto exit; if (msg_errcode(hdr)) @@ -511,6 +512,8 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) goto exit; + /* reassign after skb header modifications */ + hdr = buf_msg(_skb); /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_non_seq(hdr, 0); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/3] tipc: perform skb_linearize() before parsing the inner header
In tipc_rcv(), we linearize only the header and usually the packets are consumed as the nodes permit direct reception. However, if the skb contains tunnelled message due to fail over or synchronization we parse it in tipc_node_check_state() without performing linearization. This will cause link disturbances if the skb was non linear. In this commit, we perform linearization for the above messages. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Reviewed-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/node.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9b4dcb6a16b5..b113a52f8914 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1557,6 +1557,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check/update node state before receiving */ if (unlikely(skb)) { + if (unlikely(skb_linearize(skb))) + goto discard; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, )) { if (le->link) { -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 0/3] tipc: buffer reassignment fixes
This series contains fixes for buffer reassignments and a context imbalance. Parthasarathy Bhuvaragan (3): tipc: perform skb_linearize() before parsing the inner header tipc: reassign pointers after skb reallocation / linearization tipc: context imbalance at node read unlock net/tipc/msg.c | 7 +-- net/tipc/node.c | 4 +++- 2 files changed, 8 insertions(+), 3 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] Add tipc-link-watcher
Hi Akbar, Thanks for the patch. Its merged now. https://sourceforge.net/p/tipc/tipcutils/ci/88ffe493ae72d1c16fa5bdb36ed92d39a05f9fd2/ /Partha On 06/15/2017 12:22 PM, Akbar Rezvan wrote: Hi Ying, Sorry for the delay. Here it is (Pls see below): Ciao//Anthony From 11155e72babf481b526e3aaf1d9427c41d718dd5 Mon Sep 17 00:00:00 2001 From: Akbar RezvanpourDate: Thu, 1 Jun 2017 17:58:57 +0200 Subject: [PATCH] Introducing tipc-link-watcher In this commit we add tipc-link-watcher which monitors the tipc link status and reports in foreground mode as shown below. Sample output of the program when invoked by following options: ./tipc-link-watcher -d -p 2 -s 2 1.1.2:data1-1.1.1:data1: link reset:0 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data1-1.1.3:data1: link reset:0 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data1-1.1.4:data1: link reset:0 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data0-1.1.1:data0: link reset:4 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data0-1.1.4:data0: link reset:5 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data0-1.1.3:data0: link reset:5 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data0-1.1.1:data1: link reset:0 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) 1.1.2:data0-1.1.3:data1: link reset:0 rx_packets:0 tx_packets:0 rx_loss:0 tx_loss:0 Working :) The user is always able to dump the tipc link statistics. Further information is available in the README. Signed-off-by: Akbar Rezvanpour --- .gitignore|1 + Makefile.am |2 +- configure.ac |1 + tipc-link-watcher/Makefile.am |4 + tipc-link-watcher/README | 233 tipc-link-watcher/tipc-link-watcher.c | 2143 + 6 files changed, 2383 insertions(+), 1 deletion(-) create mode 100644 tipc-link-watcher/Makefile.am create mode 100644 tipc-link-watcher/README create mode 100644 tipc-link-watcher/tipc-link-watcher.c diff --git a/.gitignore b/.gitignore index f544ddfceff9..0ff143dea8ec 100644 --- a/.gitignore +++ b/.gitignore @@ -48,3 +48,4 @@ tipc-pipe/tipc-pipe tipclog/tipclog multicast_blast/mcast_tipc multicast_blast/group_cast +tipc-link-watcher/tipc-link-watcher diff --git a/Makefile.am b/Makefile.am index 0643d78e03b5..5f0d5e979b15 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,4 +1,4 @@ SUBDIRS=tipc-pipe ptts demos man scripts if TIPC_LINK_STATE_SUBSCRITION -SUBDIRS+=tipclog multicast_blast +SUBDIRS+=tipclog multicast_blast tipc-link-watcher endif diff --git a/configure.ac b/configure.ac index 0602878bada7..b466d56cc522 100644 --- a/configure.ac +++ b/configure.ac @@ -40,6 +40,7 @@ AC_CONFIG_FILES([ man/Makefile scripts/Makefile tipclog/Makefile + tipc-link-watcher/Makefile ]) AM_CONDITIONAL(WITH_SCRIPTS, false) AC_ARG_ENABLE(scripts, diff --git a/tipc-link-watcher/Makefile.am b/tipc-link-watcher/Makefile.am new file mode 100644 index ..f8484d07ff45 --- /dev/null +++ b/tipc-link-watcher/Makefile.am @@ -0,0 +1,4 @@ +sbin_PROGRAMS=tipc-link-watcher +tipc_link_watcher_SOURCES=tipc-link-watcher.c +tipc_link_watcher_CFLAGS=-Werror $(shell $(PKG_CONFIG) libmnl --cflags) +tipc_link_watcher_LDFLAGS=$(shell $(PKG_CONFIG) libmnl --libs) diff --git a/tipc-link-watcher/README b/tipc-link-watcher/README new file mode 100644 index ..5c8a7b901b80 --- /dev/null +++ b/tipc-link-watcher/README @@ -0,0 +1,233 @@ +#The following commits are pre-requisite for this feature. +1. https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=78acb1f9b898e85fa2c1e28e700b54b66b288e8d +2. https://lwn.net/Articles/611994/ +#The former commit is implemented in kernel v3.16 and the later in v3.19. So this tool will work only from v3.19. + +#Install the shared library libmnl-dev on your build environment. + +=== To Build tipc-link-watcher === + +#To build the daemon (SuSE environment): + +#First install libmnl on your build host: (libmnl-1.0.4) + +"zypper install libmnl-devel" + +#and run: + +"gcc -o tipc-link-watcher $(pkg-config --libs --cflags libmnl) *.c" + +=== 2Do === +- Possible bug in current code or TIPC regarding values of rx_packets and tx_packets. + When a link is reset, rx_packets and tx_packets contain rubbish values until the link is reestablished. + +=== Couple of notes === + +-Option "-x" and "-a" should not be used at the same time. + +-One shot (-x): +You will get one single "layer 1" sample when you run with option "-x".
[tipc-discussion] [PATCH net v1 1/1] tipc: context imbalance at node read unlock
If we fail to find a valid bearer in tipc_node_get_linkname(), node_read_unlock() is called without holding the node read lock. This commit fixes this error. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 9b4dcb6a16b5..64972736bf43 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v1] tipc: permit bond slave as bearer
For a bond slave device as a tipc bearer, the dev represents the bond interface and orig_dev represents the slave in tipc_l2_rcv_msg(). Since we decode the tipc_ptr from bonding device (dev), we fail to find the bearer and thus tipc links are not established. In this commit, we register the tipc protocol callback per device and look for tipc bearer from both the devices. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/bearer.c | 26 +++--- net/tipc/bearer.h | 2 ++ 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 767e0537dde5..89cd061c4468 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) } static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name @@ -428,6 +430,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = tipc_l2_rcv_msg; + dev_add_pack(>pt); memset(>bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -447,6 +453,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(>pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -594,11 +601,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(dev->tipc_ptr); + b = rcu_dereference_rtnl(dev->tipc_ptr) ?: + rcu_dereference_rtnl(orig_dev->tipc_ptr); if (likely(b && test_bit(0, >up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; - tipc_rcv(dev_net(dev), skb, b); + tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -659,11 +667,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, return NOTIFY_OK; } -static struct packet_type tipc_packet_type __read_mostly = { - .type = htons(ETH_P_TIPC), - .func = tipc_l2_rcv_msg, -}; - static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, @@ -671,19 +674,12 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { - int err; - - err = register_netdevice_notifier(); - if (err) - return err; - dev_add_pack(_packet_type); - return 0; + return register_netdevice_notifier(); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(); - dev_remove_pack(_packet_type); } void tipc_bearer_stop(struct net *net) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 635c9086e19a..e07a55a80c18 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -131,6 +131,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer @@ -151,6 +152,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct packet_type pt; struct rcu_head rcu; u32 priority; u32 window; -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/2] tipc: fix a race condition of releasing subscriber object
From: Ying Xue <ying@windriver.com> No matter whether a request is inserted into workqueue as a work item to cancel a subscription or to delete a subscription's subscriber asynchronously, the work items may be executed in different workers. As a result, it doesn't mean that one request which is raised prior to another request is definitely handled before the latter. By contrast, if the latter request is executed before the former request, below error may happen: [ 656.183644] BUG: spinlock bad magic on CPU#0, kworker/u8:0/12117 [ 656.184487] general protection fault: [#1] SMP [ 656.185160] Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: ip6_udp_tunnel] [ 656.187003] CPU: 0 PID: 12117 Comm: kworker/u8:0 Not tainted 4.11.0-rc7+ #6 [ 656.187920] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 [ 656.188690] Workqueue: tipc_rcv tipc_recv_work [tipc] [ 656.189371] task: 88003f5cec40 task.stack: c90004448000 [ 656.190157] RIP: 0010:spin_bug+0xdd/0xf0 [ 656.190678] RSP: 0018:c9000444bcb8 EFLAGS: 00010202 [ 656.191375] RAX: 0034 RBX: 88003f8d1388 RCX: [ 656.192321] RDX: 88003ba13708 RSI: 88003ba0cd08 RDI: 88003ba0cd08 [ 656.193265] RBP: c9000444bcd0 R08: 0030 R09: 6b6b6b6b [ 656.194208] R10: 8800bde3e000 R11: 01b4 R12: 6b6b6b6b6b6b6b6b [ 656.195157] R13: 81a3ca64 R14: 88003f8d1388 R15: 88003f8d13a0 [ 656.196101] FS: () GS:88003ba0() knlGS: [ 656.197172] CS: 0010 DS: ES: CR0: 80050033 [ 656.197935] CR2: 7f0b3d2e6000 CR3: 3ef9e000 CR4: 06f0 [ 656.198873] Call Trace: [ 656.199210] do_raw_spin_lock+0x66/0xa0 [ 656.199735] _raw_spin_lock_bh+0x19/0x20 [ 656.200258] tipc_subscrb_subscrp_delete+0x28/0xf0 [tipc] [ 656.200990] tipc_subscrb_rcv_cb+0x45/0x260 [tipc] [ 656.201632] tipc_receive_from_sock+0xaf/0x100 [tipc] [ 656.202299] tipc_recv_work+0x2b/0x60 [tipc] [ 656.202872] process_one_work+0x157/0x420 [ 656.203404] worker_thread+0x69/0x4c0 [ 656.203898] kthread+0x138/0x170 [ 656.204328] ? process_one_work+0x420/0x420 [ 656.204889] ? kthread_create_on_node+0x40/0x40 [ 656.205527] ret_from_fork+0x29/0x40 [ 656.206012] Code: 48 8b 0c 25 00 c5 00 00 48 c7 c7 f0 24 a3 81 48 81 c1 f0 05 00 00 65 8b 15 61 ef f5 7e e8 9a 4c 09 00 4d 85 e4 44 8b 4b 08 74 92 <45> 8b 84 24 40 04 00 00 49 8d 8c 24 f0 05 00 00 eb 8d 90 0f 1f [ 656.208504] RIP: spin_bug+0xdd/0xf0 RSP: c9000444bcb8 [ 656.209798] ---[ end trace e2a800e6eb0770be ]--- In above scenario, the request of deleting subscriber was performed earlier than the request of canceling a subscription although the latter was issued before the former, which means tipc_subscrb_delete() was called before tipc_subscrp_cancel(). As a result, when tipc_subscrb_subscrp_delete() called by tipc_subscrp_cancel() was executed to cancel a subscription, the subscription's subscriber refcnt had been decreased to 1. After tipc_subscrp_delete() where the subscriber was freed because its refcnt was decremented to zero, but the subscriber's lock had to be released, as a consequence, panic happened. By contrast, if we increase subscriber's refcnt before tipc_subscrb_subscrp_delete() is called in tipc_subscrp_cancel(), the panic issue can be avoided. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Signed-off-by: Ying Xue <ying@windriver.com> --- net/tipc/subscr.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index f2c81f42dfda..be3d9e3183dc 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -242,7 +242,9 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { + tipc_subscrb_get(subscriber); tipc_subscrb_subscrp_delete(subscriber, s); + tipc_subscrb_put(subscriber); } static struct tipc_subscription *tipc_subscrp_create(struct net *net, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/2] tipc: remove subscription references only for pending timers
In commit, 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list"), we delete the subscription from the subscribers list and from nametable unconditionally. This leads to the following bug if the timer running tipc_subscrp_timeout() in another CPU accesses the subscription list after the subscription delete request. [39.570] general protection fault: [#1] SMP :: [39.574] task: 81c10540 task.stack: 81c0 [39.575] RIP: 0010:tipc_subscrp_timeout+0x32/0x80 [tipc] [39.576] RSP: 0018:88003ba03e90 EFLAGS: 00010282 [39.576] RAX: dead0200 RBX: 88003f0f3600 RCX: 0101 [39.577] RDX: dead0100 RSI: 0201 RDI: 88003f0d7948 [39.578] RBP: 88003ba03ea0 R08: 0001 R09: 88003ba03ef8 [39.579] R10: 014f R11: R12: 88003f0d7948 [39.580] R13: 88003f0f3618 R14: a006c250 R15: 88003f0f3600 [39.581] FS: () GS:88003ba0() knlGS: [39.582] CS: 0010 DS: ES: CR0: 80050033 [39.583] CR2: 7f831c6e0714 CR3: 3d3b CR4: 06f0 [39.584] Call Trace: [39.584] [39.585] call_timer_fn+0x3d/0x180 [39.585] ? tipc_subscrb_rcv_cb+0x260/0x260 [tipc] [39.586] run_timer_softirq+0x168/0x1f0 [39.586] ? sched_clock_cpu+0x16/0xc0 [39.587] __do_softirq+0x9b/0x2de [39.587] irq_exit+0x60/0x70 [39.588] smp_apic_timer_interrupt+0x3d/0x50 [39.588] apic_timer_interrupt+0x86/0x90 [39.589] RIP: 0010:default_idle+0x20/0xf0 [39.589] RSP: 0018:81c03e58 EFLAGS: 0246 ORIG_RAX: ff10 [39.590] RAX: RBX: 81c10540 RCX: [39.591] RDX: RSI: RDI: [39.592] RBP: 81c03e68 R08: R09: [39.593] R10: c90001cbbe00 R11: R12: [39.594] R13: 81c10540 R14: R15: [39.595] :: [39.603] RIP: tipc_subscrp_timeout+0x32/0x80 [tipc] RSP: 88003ba03e90 [39.604] ---[ end trace 79ce94b7216cb459 ]--- Fixes: 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/subscr.c | 19 +++ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0bf91cd3733c..f2c81f42dfda 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -52,7 +52,6 @@ struct tipc_subscriber { struct list_head subscrp_list; }; -static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); /** @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, { struct list_head *subscription_list = >subscrp_list; struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(>lock); list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) continue; - tipc_nametbl_unsubscribe(sub); - list_del(>subscrp_list); - tipc_subscrp_delete(sub); + timeout = htohl(sub->evt.s.timeout, sub->swap); + if (timeout == TIPC_WAIT_FOREVER || del_timer(>timer)) { + tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); + tipc_subscrp_put(sub); + } if (s) break; @@ -236,14 +239,6 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_delete(struct tipc_subscription *sub) -{ - u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - - if (timeout == TIPC_WAIT_FOREVER || del_timer(>timer)) - tipc_subscrp_put(sub); -} - static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 0/2] tipc: topology server fixes
The following commits fixes two race conditions causing general protection faults. Parthasarathy Bhuvaragan (1): tipc: remove subscription references only for pending timers Ying Xue (1): tipc: fix a race condition of releasing subscriber object net/tipc/subscr.c | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next v2 1/1] tipc: don't reset stale broadcast send link
On 08/14/2017 06:36 PM, Jon Maloy wrote: Ying, Partha, Do you have any viewpoints on this one? I would like to get this into net-next asap. FYI, this was originally an attempt to mitigate the consequences of a stale broadcast link before we found the root cause of the problem. (Which is fixed in a patch I just sent to netdev.) Strictly spoken we could do without it after the root cause was found, but I still think it is the right thing to do. I completely agree with you. I need a clarification on your statement "This elminates any risk that the broadcast link may be declared stale within the worst-case failure dectection time of an unresponsive peer." With this patch we will increase the worst case detection from 1 second to 250 * 10 (TIPC_BC_RETR_LIMIT) => 2.5 seconds. What do you mean by failure dectection time of an unresponsive peer (link tolerance ?) ? For the fix, you may add Reviewed-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com>. /Partha ///jon -Original Message- From: Jon Maloy Sent: Wednesday, August 09, 2017 14:22 To: Jon Maloy <jon.ma...@ericsson.com>; Jon Maloy <ma...@donjonn.com> Cc: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com>; Ying Xue <ying@windriver.com>; tipc-discussion@lists.sourceforge.net Subject: [net-next v2 1/1] tipc: don't reset stale broadcast send link When the broadcast send link after 100 attempts has failed to transfer a packet to all peers, we consider it stale, and reset it. Thereafter it needs to re-synchronize with the peers, something currently done by just resetting and re-establishing all links to all peers. This has turned out to be overkill, with potentially unwanted consequences for the remaining cluster. A closer analysis reveals that this can be done much simpler. When this kind of failure happens, for reasons that may lie outside the TIPC protocol, it is typically only one peer which is failing to receive and acknowledge packets. It is hence sufficient to identify and reset the links only to that peer to resolve the situation, without having to reset the broadcast link at all. This solution entails a much lower risk of negative consequences for the own node as well as for the overall cluster. At the same time, we expand the limit for the link stale counter from 100 to 250. This elminates any risk that the broadcast link may be declared stale within the worst-case failure dectection time of an unresponsive peer. We implement this change in this commit. Signed-off-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/bearer.c | 24 net/tipc/bearer.h | 1 - net/tipc/link.c | 23 +-- net/tipc/node.c | 14 -- 4 files changed, 17 insertions(+), 45 deletions(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d174ee3..7daf9f1 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -365,30 +365,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) return 0; } -/* tipc_bearer_reset_all - reset all links on all bearers - */ -void tipc_bearer_reset_all(struct net *net) -{ - struct tipc_bearer *b; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - clear_bit_unlock(0, >up); - } - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - tipc_reset_bearer(net, b); - } - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - test_and_set_bit_lock(0, >up); - } -} - /** * bearer_disable * diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 635c908..865cb09 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -210,7 +210,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); - void tipc_bearer_reset_all(struct net *net); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); diff --git a/net/tipc/link.c b/net/tipc/link.c index 60820dc..99b6fee 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -978,15 +978,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) struct tipc_msg *hdr = buf_msg(skb); pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "Resetting link "); + link_print(l, "State of link "); pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); pr_in
Re: [tipc-discussion] TIPC connection stalling due to invalid congestion status when bearer 0 recovers
Hi Peter, Have you looked through this? https://sourceforge.net/p/tipc/mailman/message/35809792/ The symptoms you describe is identical to mine, its worth a try my patch on your system. I need to address comments from Jon.M before pushing it to net-next. regards Partha On 07/21/2017 10:20 PM, Butler, Peter wrote: Hello, I am using a 19-node TIPC configuration, whereby each card (node) in the mesh has two Ethernet interfaces connected to two disjoint subnets served by switch0 and switch1, respectively. TIPC is set to use two bearers on each card. 16 of these cards are using TIPC 4.4.0 (with a few patches backported from later releases as per John Maloy, Parthasarathy Bhuvaragan, and Ying Xue). (The other 3 cards are using a much older 1.7-based TIPC, but are not actually involved in the testing pertaining to the issue detailed below.) There are applications on several of the (4.4.0-based) cards which are collectively sending/receiving about 500 TIPC msg/s (i.e. in total, not each). When I reboot switch0, I often get strange behaviour soon after the switch comes back into service. To be clear, there are no issues that appear to stem from the loss of connectivity on the switch0 Ethernet fabric: while that switch is rebooting (or powered off, or otherwise unavailable) the applications behave fine by using the Ethernet fabric associated with switch1. However, shortly after switch0 returns to service, one or more of the cards in the TIPC mesh will often then experience problems on the switch0 fabric. Specifically, the sendto() calls (on the cards in question) will fail. By default, we are using a blocking sendto() call, and the associated process is being put to sleep by the kernel at this line in socket.c: static int tipc_wait_for_sndmsg(struct socket *sock, long *timeo_p) { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); DEFINE_WAIT(wait); int done; do { int err = sock_error(sk); if (err) return err; if (sock->state == SS_DISCONNECTING) return -EPIPE; if (!*timeo_p) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(*timeo_p); prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE); done = sk_wait_event(sk, timeo_p, !tsk->link_cong); <- finish_wait(sk_sleep(sk), ); } while (!done); return 0; } Once in this state the process never recovers, and at the very least needs to be killed off and restarted, or the card rebooted. When changing this to a non-blocking sendto() call, the process is no longer put to sleep, but will forever fail the sendto() calls with -EAGAIN, and once again at the very least needs to be killed off and restarted, or the card rebooted. The TIPC traffic in question is connectionless, on a SOCK_RDM socket, and with destination-droppable set to false. Note that the hardware setup I am using is essentially identical to that used by Andrew Booth in his recent post "TIPC issue: connection stalls when switch for bearer 0 recovers" - both issues are almost certainly related, if not identical. Although in each of our cases the problem was detected using different application-level software. Could it be that TIPC is erroneously flagging the link as being congested and thus preventing any further traffic on it? (Just speculating, based on the line of code shown above.) Peter Butler -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH] tipc: fix a race condition of releasing subscriber object
On 05/02/2017 12:46 PM, Ying Xue wrote: > No matter whether a request is inserted into workqueue as a work item > to cancel a subscription or to delete a subscription's subscriber > asynchronously, the work items may be executed in different workers. > As a result, it doesn't mean that one request which is raised prior to > another request is definitely handled before the latter. By contrast, > if the latter request is executed before the former request, below > error may happen: > > [ 656.183644] BUG: spinlock bad magic on CPU#0, kworker/u8:0/12117 > [ 656.184487] general protection fault: [#1] SMP > [ 656.185160] Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio > 9p 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: > ip6_udp_tunnel] > [ 656.187003] CPU: 0 PID: 12117 Comm: kworker/u8:0 Not tainted 4.11.0-rc7+ #6 > [ 656.187920] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 > [ 656.188690] Workqueue: tipc_rcv tipc_recv_work [tipc] > [ 656.189371] task: 88003f5cec40 task.stack: c90004448000 > [ 656.190157] RIP: 0010:spin_bug+0xdd/0xf0 > [ 656.190678] RSP: 0018:c9000444bcb8 EFLAGS: 00010202 > [ 656.191375] RAX: 0034 RBX: 88003f8d1388 RCX: > > [ 656.192321] RDX: 88003ba13708 RSI: 88003ba0cd08 RDI: > 88003ba0cd08 > [ 656.193265] RBP: c9000444bcd0 R08: 0030 R09: > 6b6b6b6b > [ 656.194208] R10: 8800bde3e000 R11: 01b4 R12: > 6b6b6b6b6b6b6b6b > [ 656.195157] R13: 81a3ca64 R14: 88003f8d1388 R15: > 88003f8d13a0 > [ 656.196101] FS: () GS:88003ba0() > knlGS: > [ 656.197172] CS: 0010 DS: ES: CR0: 80050033 > [ 656.197935] CR2: 7f0b3d2e6000 CR3: 3ef9e000 CR4: > 06f0 > [ 656.198873] Call Trace: > [ 656.199210] do_raw_spin_lock+0x66/0xa0 > [ 656.199735] _raw_spin_lock_bh+0x19/0x20 > [ 656.200258] tipc_subscrb_subscrp_delete+0x28/0xf0 [tipc] > [ 656.200990] tipc_subscrb_rcv_cb+0x45/0x260 [tipc] > [ 656.201632] tipc_receive_from_sock+0xaf/0x100 [tipc] > [ 656.202299] tipc_recv_work+0x2b/0x60 [tipc] > [ 656.202872] process_one_work+0x157/0x420 > [ 656.203404] worker_thread+0x69/0x4c0 > [ 656.203898] kthread+0x138/0x170 > [ 656.204328] ? process_one_work+0x420/0x420 > [ 656.204889] ? kthread_create_on_node+0x40/0x40 > [ 656.205527] ret_from_fork+0x29/0x40 > [ 656.206012] Code: 48 8b 0c 25 00 c5 00 00 48 c7 c7 f0 24 a3 81 48 81 c1 f0 > 05 00 00 65 8b 15 61 ef f5 7e e8 9a 4c 09 00 4d 85 e4 44 8b 4b 08 74 92 <45> > 8b 84 24 40 04 00 00 49 8d 8c 24 f0 05 00 00 eb 8d 90 0f 1f > [ 656.208504] RIP: spin_bug+0xdd/0xf0 RSP: c9000444bcb8 > [ 656.209798] ---[ end trace e2a800e6eb0770be ]--- > > In above scenario, the request of deleting subscriber was performed > earlier than the request of canceling a subscription although the > latter was issued before the former, which means tipc_subscrb_delete() > was called before tipc_subscrp_cancel(). As a result, when > tipc_subscrb_subscrp_delete() called by tipc_subscrp_cancel() was > executed to cancel a subscription, the subscription's subscriber > refcnt had been decreased to 1. After tipc_subscrp_delete() where > the subscriber was freed because its refcnt was decremented to zero, > but the subscriber's lock had to be released, as a consequence, panic > happened. > > By contrast, if we increase subscriber's refcnt before > tipc_subscrb_subscrp_delete() is called in tipc_subscrp_cancel(), > the panic issue can be avoided. > > Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid > delete") > Reported-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> > Signed-off-by: Ying Xue <ying@windriver.com> > --- > net/tipc/subscr.c | 2 ++ > 1 file changed, 2 insertions(+) > > diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c > index 0bf91cd..94f7357d 100644 > --- a/net/tipc/subscr.c > +++ b/net/tipc/subscr.c > @@ -247,7 +247,9 @@ static void tipc_subscrp_delete(struct tipc_subscription > *sub) > static void tipc_subscrp_cancel(struct tipc_subscr *s, > struct tipc_subscriber *subscriber) > { > + tipc_subscrb_get(s); Ying, We have mixed the pointer types, this should be tipc_subscrb_get(subscriber) instead. So my testing earlier was not correct, i missed this completely. Iam off next week, so i can run the test with this fix in 2 weeks. net/tipc/subscr.c: In function ‘tipc_subscrp_cancel’: net/tipc/subscr.c:245:19: warning: passing argument 1 of ‘tipc_subscrb_get’ from incompatible pointer type tipc_subscrb_get(s); ^ net
Re: [tipc-discussion] [PATCH net v1 1/1] tipc: remove subscription references only for pending timers
Hi Ying, I tested both your patch plus mine and they together fixed the random panics I saw. If you dont have any other comments, I can send both your patch "tipc: fix a race condition of releasing subscriber " and mine to net. /Partha On 05/18/2017 09:54 AM, Ying Xue wrote: > Hi Partha, > > Sorry for late response. > > Good catch! Please add my "ack-by". > > Thanks, > Ying > > On 05/08/2017 09:46 PM, Parthasarathy Bhuvaragan wrote: >> In commit, 139bb36f754a ("tipc: advance the time of deleting >> subscription from subscriber->subscrp_list"), we delete the >> subscription from the subscribers list and from nametable >> unconditionally. This leads to the following bug if the timer >> running tipc_subscrp_timeout() in another CPU accesses the >> subscription list after the subscription delete request. >> >> [39.570] general protection fault: [#1] SMP >> :: >> [39.574] task: 81c10540 task.stack: 81c0 >> [39.575] RIP: 0010:tipc_subscrp_timeout+0x32/0x80 [tipc] >> [39.576] RSP: 0018:88003ba03e90 EFLAGS: 00010282 >> [39.576] RAX: dead0200 RBX: 88003f0f3600 RCX: 0101 >> [39.577] RDX: dead0100 RSI: 0201 RDI: 88003f0d7948 >> [39.578] RBP: 88003ba03ea0 R08: 0001 R09: 88003ba03ef8 >> [39.579] R10: 014f R11: R12: 88003f0d7948 >> [39.580] R13: 88003f0f3618 R14: a006c250 R15: 88003f0f3600 >> [39.581] FS: () GS:88003ba0() >> knlGS: >> [39.582] CS: 0010 DS: ES: CR0: 80050033 >> [39.583] CR2: 7f831c6e0714 CR3: 3d3b CR4: 06f0 >> [39.584] Call Trace: >> [39.584] >> [39.585] call_timer_fn+0x3d/0x180 >> [39.585] ? tipc_subscrb_rcv_cb+0x260/0x260 [tipc] >> [39.586] run_timer_softirq+0x168/0x1f0 >> [39.586] ? sched_clock_cpu+0x16/0xc0 >> [39.587] __do_softirq+0x9b/0x2de >> [39.587] irq_exit+0x60/0x70 >> [39.588] smp_apic_timer_interrupt+0x3d/0x50 >> [39.588] apic_timer_interrupt+0x86/0x90 >> [39.589] RIP: 0010:default_idle+0x20/0xf0 >> [39.589] RSP: 0018:81c03e58 EFLAGS: 0246 ORIG_RAX: >> ff10 >> [39.590] RAX: RBX: 81c10540 RCX: >> [39.591] RDX: RSI: RDI: >> [39.592] RBP: 81c03e68 R08: R09: >> [39.593] R10: c90001cbbe00 R11: R12: >> [39.594] R13: 81c10540 R14: R15: 0000 >> [39.595] >> :: >> [39.603] RIP: tipc_subscrp_timeout+0x32/0x80 [tipc] RSP: 88003ba03e90 >> [39.604] ---[ end trace 79ce94b7216cb459 ]--- >> >> Fixes: 139bb36f754a ("tipc: advance the time of deleting subscription from >> subscriber->subscrp_list") >> Signed-off-by: Parthasarathy Bhuvaragan >> <parthasarathy.bhuvara...@ericsson.com> >> --- >> net/tipc/subscr.c | 19 +++ >> 1 file changed, 7 insertions(+), 12 deletions(-) >> >> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c >> index 0bf91cd3733c..f2c81f42dfda 100644 >> --- a/net/tipc/subscr.c >> +++ b/net/tipc/subscr.c >> @@ -52,7 +52,6 @@ struct tipc_subscriber { >> struct list_head subscrp_list; >> }; >> >> -static void tipc_subscrp_delete(struct tipc_subscription *sub); >> static void tipc_subscrb_put(struct tipc_subscriber *subscriber); >> >> /** >> @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct >> tipc_subscriber *subscriber, >> { >> struct list_head *subscription_list = >subscrp_list; >> struct tipc_subscription *sub, *temp; >> +u32 timeout; >> >> spin_lock_bh(>lock); >> list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { >> if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) >> continue; >> >> -tipc_nametbl_unsubscribe(sub); >> -list_del(>subscrp_list); >> -tipc_subscrp_delete(sub); >> +timeout = htohl(sub->evt.s.timeout, sub->swap); >> +if (timeout == TIPC_WAIT_FOREVER || del_timer(>timer)) { >> +tipc_nametbl_unsubscribe(sub); >> +list_del(>subscrp_list); >> +tipc_subscrp_put(sub); >> +} >> >> if (s) >>
[tipc-discussion] [PATCH net v1 1/1] tipc: remove subscription references only for pending timers
In commit, 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list"), we delete the subscription from the subscribers list and from nametable unconditionally. This leads to the following bug if the timer running tipc_subscrp_timeout() in another CPU accesses the subscription list after the subscription delete request. [39.570] general protection fault: [#1] SMP :: [39.574] task: 81c10540 task.stack: 81c0 [39.575] RIP: 0010:tipc_subscrp_timeout+0x32/0x80 [tipc] [39.576] RSP: 0018:88003ba03e90 EFLAGS: 00010282 [39.576] RAX: dead0200 RBX: 88003f0f3600 RCX: 0101 [39.577] RDX: dead0100 RSI: 0201 RDI: 88003f0d7948 [39.578] RBP: 88003ba03ea0 R08: 0001 R09: 88003ba03ef8 [39.579] R10: 014f R11: R12: 88003f0d7948 [39.580] R13: 88003f0f3618 R14: a006c250 R15: 88003f0f3600 [39.581] FS: () GS:88003ba0() knlGS: [39.582] CS: 0010 DS: ES: CR0: 80050033 [39.583] CR2: 7f831c6e0714 CR3: 3d3b CR4: 06f0 [39.584] Call Trace: [39.584] [39.585] call_timer_fn+0x3d/0x180 [39.585] ? tipc_subscrb_rcv_cb+0x260/0x260 [tipc] [39.586] run_timer_softirq+0x168/0x1f0 [39.586] ? sched_clock_cpu+0x16/0xc0 [39.587] __do_softirq+0x9b/0x2de [39.587] irq_exit+0x60/0x70 [39.588] smp_apic_timer_interrupt+0x3d/0x50 [39.588] apic_timer_interrupt+0x86/0x90 [39.589] RIP: 0010:default_idle+0x20/0xf0 [39.589] RSP: 0018:81c03e58 EFLAGS: 0246 ORIG_RAX: ff10 [39.590] RAX: RBX: 81c10540 RCX: [39.591] RDX: RSI: RDI: [39.592] RBP: 81c03e68 R08: R09: [39.593] R10: c90001cbbe00 R11: R12: [39.594] R13: 81c10540 R14: R15: [39.595] :: [39.603] RIP: tipc_subscrp_timeout+0x32/0x80 [tipc] RSP: 88003ba03e90 [39.604] ---[ end trace 79ce94b7216cb459 ]--- Fixes: 139bb36f754a ("tipc: advance the time of deleting subscription from subscriber->subscrp_list") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/subscr.c | 19 +++ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0bf91cd3733c..f2c81f42dfda 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -52,7 +52,6 @@ struct tipc_subscriber { struct list_head subscrp_list; }; -static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); /** @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, { struct list_head *subscription_list = >subscrp_list; struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(>lock); list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) continue; - tipc_nametbl_unsubscribe(sub); - list_del(>subscrp_list); - tipc_subscrp_delete(sub); + timeout = htohl(sub->evt.s.timeout, sub->swap); + if (timeout == TIPC_WAIT_FOREVER || del_timer(>timer)) { + tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); + tipc_subscrp_put(sub); + } if (s) break; @@ -236,14 +239,6 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_delete(struct tipc_subscription *sub) -{ - u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - - if (timeout == TIPC_WAIT_FOREVER || del_timer(>timer)) - tipc_subscrp_put(sub); -} - static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 4/6] tipc: lock wakeup & inputq at tipc_link_reset()
Until now, in tipc_link_reset() we call copy the wakeup queue to input queue using skb_queue_splice_init(link->wakeupq, link->inputq). This is performed without holding any locks. This list might be simultaneously be accessed by other cpu threads in tipc_sk_rcv() and that leads to random missing packets. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/link.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index ddd2dd6f77aa..722b82a884dc 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -829,9 +829,14 @@ void tipc_link_reset(struct tipc_link *l) l->peer_session = ANY_SESSION; l->session++; l->mtu = l->advertised_mtu; + spin_lock_bh(>wakeupq.lock); + spin_lock_bh(>inputq->lock); + skb_queue_splice_init(>wakeupq, l->inputq); + spin_unlock_bh(>inputq->lock); + spin_unlock_bh(>wakeupq.lock); + __skb_queue_purge(>transmq); __skb_queue_purge(>deferdq); - skb_queue_splice_init(>wakeupq, l->inputq); __skb_queue_purge(>backlogq); l->backlog[TIPC_LOW_IMPORTANCE].len = 0; l->backlog[TIPC_MEDIUM_IMPORTANCE].len = 0; -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 0/6] tipc: multiple bug fixes
This patch series has several fixes. Parthasarathy Bhuvaragan (6): tipc: queue socket protocol error messages into socket receive buffer tipc: expand buffers with GFP_ATOMIC flag tipc: reset bearer if device carrier not ok tipc: lock wakeup & inputq at tipc_link_reset() tipc: perform skb_linearize() before parsing the inner header tipc: reassign pointers after skb reallocation / linearization net/tipc/bearer.c | 11 +++ net/tipc/link.c | 7 ++- net/tipc/msg.c| 10 +++--- net/tipc/node.c | 2 ++ net/tipc/socket.c | 47 +-- 5 files changed, 47 insertions(+), 30 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/3] tipc: improve error validations for sockets in CONNECTING state
Until now, the checks for sockets in CONNECTING state was based on the assumption that the incoming message was always from the peer's accepted data socket. However an application using a non-blocking socket sends an implicit connect, this socket which is in CONNECTING state can receive error messages from the peer's listening socket. As we discard these messages, the application socket hangs as there due to inactivity. In addition to this, there are other places where we process errors but do not notify the user. In this commit, we process such incoming error messages and notify our users about them using sk_state_change(). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Reviewed-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/socket.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 3b8df510a80c..38c367f6ced4 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1259,7 +1259,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) struct sock *sk = sock->sk; DEFINE_WAIT(wait); long timeo = *timeop; - int err; + int err = sock_error(sk); + + if (err) + return err; for (;;) { prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE); @@ -1281,6 +1284,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = sock_intr_errno(timeo); if (signal_pending(current)) break; + + err = sock_error(sk); + if (err) + break; } finish_wait(sk_sleep(sk), ); *timeop = timeo; @@ -1551,6 +1558,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = >sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); if (unlikely(msg_mcast(hdr))) return false; @@ -1558,18 +1567,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) switch (sk->sk_state) { case TIPC_CONNECTING: /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) - return false; + if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); return true; } if (unlikely(!msg_isdata(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; + sk->sk_state_change(sk); return true; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 3/3] tipc: close the connection if protocol messages contain errors
When a socket is shutting down, we notify the peer node about the connection termination by reusing an incoming message if possible. If the last received message was a connection acknowledgment message, we reverse this message and set the error code to TIPC_ERR_NO_PORT and send it to peer. In tipc_sk_proto_rcv(), we never check for message errors while processing the connection acknowledgment or probe messages. Thus this message performs the usual flow control accounting and leaves the session hanging. In this commit, we terminate the connection when we receive such error messages. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Reviewed-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/socket.c | 8 1 file changed, 8 insertions(+) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 38c367f6ced4..bdce99f9407a 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -866,6 +866,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + goto exit; + } + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/2] tipc: fix socket flow control accounting error at tipc_send_stream
Until now in tipc_send_stream(), we return -1 when the socket encounters link congestion even if the socket had successfully sent partial data. This is incorrect as the application resends the same the partial data leading to data corruption at receiver's end. In this commit, we return the partially sent bytes as the return value at link congestion. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Reviewed-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..b28e94f1c739 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1083,7 +1083,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v1 2/2] tipc: allow rdm/dgram socketpairs
From: Erik Hugne <erik.hu...@gmail.com> for socketpairs using connectionless transport, we cache the respective node local TIPC portid to use in subsequent calls to send() in the socket's private data. Signed-off-by: Erik Hugne <erik.hu...@gmail.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 20 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1198dddf72e8..15f6ce7bf868 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2515,9 +2515,21 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2) { struct tipc_sock *tsk2 = tipc_sk(sock2->sk); struct tipc_sock *tsk1 = tipc_sk(sock1->sk); - - tipc_sk_finish_conn(tsk1, tsk2->portid, 0); - tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); return 0; } @@ -2529,7 +2541,7 @@ static const struct proto_ops msg_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v1 1/2] tipc: add support for stream/seqpacket socketpairs
From: Erik Hugne <erik.hu...@gmail.com> sockets A and B are connected back-to-back, similar to what AF_UNIX does. Signed-off-by: Erik Hugne <erik.hu...@gmail.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 14 -- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 7130e73bd42c..1198dddf72e8 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2511,6 +2511,16 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + + tipc_sk_finish_conn(tsk1, tsk2->portid, 0); + tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2540,7 +2550,7 @@ static const struct proto_ops packet_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, @@ -2561,7 +2571,7 @@ static const struct proto_ops stream_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v1 0/2] tipc: add socketpair support
We add socketpair support for connection oriented sockets in the first patch and for connection less in the second. Erik Hugne (2): tipc: add support for stream/seqpacket socketpairs tipc: allow rdm/dgram socketpairs net/tipc/socket.c | 28 +--- 1 file changed, 25 insertions(+), 3 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/1] tipc: fix nametbl deadlock at tipc_nametbl_unsubscribe
From: Ying Xue <ying@windriver.com> Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: -- tipc_nametbl_publish spin_lock_bh(>nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> CPU1: CPU2: -- tipc_nametbl_stop spin_lock_bh(>nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson <thompa@gmail.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Ying Xue <ying....@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/subscr.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(>lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(>lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(>lock); - tipc_nametbl_unsubscribe(sub); list_del(>subscrp_list); atomic_dec(>subscription_count); spin_unlock_bh(>lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(>lock); tipc_subscrp_delete(sub); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription
Hi Jon, Ok, I will submit it today. I usually add the Fixes tag in the tag section instead of specifying it in the body of the commit message like you do. I followed the instructions specified in: http://lxr.free-electrons.com/source/Documentation/SubmittingPatches?v=3.16#L135 This commit to net has the following tag: Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") /Partha From: Jon Maloy Sent: Monday, March 20, 2017 8:30 PM To: Jon Maloy; Parthasarathy Bhuvaragan; John Thompson Cc: tipc-discussion@lists.sourceforge.net Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription BTW, I think you should add a reference to the patch where the subscription refcount was introduced in the log message, so that Andrew will know how far back he should apply this patch on "stable". ///jon > -Original Message- > From: Jon Maloy [mailto:jon.ma...@ericsson.com] > Sent: Monday, March 20, 2017 03:18 PM > To: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com>; > John Thompson <thompa@gmail.com> > Cc: tipc-discussion@lists.sourceforge.net > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > No objections. Just go ahead. > > ///jon > > > From: Parthasarathy Bhuvaragan > Sent: Monday, March 20, 2017 12:00 PM > To: Jon Maloy <jon.ma...@ericsson.com>; John Thompson > <thompa@gmail.com> > Cc: Ying Xue <ying@windriver.com>; tipc- > discuss...@lists.sourceforge.net > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > Hi, > > In that case I plan to send the only the first fix to net: > > tipc: advance the time of calling tipc_nametbl_unsubscribe > > Then wait till net is merged to netnext and submit the following to netnext: >tipc: advance the time of deleting subscription from > subscriber->subscrp_list >tipc: adjust the policy of holding subscription kref > > Any objections? > > /Partha > > > From: Jon Maloy > Sent: Monday, March 20, 2017 1:25:57 PM > To: John Thompson; Parthasarathy Bhuvaragan > Cc: Ying Xue; tipc-discussion@lists.sourceforge.net<mailto:tipc- > discuss...@lists.sourceforge.net> > Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > Series acked by me. Please send to 'net' since this problem really occurs > frequently even in the released version. > > ///jon > > > From: John Thompson [mailto:thompa@gmail.com] > Sent: Sunday, March 19, 2017 09:19 PM > To: Parthasarathy Bhuvaragan > <parthasarathy.bhuvara...@ericsson.com<mailto:parthasarathy.bhuvaraga > n...@ericsson.com>> > Cc: Ying Xue <ying@windriver.com<mailto:ying@windriver.com>>; > tipc-discussion@lists.sourceforge.net<mailto:tipc- > discuss...@lists.sourceforge.net>; Jon Maloy > <jon.ma...@ericsson.com<mailto:jon.ma...@ericsson.com>> > Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired > subscription > > > On Fri, Mar 17, 2017 at 3:44 PM, John Thompson > <thompa@gmail.com<mailto:thompa@gmail.com>> wrote: > > > On Fri, Mar 17, 2017 at 9:35 AM, John Thompson > <thompa@gmail.com<mailto:thompa@gmail.com>> wrote: > > > On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan > <parthasarathy.bhuvara...@ericsson.com<mailto:parthasarathy.bhuvaraga > n...@ericsson.com>> wrote: > > Hi John, > > Can you run your test with my series and provide some feedback? > Hi Partha > I have started running some tests and will be in touch later today with > initial > results. > Thanks > Jt > > Hi Partha, > My testing is looking good. I have generally seen problems reasonably > quickly. This patch series seems good. I will run a longer run test over the > weekend to be certain but it should be ok. > > Thanks, > JT > Hi Partha, > > My testing over the weekend has looked good. I have not seen any soft > lockups or other errors of any form. > > JT > > -- > Check out the vibrant tech community on one of the world's most engaging > tech sites, Slashdot.org! http://sdm.link/slashdot > ___ > tipc-discussion mailing list > tipc-discussion@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/tipc-discussion -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription
Hi, In that case I plan to send the only the first fix to net: tipc: advance the time of calling tipc_nametbl_unsubscribe Then wait till net is merged to netnext and submit the following to netnext: tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref Any objections? /Partha From: Jon Maloy Sent: Monday, March 20, 2017 1:25:57 PM To: John Thompson; Parthasarathy Bhuvaragan Cc: Ying Xue; tipc-discussion@lists.sourceforge.net Subject: RE: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Series acked by me. Please send to ‘net’ since this problem really occurs frequently even in the released version. ///jon From: John Thompson [mailto:thompa@gmail.com] Sent: Sunday, March 19, 2017 09:19 PM To: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Cc: Ying Xue <ying@windriver.com>; tipc-discussion@lists.sourceforge.net; Jon Maloy <jon.ma...@ericsson.com> Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription On Fri, Mar 17, 2017 at 3:44 PM, John Thompson <thompa@gmail.com<mailto:thompa@gmail.com>> wrote: On Fri, Mar 17, 2017 at 9:35 AM, John Thompson <thompa@gmail.com<mailto:thompa@gmail.com>> wrote: On Thu, Mar 16, 2017 at 1:45 AM, Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com<mailto:parthasarathy.bhuvara...@ericsson.com>> wrote: Hi John, Can you run your test with my series and provide some feedback? Hi Partha I have started running some tests and will be in touch later today with initial results. Thanks Jt Hi Partha, My testing is looking good. I have generally seen problems reasonably quickly. This patch series seems good. I will run a longer run test over the weekend to be certain but it should be ok. Thanks, JT Hi Partha, My testing over the weekend has looked good. I have not seen any soft lockups or other errors of any form. JT -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 4/7] tipc: Fix missing connection request handling
In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMP safe wq_has_sleeper(). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: Address comments from Ying Xue by replacing wq_has_sleeper().. with sk->sk_data_ready(sk). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 2c2ea2e15c45..cb8d2dd2b10c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1584,8 +1584,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) - wake_up_interruptible(sk_sleep(sk)); + sk->sk_data_ready(sk); /* 'ACK-' message is neither accepted nor rejected: */ msg_set_dest_droppable(hdr, 1); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 7/7] tipc: fix node read lock imbalance for unidentified bearers
When we fail to identify a bearer in tipc_node_get_linkname(), we call node unlock without locking the node read lock. This leads to node read lock imbalance. In this commit, we skip the node read unlock for unidentified bearer. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 4512e83652b1..e062ef0f01dd 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 0/7] tipc socket layer bug fixes
This series primarily contains fixes for bugs exposed by Commit 10724cc7bb78 ("tipc: redesign connection-level flow control"). Patch#1-3: Fixes which are required for the new socket flow control to work correctly. They fix holes in flow control accounting. Patch#4-6: Fixes the legacy bugs exposed by the new flow control, as the new flow control is way stricter than the previous one based on packet count. Patch#7: Fix node read lock imbalance. Parthasarathy Bhuvaragan (7): tipc: fix socket flow control accounting error at tipc_send_stream tipc: fix socket flow control accounting error at tipc_recv_stream tipc: fix flow control accounting for implicit connect tipc: Fix missing connection request handling tipc: improve error validations for sockets in CONNECTING state tipc: close the connection if protocol messages contain errors tipc: fix node read lock imbalance for unidentified bearers net/tipc/node.c | 2 +- net/tipc/socket.c | 46 +- 2 files changed, 38 insertions(+), 10 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 6/7] tipc: close the connection if protocol messages contain errors
When a socket is shutting down, we notify the peer node about the connection termination by reusing an incoming message if possible. If the last received message was a connection acknowledgment message, we reverse this message and set the error code to TIPC_ERR_NO_PORT and send it to peer. In tipc_sk_proto_rcv(), we never check for message errors while processing the connection acknowledgment or probe messages. Thus this message performs the usual flow control accounting and leaves the session hanging. In this commit, we terminate the connection when we receive such error messages. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 9 - 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 1f18b435f915..c8137fa5465b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -73,7 +73,6 @@ enum { * @publications: list of publications for port * @blocking_link: address of the congested link we are currently sleeping on * @pub_count: total # of publications port has made during its lifetime - * @probing_state: * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links @@ -866,6 +865,14 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, if (!tsk_peer_msg(tsk, hdr)) goto exit; + if (unlikely(msg_errcode(hdr))) { + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), + tsk_peer_port(tsk)); + sk->sk_state_change(sk); + goto exit; + } + tsk->probe_unacked = false; if (mtyp == CONN_PROBE) { -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/7] tipc: fix socket flow control accounting error at tipc_recv_stream
Until now in tipc_recv_stream(), we update the received unacknowledged bytes based on a stack variable and not based on the actual message size. If the user buffer passed at tipc_recv_stream() is smaller than the received skb, the size variable in stack differs from the actual message size in the skb. This leads to a flow control accounting error causing permanent congestion. In this commit, we fix this accounting error by always using the size of the incoming message. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index b28e94f1c739..566906795c8c 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1484,7 +1484,7 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (unlikely(flags & MSG_PEEK)) goto exit; - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); tsk_advance_rx_queue(sk); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 3/7] tipc: fix flow control accounting for implicit connect
In the case of implicit connect message with data > 1K, the flow control accounting is incorrect. At this state, the socket does not know the peer nodes capability and falls back to legacy flow control by return 1, however the receiver of this message will perform the new block accounting. This leads to an accounting error and eventually traffic disturbance. In this commit, we perform tipc_node_get_capabilities() at implicit connect and perform accounting based on the peer's capability. Fixes: 10724cc7bb78 ("tipc: redesign connection-level flow control") Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 566906795c8c..2c2ea2e15c45 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1054,8 +1054,11 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) /* Handle implicit connection setup */ if (unlikely(dest)) { rc = __tipc_sendmsg(sock, m, dlen); - if (dlen && (dlen == rc)) + if (dlen && (dlen == rc)) { + dnode = dest->addr.id.node; + tsk->peer_caps = tipc_node_get_capabilities(net, dnode); tsk->snt_unacked = tsk_inc(tsk, dlen + msg_hdr_sz(hdr)); + } return rc; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 5/7] tipc: improve error validations for sockets in CONNECTING state
Until now, the checks for sockets in CONNECTING state was based on the assumption that the incoming message was always from the peer's accepted data socket. However an application using a non-blocking socket sends an implicit connect, this socket which is in CONNECTING state can receive error messages from the peer's listening socket. As we discard these messages, the application socket hangs as there due to inactivity. In addition to this, there are other places where we process errors but do not notify the user. In this commit, we process such incoming error messages and notify out users about them using sk_state_change(). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 25 ++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index cb8d2dd2b10c..1f18b435f915 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1262,7 +1262,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) struct sock *sk = sock->sk; DEFINE_WAIT(wait); long timeo = *timeop; - int err; + int err = sock_error(sk); + + if (err) + return err; for (;;) { prepare_to_wait(sk_sleep(sk), , TASK_INTERRUPTIBLE); @@ -1284,6 +1287,10 @@ static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) err = sock_intr_errno(timeo); if (signal_pending(current)) break; + + err = sock_error(sk); + if (err) + break; } finish_wait(sk_sleep(sk), ); *timeop = timeo; @@ -1554,6 +1561,8 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) struct sock *sk = >sk; struct net *net = sock_net(sk); struct tipc_msg *hdr = buf_msg(skb); + u32 pport = msg_origport(hdr); + u32 pnode = msg_orignode(hdr); if (unlikely(msg_mcast(hdr))) return false; @@ -1561,18 +1570,28 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) switch (sk->sk_state) { case TIPC_CONNECTING: /* Accept only ACK or NACK message */ - if (unlikely(!msg_connected(hdr))) - return false; + if (unlikely(!msg_connected(hdr))) { + if (pport != tsk_peer_port(tsk) || + pnode != tsk_peer_node(tsk)) + return false; + + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); + return true; + } if (unlikely(msg_errcode(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = ECONNREFUSED; + sk->sk_state_change(sk); return true; } if (unlikely(!msg_isdata(hdr))) { tipc_set_sk_state(sk, TIPC_DISCONNECTING); sk->sk_err = EINVAL; + sk->sk_state_change(sk); return true; } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription
Hi Ying, Thanks for your feedback. I keep learning from your style of producing readable small patches doing one thing at a time. Hi John, Can you run your test with my series and provide some feedback? [PATCH v3 net-next 0/3] solve two deadlock issues: tipc: advance the time of calling tipc_nametbl_unsubscribe tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref regards partha From: Ying Xue <ying@windriver.com> Sent: Wednesday, March 15, 2017 12:33 PM To: Parthasarathy Bhuvaragan; Jon Maloy; thompa@gmail.com Cc: tipc-discussion@lists.sourceforge.net Subject: Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription Hi Partha, Thank you for the review and improvement. On 03/14/2017 01:43 AM, Parthasarathy Bhuvaragan wrote: > Hi Ying, > > I have a new patch sets which fixes this issue using fixes from your > patches. It deviates from your patch the following way: > In my solution, the subscription refcount keeps track of a subscription > with or without timer. I do not increment refcount for timer, and use > the subscriber lock plus the del_timer to find outstanding timers. I > will send the series shortly. I have carefully reviewed your solution as well as your revised patches. Your method is pretty simpler than mine. Instead the thing I image is too complex. Now I can confirm that it's unnecessary to increase subscription refcount before its timer is started, and it's absolutely safe for us now. Good work Parth! > > I applied your series and ran some tests with it. If I run test against > each patch individually, they seem to introduce new warnings/panics. I > think every patch should be as correct as possible. I tried to moved > around the patches in this series to get every patch correct, which led > me to my series above. > If we can ensure every single patch of a patchset can independently work very well, that's very good. But in many cases, it's hard to reach that goal. The most reason is that on one hand, we must have patch easily readable for reviewer, on another hand, we must keep every single work well. So it is sometimes very hard. Anyway, your revised patches are very good. If Jon or other guys have no any different opinion, please submit them to net-next as soon as possible. Of course, if possible, please let John verify them again. Thanks, Ying -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH net v1 1/2] tipc: fix socket flow control errors
Please ignore this patch and 'tipc: Fix missing connection request handling'. They dont belong to this series and they are not the right version. /Partha On 03/13/2017 06:57 PM, Parthasarathy Bhuvaragan wrote: > In this commit, we fix the following two errors: > 1. In tipc_send_stream(), fix the return value during congestion >when the send is partially successful. Until now, we return -1 >instead of returning the partial sent bytes. > 2. In tipc_recv_stream(), we update the rcv_unack not based on the >message size, but on sz. Usually they are the same, but in cases >where the socket receivers buffer is smaller than the incoming >message, these two parameters differ greatly. This introduces a >slack in accounting leading to permanent congestion. In this >commit, we perform accounting always based on the incoming message. > > Signed-off-by: Parthasarathy Bhuvaragan > <parthasarathy.bhuvara...@ericsson.com> > --- > net/tipc/socket.c | 9 - > 1 file changed, 4 insertions(+), 5 deletions(-) > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c > index 6b09a778cc71..79e628cd08a9 100644 > --- a/net/tipc/socket.c > +++ b/net/tipc/socket.c > @@ -1080,7 +1080,7 @@ static int __tipc_sendstream(struct socket *sock, > struct msghdr *m, size_t dlen) > } > } while (sent < dlen && !rc); > > - return rc ? rc : sent; > + return sent ? sent : rc; > } > > /** > @@ -1481,16 +1481,15 @@ static int tipc_recv_stream(struct socket *sock, > struct msghdr *m, > if (unlikely(flags & MSG_PEEK)) > goto exit; > > - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); > + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); > if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) > tipc_sk_send_ack(tsk); > tsk_advance_rx_queue(sk); > > /* Loop around if more data is required */ > - if ((sz_copied < buf_len) &&/* didn't get all requested data */ > + if ((!err) && (sz_copied < buf_len) && > (!skb_queue_empty(>sk_receive_queue) || > - (sz_copied < target)) &&/* and more is ready or required */ > - (!err)) /* and haven't reached a FIN */ > + (sz_copied < target))) > goto restart; > > exit: > -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/2] tipc: fix socket flow control errors
In this commit, we fix the following two errors: 1. In tipc_send_stream(), fix the return value during congestion when the send is partially successful. Until now, we return -1 instead of returning the partial sent bytes. 2. In tipc_recv_stream(), we update the rcv_unack not based on the message size, but on sz. Usually they are the same, but in cases where the socket receivers buffer is smaller than the incoming message, these two parameters differ greatly. This introduces a slack in accounting leading to permanent congestion. In this commit, we perform accounting always based on the incoming message. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71..79e628cd08a9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1080,7 +1080,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** @@ -1481,16 +1481,15 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (unlikely(flags & MSG_PEEK)) goto exit; - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); tsk_advance_rx_queue(sk); /* Loop around if more data is required */ - if ((sz_copied < buf_len) &&/* didn't get all requested data */ + if ((!err) && (sz_copied < buf_len) && (!skb_queue_empty(>sk_receive_queue) || - (sz_copied < target)) &&/* and more is ready or required */ - (!err)) /* and haven't reached a FIN */ +(sz_copied < target))) goto restart; exit: -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/2] tipc: Fix missing connection request handling
In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMB safe wq_has_sleeper(). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 79e628cd08a9..ce6ed0955e36 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1577,7 +1577,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) + if (wq_has_sleeper(rcu_dereference(sk->sk_wq))) wake_up_interruptible(sk_sleep(sk)); /* 'ACK-' message is neither accepted nor rejected: */ -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH v3 net-next 0/3] solve two deadlock issues
commit d094c4d5f5 ("tipc: add subscription refcount to avoid invalid delete") accidently introduce the following deadlock scenarios: CPU1: CPU2: -- tipc_nametbl_publish spin_lock_bh(>nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> CPU1: CPU2: -- tipc_nametbl_stop spin_lock_bh(>nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> The root cause of two deadlocks is that we have to hold nametbl lock when subscription is freed in tipc_subscrp_kref_release(). In this series we simplify tipc_subscrp_kref_release(), by moving functions which acquire locks to other relevant places. Patch #1: bug fix for the above issue. Patch #2-3: improve subscription locking/refcounting. Change log: v3: In v3, the subscription refcount keeps track of a subscription with or without timer. We do not increment refcount for timer, and use the subscriber lock plus the del_timer to find outstanding timers. After this change tipc_subscrp_kref_release() is lock free. v2: As Parth's comments, subscription is still present name table after it's expired. To fix it, we introduce a workqueue, and when subscription's timer is expired, the subscription will be pushed to the workqueue through its work. When the work is scheduled, the subscription be deleted finally. Ying Xue (3): tipc: advance the time of calling tipc_nametbl_unsubscribe tipc: advance the time of deleting subscription from subscriber->subscrp_list tipc: adjust the policy of holding subscription kref net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 24 ++-- net/tipc/subscr.h | 3 +++ 3 files changed, 15 insertions(+), 14 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH v3 net-next 3/3] tipc: adjust the policy of holding subscription kref
From: Ying XueWhen a new subscription object is inserted into name_seq->subscriptions list, it's under name_seq->lock protection; when a subscription is deleted from the list, it's also under the same lock protection; similarly, when accessing a subscription by going through subscriptions list, the entire process is also protected by the name_seq->lock. Therefore, if subscription refcount is increased before it's inserted into subscriptions list, and its refcount is decreased after it's deleted from the list, it will be unnecessary to hold refcount at all before accessing subscription object which is obtained by going through subscriptions list under name_seq->lock protection. Signed-off-by: Ying Xue Reviewed-by: Jon Maloy --- net/tipc/name_table.c | 2 ++ net/tipc/subscr.c | 8 ++-- net/tipc/subscr.h | 3 +++ 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index 9be6592e4a6f..bd0aac87b41a 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -416,6 +416,7 @@ static void tipc_nameseq_subscribe(struct name_seq *nseq, tipc_subscrp_convert_seq(>evt.s.seq, s->swap, ); + tipc_subscrp_get(s); list_add(>nameseq_list, >subscriptions); if (!sseq) @@ -787,6 +788,7 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s) if (seq != NULL) { spin_lock_bh(>lock); list_del_init(>nameseq_list); + tipc_subscrp_put(s); if (!seq->first_free && list_empty(>subscriptions)) { hlist_del_init_rcu(>ns_list); kfree(seq->sseqs); diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0649bc29c6bb..0bf91cd3733c 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,8 +54,6 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); -static void tipc_subscrp_put(struct tipc_subscription *subscription); -static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -125,7 +123,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; - tipc_subscrp_get(sub); tipc_subscrp_convert_seq(>evt.s.seq, sub->swap, ); if (!tipc_subscrp_check_overlap(, found_lower, found_upper)) return; @@ -135,7 +132,6 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); - tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) @@ -183,12 +179,12 @@ static void tipc_subscrp_kref_release(struct kref *kref) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_put(struct tipc_subscription *subscription) +void tipc_subscrp_put(struct tipc_subscription *subscription) { kref_put(>kref, tipc_subscrp_kref_release); } -static void tipc_subscrp_get(struct tipc_subscription *subscription) +void tipc_subscrp_get(struct tipc_subscription *subscription) { kref_get(>kref); } diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index ffdc214c117a..ee52957dc952 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -78,4 +78,7 @@ u32 tipc_subscrp_convert_seq_type(u32 type, int swap); int tipc_topsrv_start(struct net *net); void tipc_topsrv_stop(struct net *net); +void tipc_subscrp_put(struct tipc_subscription *subscription); +void tipc_subscrp_get(struct tipc_subscription *subscription); + #endif -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH v3 net-next 2/3] tipc: advance the time of deleting subscription from subscriber->subscrp_list
From: Ying Xue <ying@windriver.com> After a subscription object is created, it's inserted into its subscriber subscrp_list list under subscriber lock protection, similarly, before it's destroyed, it should be first removed from its subscriber->subscrp_list. Since the subscription list is accessed with subscriber lock, all the subscriptions are valid during the lock duration. Hence in tipc_subscrb_subscrp_delete(), we remove subscription get/put and the extra subscriber unlock/lock. After this change, the subscriptions refcount cleanup is very simple and does not access any lock. Signed-off-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: Remove the subscrp_list at tipc_subscrp_timeout() also. This prevents any subscription cancel or subscriber delete thread running in parallel seeing the timeout out subscription. --- net/tipc/subscr.c | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 271cd66e4b3b..0649bc29c6bb 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -145,6 +145,7 @@ static void tipc_subscrp_timeout(unsigned long data) spin_lock_bh(>lock); tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); spin_unlock_bh(>lock); /* Notify subscriber of timeout */ @@ -177,10 +178,7 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_net *tn = net_generic(sub->net, tipc_net_id); struct tipc_subscriber *subscriber = sub->subscriber; - spin_lock_bh(>lock); - list_del(>subscrp_list); atomic_dec(>subscription_count); - spin_unlock_bh(>lock); kfree(sub); tipc_subscrb_put(subscriber); } @@ -210,11 +208,8 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, continue; tipc_nametbl_unsubscribe(sub); - tipc_subscrp_get(sub); - spin_unlock_bh(>lock); + list_del(>subscrp_list); tipc_subscrp_delete(sub); - tipc_subscrp_put(sub); - spin_lock_bh(>lock); if (s) break; -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH v3 net-next 1/3] tipc: advance the time of calling tipc_nametbl_unsubscribe
From: Ying Xue <ying@windriver.com> Until now, tipc_nametbl_unsubscribe() is called at subscriptions reference count cleanup. Usually the subscriptions cleanup is called at subscription timeout or at subscription cancel or at subscriber delete. We have ignored the possibility of this being called from other locations, which causes deadlock as we try to grab the tn->nametbl_lock while holding it already. CPU1: CPU2: -- tipc_nametbl_publish spin_lock_bh(>nametbl_lock) tipc_nametbl_insert_publ tipc_nameseq_insert_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> CPU1: CPU2: -- tipc_nametbl_stop spin_lock_bh(>nametbl_lock) tipc_purge_publications tipc_nameseq_remove_publ tipc_subscrp_report_overlap tipc_subscrp_get tipc_subscrp_send_event tipc_close_conn tipc_subscrb_release_cb tipc_subscrb_delete tipc_subscrp_put tipc_subscrp_put tipc_subscrp_kref_release tipc_nametbl_unsubscribe spin_lock_bh(>nametbl_lock) <> In this commit, we advance the calling of tipc_nametbl_unsubscribe() from the refcount cleanup to the intended callers. Fixes: d094c4d5f5c7 ("tipc: add subscription refcount to avoid invalid delete") Reported-by: John Thompson <thompa@gmail.com> Reported-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Ying Xue <ying....@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: Call tipc_nametbl_unsubscribe() at tipc_subscrp_timeout() too. This should fix the deadlock and also ensure that applications will not receive any new notifications after timeout event. --- net/tipc/subscr.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 9d94e65d0894..271cd66e4b3b 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -141,6 +141,11 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(>lock); + tipc_nametbl_unsubscribe(sub); + spin_unlock_bh(>lock); /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, @@ -173,7 +178,6 @@ static void tipc_subscrp_kref_release(struct kref *kref) struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(>lock); - tipc_nametbl_unsubscribe(sub); list_del(>subscrp_list); atomic_dec(>subscription_count); spin_unlock_bh(>lock); @@ -205,6 +209,7 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) continue; + tipc_nametbl_unsubscribe(sub); tipc_subscrp_get(sub); spin_unlock_bh(>lock); tipc_subscrp_delete(sub); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, Slashdot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH v2 net-next 6/6] tipc: delete expired subscription
Hi Ying, I have a new patch sets which fixes this issue using fixes from your patches. It deviates from your patch the following way: In my solution, the subscription refcount keeps track of a subscription with or without timer. I do not increment refcount for timer, and use the subscriber lock plus the del_timer to find outstanding timers. I will send the series shortly. I applied your series and ran some tests with it. If I run test against each patch individually, they seem to introduce new warnings/panics. I think every patch should be as correct as possible. I tried to moved around the patches in this series to get every patch correct, which led me to my series above. 1. tipc: advance the time of deleting subscription from subscriber->subscrp_list I get several warnings like: WARNING: CPU: 15 PID: 356 at lib/refcount.c:128 refcount_sub_and_test+0x5e/0x70 refcount_t: underflow; use-after-free. Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio CPU: 15 PID: 356 Comm: topsrv_tester Tainted: G W4.10.0+ #111 Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 Call Trace: dump_stack+0x4d/0x72 __warn+0xd1/0xf0 warn_slowpath_fmt+0x4f/0x60 ? conn_put+0x12/0x30 [tipc] refcount_sub_and_test+0x5e/0x70 refcount_dec_and_test+0x11/0x20 tipc_subscrp_put+0x12/0x30 [tipc] tipc_subscrp_report_overlap+0xa7/0xc0 [tipc] tipc_nameseq_remove_publ+0x179/0x210 [tipc] tipc_nametbl_remove_publ+0x59/0xf0 [tipc] tipc_nametbl_withdraw+0x6c/0x130 [tipc] tipc_sk_withdraw+0xc0/0x100 [tipc] tipc_release+0x56/0x290 [tipc] sock_release+0x1f/0x80 sock_close+0x12/0x20 __fput+0xbe/0x200 fput+0xe/0x10 task_work_run+0x7e/0xb0 do_exit+0x3fd/0xc10 ? __do_page_fault+0x27a/0x500 do_group_exit+0x43/0xc0 SyS_exit_group+0x14/0x20 entry_SYSCALL_64_fastpath+0x13/0x94 RIP: 0033:0x7fcc289d22e9 RSP: 002b:7ffc804f0ef8 EFLAGS: 0246 ORIG_RAX: 00e7 RAX: ffda RBX: RCX: 7fcc289d22e9 RDX: RSI: 7fcc28cbe323 RDI: RBP: 7fcc28cb9860 R08: 003c R09: 00e7 R10: ff90 R11: 0246 R12: 7fcc28cb9860 R13: 7fcc28cbec60 R14: R15: And, the one below as you forgot to delete the subscription list at subscription timeout. general protection fault: [#1] SMP Modules linked in: tipc ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio CPU: 17 PID: 222 Comm: kworker/u40:2 Tainted: GW 4.10.0+ #111 Hardware name: Ericsson AB CXC1060320/ROJ208840/5, BIOS 4.6.5 11/07/2013 Workqueue: tipc_send tipc_send_work [tipc] task: 880c444eef00 task.stack: c90007734000 RIP: 0010:tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] RSP: 0018:c90007737d48 EFLAGS: 00010246 RAX: dead0200 RBX: 880c3cebd480 RCX: 0101 RDX: dead0100 RSI: 0001 RDI: 880c3cebd480 RBP: c90007737d70 R08: 0001 R09: R10: 0011 R11: R12: dead00a8 R13: R14: 880c44638088 R15: 880c446380a0 FS: () GS:880c7f44() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 0084f0c0 CR3: 01c0b000 CR4: 001406e0 Call Trace: tipc_subscrb_release_cb+0x17/0x30 [tipc] tipc_close_conn+0x80/0x90 [tipc] tipc_send_work+0x1a7/0x1b0 [tipc] process_one_work+0x145/0x410 worker_thread+0x69/0x4c0 kthread+0x128/0x160 ? process_one_work+0x410/0x410 ? kthread_create_on_node+0x40/0x40 ret_from_fork+0x29/0x40 Code: 49 89 c4 4d 85 ed 74 18 48 8d b3 80 00 00 00 ba 1c 00 00 00 4c 89 ef e8 e3 51 2e e1 85 c0 75 76 48 8b 53 58 48 8b 43 60 48 89 df <48> 89 42 08 48 89 10 48 b8 00 01 00 00 00 00 ad de 48 89 43 58 RIP: tipc_subscrb_subscrp_delete+0x6c/0x110 [tipc] RSP: c90007737d48 2. tipc: adjust policy that sub->timer holds subscription kref My testcases fail, i think the waitq should fix it. So these commits should be squashed together. - create max_subscriptions for a non existent subscriber - wait for subscription timeout - create again the same max subscription and cancel them. They fail as: [16761.924321] Subscription rejected, limit reached (65535) [16761.929661] Subscription rejected, limit reached (65535) [16761.934966] Subscription rejected, limit reached (65535) [16761.940281] Subscription rejected, limit reached (65535) And with one of the your patches (the last one "tipc: delete expired subscription" which fixed it), I got: general protection fault: [#1] SMP Modules linked in: tipc(-) ip6_udp_tunnel udp_tunnel 9pnet_virtio 9p 9pnet virtio_net virtio_pci virtio_ring virtio [last unloaded: ip6_udp_tunnel] CPU: 15 PID: 8105 Comm: modprobe Tainted: GW 4.10.0+ #111 Hardware name: Ericsson AB
Re: [tipc-discussion] TIPC Oops in tipc_sk_recv
On 02/27/2017 04:07 PM, Butler, Peter wrote: > Partha et al, > > Thanks for the heads-up. Also note that our user-space applications were > built against headers (i.e. /usr/include/...) that came from the 4.4.0 > kernel. Are there any backward-compatibility issues running these binaries > against the newly built 4.9.11-based TIPC kernel module? It is my > understanding that kernel code is meant to be backward-compatible in > principle... > The kernel ABI (/usr/include) is always backward compatible. If you see issues with it, then they are bugs. /Partha > Peter > > > > -Original Message- > From: Parthasarathy Bhuvaragan [mailto:parthasarathy.bhuvara...@ericsson.com] > Sent: February-27-17 7:37 AM > To: Butler, Peter <pbut...@sonusnet.com> > Cc: Jon Maloy <jon.ma...@ericsson.com>; tipc-discussion@lists.sourceforge.net > Subject: Re: TIPC Oops in tipc_sk_recv > > On 02/24/2017 04:15 PM, Butler, Peter wrote: >> Hi Partha, >> >> >> >> In our situation we do not need to support the delivery of RPMs in any >> way. Literally the only thing we are changing on the target systems >> is the tipc.ko file. That is, the original 4.4.0 kernel and all other >> 4.4.0-specific kernel modules will be left untouched. >> >> >> >> I am doing this actually removing the include/uapi/linux/tipc* and >> net/tipc/* files from within our 4.4.0 kernel source tree, and >> replacing them with the files from kernel 4.9.11. (Note that kernel >> 4.9.11 actually has a couple more TIPC-related files than the 4.4.0 >> kernel.) To accomplish I had to make a few changes (as per the email >> thread between Jon and myself) to get it to compile. >> >> >> >> Then, when I kick off a 'make' (no 'make clean' is performed) at the >> top level of the kernel source tree the build process detects that >> everything TIPC-related requires building, and a new tipc.ko is >> generated. This tipc.ko is literally taken and installed onto the >> existing 4.4.0 systems without any other changes (e.g. no new bzImage >> is installed - the original kernel file is left untouched). >> >> >> >> We're not concerned about maintainability for now, as we plan on doing >> a full upgrade of the entire kernel at some point in the next few months. >> The hybrid of a 4.4.0 kernel running a TIPC source from 4.9.11 is only >> a stop-gap measure for an emergency fix needed asap. >> >> >> >> If you can foresee any issues with our short-term plan here let me >> know. As it stands I have the module built and running - but that of >> course doesn't mean that run-time issues won't occur. >> > Hi Peter, > > If you are taking the latest tipc, please patch yours with these two fixes > for the socket. > [PATCH net v1 1/2] tipc: fix socket flow control errors [PATCH net v1 2/2] > tipc: Fix missing connection request handling > > A word of caution: My users still face stability issues (connection are > permanently congested) while running load over several connections > (~1000+) and i am yet to find the root cause. > > /Partha >> >> >> /Peter >> >> >> >> *From:*Parthasarathy Bhuvaragan >> [mailto:parthasarathy.bhuvara...@ericsson.com] >> *Sent:* February-24-17 5:21 AM >> *To:* Butler, Peter <pbut...@sonusnet.com> >> *Cc:* Jon Maloy <jon.ma...@ericsson.com>; >> tipc-discussion@lists.sourceforge.net >> *Subject:* Re: TIPC Oops in tipc_sk_recv >> >> >> >> Hi Peter, >> >> >> >> The backporting strategy varies depending on: >> >> 1. Supporting upgrades of rpm's. Ex: can you deliver a new tipc rpm >> and update it on an existing kernel. >> >> 2. Delivering / Upgrading the entire kernel. No individual rpm updates >> are delivered. >> >> >> >> If its option 2, then you may be allowed to update tipc ABI i.e >> include the commits which touch include/uapi/linux/tipc*. >> >> I have to support option 1, so I cannot include any commit which >> touches files outside net/tipc/ without manual intervention. >> >> >> >> The way I do it is to using git and walk all the commits from a >> specific version upto say v4.9 and follow these rules: >> >> 1. Skip commits which are not tipc specific, i.e its introduced as a >> part of core net cleanup. They usually break the ABI. >> >> 2. If you skip a commit, the subsequent commit needs to be amended to >> apply cleanly. >> >> 3. When cherry-picking commits, use option "-x"
Re: [tipc-discussion] TIPC Oops in tipc_sk_recv
On 02/24/2017 04:15 PM, Butler, Peter wrote: > Hi Partha, > > > > In our situation we do not need to support the delivery of RPMs in any > way. Literally the only thing we are changing on the target systems is > the tipc.ko file. That is, the original 4.4.0 kernel and all other > 4.4.0-specific kernel modules will be left untouched. > > > > I am doing this actually removing the include/uapi/linux/tipc* and > net/tipc/* files from within our 4.4.0 kernel source tree, and replacing > them with the files from kernel 4.9.11. (Note that kernel 4.9.11 > actually has a couple more TIPC-related files than the 4.4.0 kernel.) > To accomplish I had to make a few changes (as per the email thread > between Jon and myself) to get it to compile. > > > > Then, when I kick off a ‘make’ (no ‘make clean’ is performed) at the top > level of the kernel source tree the build process detects that > everything TIPC-related requires building, and a new tipc.ko is > generated. This tipc.ko is literally taken and installed onto the > existing 4.4.0 systems without any other changes (e.g. no new bzImage is > installed – the original kernel file is left untouched). > > > > We’re not concerned about maintainability for now, as we plan on doing a > full upgrade of the entire kernel at some point in the next few months. > The hybrid of a 4.4.0 kernel running a TIPC source from 4.9.11 is only a > stop-gap measure for an emergency fix needed asap. > > > > If you can foresee any issues with our short-term plan here let me > know. As it stands I have the module built and running – but that of > course doesn’t mean that run-time issues won’t occur. > Hi Peter, If you are taking the latest tipc, please patch yours with these two fixes for the socket. [PATCH net v1 1/2] tipc: fix socket flow control errors [PATCH net v1 2/2] tipc: Fix missing connection request handling A word of caution: My users still face stability issues (connection are permanently congested) while running load over several connections (~1000+) and i am yet to find the root cause. /Partha > > > /Peter > > > > *From:*Parthasarathy Bhuvaragan > [mailto:parthasarathy.bhuvara...@ericsson.com] > *Sent:* February-24-17 5:21 AM > *To:* Butler, Peter <pbut...@sonusnet.com> > *Cc:* Jon Maloy <jon.ma...@ericsson.com>; > tipc-discussion@lists.sourceforge.net > *Subject:* Re: TIPC Oops in tipc_sk_recv > > > > Hi Peter, > > > > The backporting strategy varies depending on: > > 1. Supporting upgrades of rpm's. Ex: can you deliver a new tipc rpm and > update it on an existing kernel. > > 2. Delivering / Upgrading the entire kernel. No individual rpm updates > are delivered. > > > > If its option 2, then you may be allowed to update tipc ABI i.e include > the commits which touch include/uapi/linux/tipc*. > > I have to support option 1, so I cannot include any commit which touches > files outside net/tipc/ without manual intervention. > > > > The way I do it is to using git and walk all the commits from a specific > version upto say v4.9 and follow these rules: > > 1. Skip commits which are not tipc specific, i.e its introduced as a > part of core net cleanup. They usually break the ABI. > > 2. If you skip a commit, the subsequent commit needs to be amended to > apply cleanly. > > 3. When cherry-picking commits, use option "-x" to record the upstream > commit id. This way you can do git-blame and find out the history. > > This is a slow process, but you will be sure of the commits you pick and > its history. > > > > If you copy the tipc source from a later kernel to say v4.4.x, then you > loose the history. This will hinder maintainability in the long run. > > > > /Partha > > > > *From:*Butler, Peter <pbut...@sonusnet.com <mailto:pbut...@sonusnet.com>> > *Sent:* Thursday, February 23, 2017 9:29 PM > *To:* Jon Maloy; tipc-discussion@lists.sourceforge.net > <mailto:tipc-discussion@lists.sourceforge.net>; Parthasarathy Bhuvaragan > *Cc:* Butler, Peter > *Subject:* RE: TIPC Oops in tipc_sk_recv > > > > I have made the following final change: this change works around the > different function signature for udp_tunnel6_xmit_skb() in udp_media.c > (function is defined in net/ipv6/ip6_udp_tunnel.c): > > Change: > err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, > ndst->dev, >ipv6, > >ipv6, 0, ttl, 0, src->port, > dst->port, false); > > To be: > err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, >
[tipc-discussion] [PATCH net v1 2/2] tipc: Fix missing connection request handling
In filter_connect, we use waitqueue_active() to check for any connections to wakeup. But waitqueue_active() is missing memory barriers while accessing the critical sections, leading to inconsistent results. In this commit, we replace this with an SMB safe wq_has_sleeper(). Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 79e628cd08a9..ce6ed0955e36 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1577,7 +1577,7 @@ static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) return true; /* If empty 'ACK-' message, wake up sleeping connect() */ - if (waitqueue_active(sk_sleep(sk))) + if (wq_has_sleeper(rcu_dereference(sk->sk_wq))) wake_up_interruptible(sk_sleep(sk)); /* 'ACK-' message is neither accepted nor rejected: */ -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 1/2] tipc: fix socket flow control errors
In this commit, we fix the following two errors: 1. In tipc_send_stream(), fix the return value during congestion when the send is partially successful. Until now, we return -1 instead of returning the partial sent bytes. 2. In tipc_recv_stream(), we update the rcv_unack not based on the message size, but on sz. Usually they are the same, but in cases where the socket receivers buffer is smaller than the incoming message, these two parameters differ greatly. This introduces a slack in accounting leading to permanent congestion. In this commit, we perform accounting always based on the incoming message. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71..79e628cd08a9 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1080,7 +1080,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen) } } while (sent < dlen && !rc); - return rc ? rc : sent; + return sent ? sent : rc; } /** @@ -1481,16 +1481,15 @@ static int tipc_recv_stream(struct socket *sock, struct msghdr *m, if (unlikely(flags & MSG_PEEK)) goto exit; - tsk->rcv_unacked += tsk_inc(tsk, hlen + sz); + tsk->rcv_unacked += tsk_inc(tsk, hlen + msg_data_sz(msg)); if (unlikely(tsk->rcv_unacked >= (tsk->rcv_win / 4))) tipc_sk_send_ack(tsk); tsk_advance_rx_queue(sk); /* Loop around if more data is required */ - if ((sz_copied < buf_len) &&/* didn't get all requested data */ + if ((!err) && (sz_copied < buf_len) && (!skb_queue_empty(>sk_receive_queue) || - (sz_copied < target)) &&/* and more is ready or required */ - (!err)) /* and haven't reached a FIN */ +(sz_copied < target))) goto restart; exit: -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] TIPC Oops in tipc_sk_recv
Hi Peter, The backporting strategy varies depending on: 1. Supporting upgrades of rpm's. Ex: can you deliver a new tipc rpm and update it on an existing kernel. 2. Delivering / Upgrading the entire kernel. No individual rpm updates are delivered. If its option 2, then you may be allowed to update tipc ABI i.e include the commits which touch include/uapi/linux/tipc*. I have to support option 1, so I cannot include any commit which touches files outside net/tipc/ without manual intervention. The way I do it is to using git and walk all the commits from a specific version upto say v4.9 and follow these rules: 1. Skip commits which are not tipc specific, i.e its introduced as a part of core net cleanup. They usually break the ABI. 2. If you skip a commit, the subsequent commit needs to be amended to apply cleanly. 3. When cherry-picking commits, use option "-x" to record the upstream commit id. This way you can do git-blame and find out the history. This is a slow process, but you will be sure of the commits you pick and its history. If you copy the tipc source from a later kernel to say v4.4.x, then you loose the history. This will hinder maintainability in the long run. /Partha From: Butler, Peter <pbut...@sonusnet.com> Sent: Thursday, February 23, 2017 9:29 PM To: Jon Maloy; tipc-discussion@lists.sourceforge.net; Parthasarathy Bhuvaragan Cc: Butler, Peter Subject: RE: TIPC Oops in tipc_sk_recv I have made the following final change: this change works around the different function signature for udp_tunnel6_xmit_skb() in udp_media.c (function is defined in net/ipv6/ip6_udp_tunnel.c): Change: err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, >ipv6, >ipv6, 0, ttl, 0, src->port, dst->port, false); To be: err = udp_tunnel6_xmit_skb(ndst, ub->ubsock->sk, skb, ndst->dev, >ipv6, >ipv6, 0, ttl, src->port, dst->port, false); That is, simply remove the '0' parameter (which comes immediately after the ttl parameter). In 4.9.11 this is a variable called 'label' and is being passed as '0', while in 4.4.0 it appears to be explicitly set to 0 directly within the udp_tunnel6_xmit_skb() function anyway. With that last change in effect, everything now compiles. (I have not tested anything, mind you.) Note that I did not come across any errors regarding the iov handling in msg_build() that you mentioned. Were you expecting compilation to fail there? Or were you expecting it to succeed, but the resulting TIPC functionality to simply be erroneous at run-time? Peter -Original Message- From: Butler, Peter Sent: February-23-17 2:48 PM To: Jon Maloy <jon.ma...@ericsson.com>; tipc-discussion@lists.sourceforge.net; Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Cc: Butler, Peter <pbut...@sonusnet.com> Subject: RE: TIPC Oops in tipc_sk_recv I have made the following change so as to work around the missing skwq_has_sleeper() function in our 4.4.0 kernel source stream (as required for the 4.9.11 TIPC source). This change was based on a comparison of 4.4.0 and 4.9.11 kernel code (include/net/sock.h and include/linux/wait.h). Change: if (skwq_has_sleeper(wq)) To be: if (wq && wq_has_sleeper(wq)) Let me know if that seems reasonable to you. With this change in effect, my compilation now proceeds further (see below). As always, any insight is much appreciated. CHK include/config/kernel.release CHK include/generated/uapi/linux/version.h CHK include/generated/utsrelease.h CHK include/generated/bounds.h CHK include/generated/timeconst.h CHK include/generated/asm-offsets.h CALLscripts/checksyscalls.sh LD net/tipc/built-in.o CC [M] net/tipc/addr.o CC [M] net/tipc/bcast.o CC [M] net/tipc/bearer.o CC [M] net/tipc/core.o CC [M] net/tipc/link.o CC [M] net/tipc/discover.o CC [M] net/tipc/msg.o CC [M] net/tipc/name_distr.o CC [M] net/tipc/subscr.o CC [M] net/tipc/monitor.o CC [M] net/tipc/name_table.o CC [M] net/tipc/net.o CC [M] net/tipc/netlink.o CC [M] net/tipc/netlink_compat.o CC [M] net/tipc/node.o CC [M] net/tipc/socket.o CC [M] net/tipc/eth_media.o CC [M] net/tipc/server.o CC [M] net/tipc/udp_media.o net/tipc/udp_media.c: In function 'tipc_udp_xmit': net/tipc/udp_media.c:199:9: error: too many arguments to function 'udp_tunnel6_xmit_skb' include/net/udp_tunnel.h:87:5: note: declared here make[1]: *** [net/tipc/udp_media.o] Error 1 make: *** [net/tipc/] Error 2 -Original Message- From: Butler, Peter Sent: February-23-17 2:14 PM To: Jon Maloy <jon.ma...@ericsson.com>; tipc-discussion@lists.sourceforge.net; Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com>
[tipc-discussion] [PATCH net-next v3 1/2] tipc: add support for stream socketpairs
From: Erik Hugnesockets A and B are connected back-to-back, similar to what AF_UNIX does. Signed-off-by: Erik Hugne --- net/tipc/socket.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 103d1fd058c0..eafc9569e679 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2499,6 +2499,16 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + + tipc_sk_finish_conn(tsk1, tsk2->portid, 0); + tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2528,7 +2538,7 @@ static const struct proto_ops packet_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH 2/2] tipc: allow rdm/dgram socketpairs
From: Erik Hugne <erik.hu...@gmail.com> for socketpairs using connectionless transport, we cache the respective node local TIPC portid to use in subsequent calls to send() in the socket's private data. Signed-off-by: Erik Hugne <erik.hu...@gmail.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v3: Specify own_addr() as destination in tipc_sk_finish_conn(). v2: node is set to own_addr() instead of 0. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/socket.c | 22 +- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index eafc9569e679..b86bd62c4415 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2503,9 +2503,21 @@ static int tipc_socketpair(struct socket *sock1, struct socket *sock2) { struct tipc_sock *tsk2 = tipc_sk(sock2->sk); struct tipc_sock *tsk1 = tipc_sk(sock1->sk); - - tipc_sk_finish_conn(tsk1, tsk2->portid, 0); - tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + u32 onode = tipc_own_addr(sock_net(sock1->sk)); + + tsk1->peer.family = AF_TIPC; + tsk1->peer.addrtype = TIPC_ADDR_ID; + tsk1->peer.scope = TIPC_NODE_SCOPE; + tsk1->peer.addr.id.ref = tsk2->portid; + tsk1->peer.addr.id.node = onode; + tsk2->peer.family = AF_TIPC; + tsk2->peer.addrtype = TIPC_ADDR_ID; + tsk2->peer.scope = TIPC_NODE_SCOPE; + tsk2->peer.addr.id.ref = tsk1->portid; + tsk2->peer.addr.id.node = onode; + + tipc_sk_finish_conn(tsk1, tsk2->portid, onode); + tipc_sk_finish_conn(tsk2, tsk1->portid, onode); return 0; } @@ -2517,7 +2529,7 @@ static const struct proto_ops msg_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = sock_no_accept, .getname= tipc_getname, .poll = tipc_poll, @@ -2559,7 +2571,7 @@ static const struct proto_ops stream_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH 1/2] tipc: add support for stream socketpairs
From: Erik Hugnesockets A and B are connected back-to-back, similar to what AF_UNIX does. Signed-off-by: Erik Hugne --- net/tipc/socket.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 103d1fd058c0..eafc9569e679 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2499,6 +2499,16 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + + tipc_sk_finish_conn(tsk1, tsk2->portid, 0); + tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2528,7 +2538,7 @@ static const struct proto_ops packet_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH net-next v2 2/2] tipc: allow rdm/dgram socketpairs
On 01/26/2017 03:45 PM, Jon Maloy wrote: > I think you could do even better. Why not set socket state to > TIPC_ESTABLISHED, and along with a couple of other tweeks you will have a > full-fledged connection, with flow control and peer crash detection? But thats what we do by calling tipc_sk_finish_conn(). This patch has is not correct, I need to pass the onode as the peer_node in that api instead of 0 and that should do all of the above. Or were you thinking of something else? /Partha > > ///jon > > >> -Original Message----- >> From: Parthasarathy Bhuvaragan >> Sent: Thursday, 26 January, 2017 02:47 >> To: tipc-discussion@lists.sourceforge.net; Jon Maloy >> <jon.ma...@ericsson.com>; >> Ying Xue <ying@windriver.com> >> Cc: erik.hu...@gmail.com >> Subject: [PATCH net-next v2 2/2] tipc: allow rdm/dgram socketpairs >> >> From: Erik Hugne <erik.hu...@gmail.com> >> >> for socketpairs using connectionless transport, we cache >> the respective node local TIPC portid to use in subsequent >> calls to send() in the socket's private data. >> >> Signed-off-by: Erik Hugne <erik.hu...@gmail.com> >> Signed-off-by: Parthasarathy Bhuvaragan >> <parthasarathy.bhuvara...@ericsson.com> >> >> --- >> v2: node is set to own_addr() instead of 0. >> --- >> net/tipc/socket.c | 16 ++-- >> 1 file changed, 14 insertions(+), 2 deletions(-) >> >> diff --git a/net/tipc/socket.c b/net/tipc/socket.c >> index eafc9569e679..199e82307491 100644 >> --- a/net/tipc/socket.c >> +++ b/net/tipc/socket.c >> @@ -2503,6 +2503,18 @@ static int tipc_socketpair(struct socket *sock1, >> struct >> socket *sock2) >> { >> struct tipc_sock *tsk2 = tipc_sk(sock2->sk); >> struct tipc_sock *tsk1 = tipc_sk(sock1->sk); >> + u32 onode = tipc_own_addr(sock_net(sock1->sk)); >> + >> + tsk1->peer.family = AF_TIPC; >> + tsk1->peer.addrtype = TIPC_ADDR_ID; >> + tsk1->peer.scope = TIPC_NODE_SCOPE; >> + tsk1->peer.addr.id.ref = tsk2->portid; >> + tsk1->peer.addr.id.node = onode; >> + tsk2->peer.family = AF_TIPC; >> + tsk2->peer.addrtype = TIPC_ADDR_ID; >> + tsk2->peer.scope = TIPC_NODE_SCOPE; >> + tsk2->peer.addr.id.ref = tsk1->portid; >> + tsk2->peer.addr.id.node = onode; >> >> tipc_sk_finish_conn(tsk1, tsk2->portid, 0); >> tipc_sk_finish_conn(tsk2, tsk1->portid, 0); >> @@ -2517,7 +2529,7 @@ static const struct proto_ops msg_ops = { >> .release= tipc_release, >> .bind = tipc_bind, >> .connect= tipc_connect, >> - .socketpair = sock_no_socketpair, >> + .socketpair = tipc_socketpair, >> .accept = sock_no_accept, >> .getname= tipc_getname, >> .poll = tipc_poll, >> @@ -2559,7 +2571,7 @@ static const struct proto_ops stream_ops = { >> .release= tipc_release, >> .bind = tipc_bind, >> .connect= tipc_connect, >> - .socketpair = sock_no_socketpair, >> + .socketpair = tipc_socketpair, >> .accept = tipc_accept, >> .getname= tipc_getname, >> .poll = tipc_poll, >> -- >> 2.1.4 > -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v2 1/2] tipc: add support for stream socketpairs
From: Erik Hugnesockets A and B are connected back-to-back, similar to what AF_UNIX does. Signed-off-by: Erik Hugne --- net/tipc/socket.c | 12 +++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 103d1fd058c0..eafc9569e679 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2499,6 +2499,16 @@ static int tipc_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) } } +static int tipc_socketpair(struct socket *sock1, struct socket *sock2) +{ + struct tipc_sock *tsk2 = tipc_sk(sock2->sk); + struct tipc_sock *tsk1 = tipc_sk(sock1->sk); + + tipc_sk_finish_conn(tsk1, tsk2->portid, 0); + tipc_sk_finish_conn(tsk2, tsk1->portid, 0); + return 0; +} + /* Protocol switches for the various types of TIPC sockets */ static const struct proto_ops msg_ops = { @@ -2528,7 +2538,7 @@ static const struct proto_ops packet_ops = { .release= tipc_release, .bind = tipc_bind, .connect= tipc_connect, - .socketpair = sock_no_socketpair, + .socketpair = tipc_socketpair, .accept = tipc_accept, .getname= tipc_getname, .poll = tipc_poll, -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 3/6] tipc: fix connection refcount error
Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to an new subscriber leading to inconsistent reference count. We have another refcount warning we grab the refcount in tipc_conn_lookup() for connections with flag with CF_CONNECTED not set. This usually occurs at shutdown when the we stop the topology server and withdraw TIPC_CFG_SRV publication thereby triggering a withdraw message to subscribers. In this commit, we: 1. remove the connection from the server list at recount cleanup. 2. grab the refcount for a connection only if CF_CONNECTED is set. Tested-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying@windriver.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..2e803601aa99 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(>idr_lock); + idr_remove(>conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(>idr_lock); } tipc_clean_outqueues(con); @@ -128,8 +134,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(>idr_lock); con = idr_find(>conn_idr, conid); - if (con) + if (con && test_bit(CF_CONNECTED, >flags)) conn_get(con); + else + con = NULL; spin_unlock_bh(>idr_lock); return con; } @@ -198,15 +206,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, >flags)) { - spin_lock_bh(>idr_lock); - idr_remove(>conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(>idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 0/6] topology server fixes for nametable soft lockup
In this series, we revert the commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") and provide an alternate solution to fix the race conditions in commits 2-4. We have to do this as the above commit introduced a nametbl soft lockup at module exit as described by patch#4. Parthasarathy Bhuvaragan (6): tipc: fix nametbl_lock soft lockup at node/link events tipc: add subscription refcount to avoid invalid delete tipc: fix connection refcount error tipc: fix nametbl_lock soft lockup at module exit tipc: ignore requests when the connection state is not CONNECTED tipc: fix cleanup at module unload net/tipc/node.c | 9 +++- net/tipc/server.c | 48 + net/tipc/subscr.c | 124 ++ net/tipc/subscr.h | 1 + 4 files changed, 99 insertions(+), 83 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 5/6] tipc: ignore requests when the connection state is not CONNECTED
In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Acked-by: Ying Xue <ying@windriver.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Tested-by: John Thompson <thompa@gmail.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 826cde2c401e..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -453,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, >flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -466,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(>list, >outqueue); spin_unlock_bh(>outqueue_lock); - if (test_bit(CF_CONNECTED, >flags)) { - if (!queue_work(s->send_wq, >swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, >swork)) conn_put(con); - } return 0; } @@ -495,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(>outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, >flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == >outqueue) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net v1 2/6] tipc: add subscription refcount to avoid invalid delete
Until now, the subscribers keep track of the subscriptions using reference count at subscriber level. At subscription cancel or subscriber delete, we delete the subscription only if the timer was pending for the subscription. This approach is incorrect as: 1. del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 is scheduled, it deletes an invalid subscription. 2. We export tipc_subscrp_report_overlap(), which accesses the subscription pointer multiple times. Meanwhile the subscription timer can expire thereby freeing the subscription and we might continue to access the subscription pointer leading to memory violations. In this commit, we introduce subscription refcount to avoid deleting an invalid subscription. Reported-and-Tested-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying@windriver.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/subscr.c | 124 ++ net/tipc/subscr.h | 1 + 2 files changed, 71 insertions(+), 54 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..9d94e65d0894 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,8 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); +static void tipc_subscrp_get(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -123,6 +125,7 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, { struct tipc_name_seq seq; + tipc_subscrp_get(sub); tipc_subscrp_convert_seq(>evt.s.seq, sub->swap, ); if (!tipc_subscrp_check_overlap(, found_lower, found_upper)) return; @@ -132,30 +135,23 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, tipc_subscrp_send_event(sub, found_lower, found_upper, event, port_ref, node); + tipc_subscrp_put(sub); } static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; - struct tipc_subscriber *subscriber = sub->subscriber; /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(>lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(>lock); - - tipc_subscrb_put(subscriber); + tipc_subscrp_put(sub); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +164,59 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(>kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub = container_of(kref, +struct tipc_subscription, +kref); + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(>lock); + tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); + atomic_dec(>subscription_count); + spin_unlock_bh(>lock); + kfree(sub); + tipc_subscrb_put(subscriber); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(>kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(>kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. + */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct list_head *subscription_list = >subscrp_list; + struct tipc_subscription *sub, *temp; + + spin_lock_bh(>lock); + list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { + if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) + continue; + + tipc_subscrp_get(sub); + spin_unl
[tipc-discussion] [PATCH net v1 4/6] tipc: fix nametbl_lock soft lockup at module exit
Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-and-Tested-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying@windriver.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2e803601aa99..826cde2c401e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(>sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, >flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH 2/2] tipc: allow rdm/dgram socketpairs
Hi Erik, Thanks for your patch, they look good except for the minor comment inline. You may add the tag if you want to for your series. Reviewed-by:Parthasarathy BhuvaraganI added a new option to tipc-pipe as described in: https://github.com/parbhu/tipcutils/commit/0156d7d73be8cad3a9d5d1f69a778be5daf61620 tipc-pipe: add socketpair support with --tee option With this --tee option, you can dump the stdin to a file and can be used as: Start a server as: tipc-pipe --tee -s 9000 2> /dev/null | md5sum Generate random data using /dev/urandom and pipe to tipc-pipe: dd if=/dev/urandom bs=1 count=1000 | tipc-pipe --tee 9000 2> /dev/null | md5sum Data integrity can be confirmed by verifying the md5sum result. So, you can run generate long session data using /dev/urandom and compare the integrity using md5sum at the end. On 12/23/2016 10:36 PM, Erik Hugne wrote: > for socketpairs using connectionless transport, we cache > the respective node local TIPC portid to use in subsequent > calls to send() in the socket's private data. > > Signed-off-by: Erik Hugne > --- > net/tipc/socket.c | 15 +-- > 1 file changed, 13 insertions(+), 2 deletions(-) > > diff --git a/net/tipc/socket.c b/net/tipc/socket.c > index 1a7f7c9..c285f74 100644 > --- a/net/tipc/socket.c > +++ b/net/tipc/socket.c > @@ -2519,6 +2519,17 @@ static int tipc_socketpair(struct socket *sock1, > struct socket *sock2) > struct tipc_sock *tsk2 = tipc_sk(sock2->sk); > struct tipc_sock *tsk1 = tipc_sk(sock1->sk); > > + tsk1->peer.family = AF_TIPC; > + tsk1->peer.addrtype = TIPC_ADDR_ID; > + tsk1->peer.scope = TIPC_NODE_SCOPE; > + tsk1->peer.addr.id.ref = tsk2->portid; > + tsk1->peer.addr.id.node = 0; Please use tipc_own_addr(), instead of hardcoding to 0. With this patch the output of "tipc socket list", make it look as if we are still running in standalone mode (as node address is 0). $ tipc socket list socket 3661056118 connected to <0.0.0:1636075240> socket 1636075240 connected to <0.0.0:3661056118> > + tsk2->peer.family = AF_TIPC; > + tsk2->peer.addrtype = TIPC_ADDR_ID; > + tsk2->peer.scope = TIPC_NODE_SCOPE; > + tsk2->peer.addr.id.ref = tsk1->portid; > + tsk2->peer.addr.id.node = 0; Same as above. /Partha > + > tipc_sk_finish_conn(tsk1, tsk2->portid, 0); > tipc_sk_finish_conn(tsk2, tsk1->portid, 0); > return 0; > @@ -2532,7 +2543,7 @@ static const struct proto_ops msg_ops = { > .release= tipc_release, > .bind = tipc_bind, > .connect= tipc_connect, > - .socketpair = sock_no_socketpair, > + .socketpair = tipc_socketpair, > .accept = sock_no_accept, > .getname= tipc_getname, > .poll = tipc_poll, > @@ -2553,7 +2564,7 @@ static const struct proto_ops packet_ops = { > .release= tipc_release, > .bind = tipc_bind, > .connect= tipc_connect, > - .socketpair = sock_no_socketpair, > + .socketpair = tipc_socketpair, > .accept = tipc_accept, > .getname= tipc_getname, > .poll = tipc_poll, > -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v7 0/6] topology server fixes for nametable soft lockup
In this series, we revert the commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") and provide an alternate solution to fix the race conditions in commits 2-4. We have to do this as the above commit introduced a nametbl soft lockup at module exit as described by patch#4. --- v7: Following updates in Patch #2: Fix incorrect deletion of all prior subscriptions until the specified subscription. Protect exported tipc_subscrp_report_overlap() with subscription refcount. Ensure that subscription can be freed correctly at subscription timer expiry. The earlier patch#2 in v5/v6, had refcount bug which prevents the above. This was introduced when we skipped get/put refcount in tipc_subscrb_subscrp_delete(), but instead do get in tipc_subscrp_subscribe() before starting the timer. Thus the subscription_create() initialized the refcount and tipc_subscrp_subscribe steps it to 2. At subscription timeout, we perform put only once and we cannot compensate for this additional refcount safely. v6: Address krefcount warning for John Thompson in Patch#3 v5: Address Ying's comment in Patch #2 to remove del_timer_sync(). v4: Address Ying's comment by introducing subscription refcount. Parthasarathy Bhuvaragan (6): tipc: fix nametbl_lock soft lockup at node/link events tipc: add subscription refcount to avoid invalid delete tipc: fix connection refcount error tipc: fix nametbl_lock soft lockup at module exit tipc: ignore requests when the connection state is not CONNECTED tipc: fix cleanup at module unload net/tipc/node.c | 9 +++- net/tipc/server.c | 48 + net/tipc/subscr.c | 124 ++ net/tipc/subscr.h | 1 + 4 files changed, 99 insertions(+), 83 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v7 5/6] tipc: ignore requests when the connection state is not CONNECTED
In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 826cde2c401e..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -453,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, >flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -466,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(>list, >outqueue); spin_unlock_bh(>outqueue_lock); - if (test_bit(CF_CONNECTED, >flags)) { - if (!queue_work(s->send_wq, >swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, >swork)) conn_put(con); - } return 0; } @@ -495,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(>outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, >flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == >outqueue) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v7 4/6] tipc: fix nametbl_lock soft lockup at module exit
Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying....@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2e803601aa99..826cde2c401e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(>sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, >flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v7 3/6] tipc: fix connection refcount error
Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to an new subscriber leading to inconsistent reference count. We have another refcount warning we grab the refcount in tipc_conn_lookup() for connections with flag with CF_CONNECTED not set. This usually occurs at shutdown when the we stop the topology server and withdraw TIPC_CFG_SRV publication thereby triggering a withdraw message to subscribers. In this commit, we: 1. remove the connection from the server list at recount cleanup. 2. grab the refcount for a connection only if CF_CONNECTED is set. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: fix refcount warning at tipc_exit_net(), when the topology server shutdown and nametable withdraw for TIPC_CFG_SRV run simultaneously. --- net/tipc/server.c | 19 ++- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..2e803601aa99 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(>idr_lock); + idr_remove(>conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(>idr_lock); } tipc_clean_outqueues(con); @@ -128,8 +134,10 @@ static struct tipc_conn *tipc_conn_lookup(struct tipc_server *s, int conid) spin_lock_bh(>idr_lock); con = idr_find(>conn_idr, conid); - if (con) + if (con && test_bit(CF_CONNECTED, >flags)) conn_get(con); + else + con = NULL; spin_unlock_bh(>idr_lock); return con; } @@ -198,15 +206,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, >flags)) { - spin_lock_bh(>idr_lock); - idr_remove(>conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(>idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v7 6/6] tipc: fix cleanup at module unload
In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 04ff441b8065..3cd6402e812c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -619,14 +619,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(>idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(>conn_idr, id); if (con) { - total++; spin_unlock_bh(>idr_lock); tipc_close_conn(con); spin_lock_bh(>idr_lock); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH net-next v5 2/6] tipc: add subscription refcount to avoid invalid delete
On 01/18/2017 01:07 PM, Parthasarathy Bhuvaragan wrote: > On 01/18/2017 11:06 AM, Ying Xue wrote: >> On 01/16/2017 11:41 PM, Parthasarathy Bhuvaragan wrote: >>> Until now, the subscribers keep track of the subscriptions using >>> reference count at subscriber level. At subscription cancel or >>> subscriber delete, we delete the subscription by checking for >>> pending timer using del_timer(). del_timer() is not SMP safe, if >>> on CPU0 the check for pending timer returns true but CPU1 might >>> schedule the timer callback thereby deleting the subscription. >>> Thus when CPU0 is scheduled, it deletes an invalid subscription. >>> >>> We can fix the above issue using either a synchronous timer function >>> or by introducing subscription reference count. The later solution >>> is preferred as it's consistent with the asynchronous design model >>> used by the rest of the code. >>> >>> In this commit, we introduce subscription refcount to avoid deleting >>> an invalid subscription. >>> >>> Signed-off-by: Parthasarathy Bhuvaragan >>> <parthasarathy.bhuvara...@ericsson.com> >>> --- >>> v2: Address comments from Ying Xue, to avoid race conditions by using >>> refcount only instead of del_timer_sync(). >>> --- >>> net/tipc/subscr.c | 120 >>> +- >>> net/tipc/subscr.h | 1 + >>> 2 files changed, 66 insertions(+), 55 deletions(-) >>> >>> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c >>> index 0dd02244e21d..d6c1f35f6518 100644 >>> --- a/net/tipc/subscr.c >>> +++ b/net/tipc/subscr.c >>> @@ -54,6 +54,7 @@ struct tipc_subscriber { >>> >>> static void tipc_subscrp_delete(struct tipc_subscription *sub); >>> static void tipc_subscrb_put(struct tipc_subscriber *subscriber); >>> +static void tipc_subscrp_put(struct tipc_subscription *subscription); >>> >>> /** >>> * htohl - convert value to endianness used by destination >>> @@ -137,25 +138,17 @@ void tipc_subscrp_report_overlap(struct >>> tipc_subscription *sub, u32 found_lower, >>> static void tipc_subscrp_timeout(unsigned long data) >>> { >>> struct tipc_subscription *sub = (struct tipc_subscription *)data; >>> -struct tipc_subscriber *subscriber = sub->subscriber; >>> >>> /* Notify subscriber of timeout */ >>> tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, >>> sub->evt.s.seq.upper, >>> TIPC_SUBSCR_TIMEOUT, 0, 0); >>> >>> -spin_lock_bh(>lock); >>> -tipc_subscrp_delete(sub); >>> -spin_unlock_bh(>lock); >>> - >>> -tipc_subscrb_put(subscriber); >>> +tipc_subscrp_put(sub); >>> } >>> >>> static void tipc_subscrb_kref_release(struct kref *kref) >>> { >>> -struct tipc_subscriber *subcriber = container_of(kref, >>> -struct tipc_subscriber, kref); >>> - >>> -kfree(subcriber); >>> +kfree(container_of(kref,struct tipc_subscriber, kref)); >>> } >>> >>> static void tipc_subscrb_put(struct tipc_subscriber *subscriber) >>> @@ -168,6 +161,56 @@ static void tipc_subscrb_get(struct >>> tipc_subscriber *subscriber) >>> kref_get(>kref); >>> } >>> >>> +static void tipc_subscrp_kref_release(struct kref *kref) >>> +{ >>> +struct tipc_subscription *sub = container_of(kref, >>> + struct tipc_subscription, >>> + kref); >>> +struct tipc_net *tn = net_generic(sub->net, tipc_net_id); >>> +struct tipc_subscriber *subscriber = sub->subscriber; >>> + >>> +spin_lock_bh(>lock); >>> +tipc_nametbl_unsubscribe(sub); >>> +list_del(>subscrp_list); >>> +atomic_dec(>subscription_count); >>> +spin_unlock_bh(>lock); >>> +kfree(sub); >>> +tipc_subscrb_put(subscriber); >>> +} >>> + >>> +static void tipc_subscrp_put(struct tipc_subscription *subscription) >>> +{ >>> +kref_put(>kref, tipc_subscrp_kref_release); >>> +} >>> + >>> +static void tipc_subscrp_get(struct tipc_subscription *subscription) >>> +{ >>> +kref_get(>kref); >>> +} >>> + >>> +/* tipc_subscrb_subscrp_delete - delete
Re: [tipc-discussion] [PATCH net-next v4 0/6] topology server fixes for nametable soft lockup
On 01/18/2017 11:30 AM, Xue, Ying wrote: > Hi John, > > > > Thank you for the testing. > > > > I think your suggestion is reasonable. But we need to find out its exact > scenario. Regarding the following message, after one object refcnt is > decreased to zero by one thread, another thread tries to increment its > refcnt, which means that we have a race condition. So we must solve the > race condition first before we adopt your advice. > > I have fixed this in Patch #3 of v6 series that i have sent. The root case is: We have another refcount warning when we grab the refcount in tipc_conn_lookup() for connections with flag with CF_CONNECTED not set. This usually occurs at shutdown when the we stop the topology server and start reset CF_CONNECTED flag, while withdrawing TIPC_CFG_SRV. This withdrawal of publication triggers a withdraw message to subscribers but the connection flag is not CF_CONNECTED. This issue is seen now, as I moved the connection deletion from the topology server shutdown procedure to the connection refcount cleanup. Thus the lifetime of the connection is longer and there might be users who can find connections in the idr_list which have the connection flag is not CF_CONNECTED. After this change the refcount handling at receive and send look identical. /Partha > > Regards, > > Ying > > *From:*John Thompson [mailto:thompa@gmail.com] > *Sent:* Wednesday, January 18, 2017 12:23 PM > *To:* Parthasarathy Bhuvaragan > *Cc:* tipc-discussion@lists.sourceforge.net; Jon Maloy; Xue, Ying > *Subject:* Re: [PATCH net-next v4 0/6] topology server fixes for > nametable soft lockup > > > > Hi Partha, > > > > Thanks for the new patches. I have tested them and had a kernel warning > as per below. This is running on a SMP system with 4 cores. > > The warning reads as though one thread has gone to free the item while > another thread has gotten a reference to it. > > The suggestion is to use kref_get_unless_zero() instead of kref_get(). > > > > [ cut here ] > > WARNING: at /home/johnt/views/main/linux/include/linux/kref.h:46 > > Modules linked in: tipc jitterentropy_rng echainiv drbg platform_driver(O) > > CPU: 1 PID: 1527 Comm: intrTokenReset Tainted: P O > > task: a52c8600 ti: a4c98000 task.ti: a4c98000 > > NIP: c161809c LR: c1618120 CTR: 80695270 > > REGS: a4c99b30 TRAP: 0700 Tainted: P O > > MSR: 00029002 <CE,EE,ME> CR: 28002482 XER: > > > > GPR00: c1618694 a4c99be0 a52c8600 a4c4e840 a2763aa0 a4ab623c > 0030 > > GPR08: 01001005 0001 c162 0005 80695270 107d12f0 6c23e000 > 0009 > > GPR16: 808e 808e 00040100 00040006 807c9428 808f > a2e39ac0 > > GPR24: 808dba00 01001005 a4ab623c a50fb300 0030 a50fb31c a50fb300 > a4c4e840 > > NIP [c161809c] tipc_nl_publ_dump+0x93c/0xf10 [tipc] > > LR [c1618120] tipc_nl_publ_dump+0x9c0/0xf10 [tipc] > > Call Trace: > > [a4c99be0] [800b9e2c] free_pages_prepare+0x18c/0x2a0 (unreliable) > > [a4c99c00] [c1618694] tipc_conn_sendmsg+0x24/0x150 [tipc] > > [a4c99c30] [c160ad5c] tipc_subscrp_report_overlap+0xbc/0xd0 [tipc] > > [a4c99c70] [c160b31c] tipc_topsrv_stop+0x45c/0x4f0 [tipc] > > [a4c99ca0] [c160b848] tipc_nametbl_remove_publ+0x58/0x110 [tipc] > > [a4c99cd0] [c160bd08] tipc_nametbl_withdraw+0x68/0x140 [tipc] > > [a4c99d00] [c1613ce4] tipc_nl_node_dump_link+0x1904/0x45d0 [tipc] > > [a4c99d30] [c16148f8] tipc_nl_node_dump_link+0x2518/0x45d0 [tipc] > > [a4c99d70] [804f5a40] sock_release+0x30/0xf0 > > [a4c99d80] [804f5b14] sock_close+0x14/0x30 > > [a4c99d90] [80105844] __fput+0x94/0x200 > > [a4c99db0] [8003dca4] task_work_run+0xd4/0x100 > > [a4c99dd0] [80023620] do_exit+0x280/0x980 > > [a4c99e10] [80024c48] do_group_exit+0x48/0xb0 > > [a4c99e30] [80030344] get_signal+0x244/0x4f0 > > [a4c99e80] [80007734] do_signal+0x34/0x1c0 > > [a4c99f30] [800079a8] do_notify_resume+0x68/0x80 > > [a4c99f40] [8000fa1c] do_user_signal+0x74/0xc4 > > --- interrupt: c00 at 0xf5b0cfc > > LR = 0xf5b0ce8 > > Instruction dump: > > 4ba8 7c0004ac 7d201828 31290001 7d20192d 40a2fff4 7c0004ac 2f890001 > > 4dbd0020 3d40c162 892ac11e 69290001 <0f09> 2f89 4dbe0020 3921 > > ---[ end trace 544bc785f9258108 ]--- > > > > JT > > > > > > On Thu, Jan 12, 2017 at 1:19 AM, Parthasarathy Bhuvaragan > <parthasarathy.bhuvara...@ericsson.com > <mailto:parthasarathy.bhuvara...@ericsson.com>> wrote: > > In this series, we revert the commit 333f796235a527 ("tipc: fix a > race condition leading to subscriber refcnt bug") and provid
[tipc-discussion] [PATCH net-next v6 2/6] tipc: add subscription refcount to avoid invalid delete
Until now, the subscribers keep track of the subscriptions using reference count at subscriber level. At subscription cancel or subscriber delete, we delete the subscription by checking for pending timer using del_timer(). del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 is scheduled, it deletes an invalid subscription. We can fix the above issue using either a synchronous timer function or by introducing subscription reference count. The later solution is preferred as it's consistent with the asynchronous design model used by the rest of the code. In this commit, we introduce subscription refcount to avoid deleting an invalid subscription. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: Address comments from Ying Xue, to avoid race conditions by using refcount only instead of del_timer_sync(). --- net/tipc/subscr.c | 120 +- net/tipc/subscr.h | 1 + 2 files changed, 66 insertions(+), 55 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..d6c1f35f6518 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,7 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -137,25 +138,17 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; - struct tipc_subscriber *subscriber = sub->subscriber; /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(>lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(>lock); - - tipc_subscrb_put(subscriber); + tipc_subscrp_put(sub); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +161,56 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(>kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub = container_of(kref, +struct tipc_subscription, +kref); + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(>lock); + tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); + atomic_dec(>subscription_count); + spin_unlock_bh(>lock); + kfree(sub); + tipc_subscrb_put(subscriber); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(>kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(>kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. + */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct tipc_subscription *sub, *temp; + + spin_lock_bh(>lock); + list_for_each_entry_safe(sub, temp, >subscrp_list, +subscrp_list) { + spin_unlock_bh(>lock); + if (s && !memcmp(s, >evt.s, sizeof(struct tipc_subscr))) { + tipc_subscrp_delete(sub); + return; + } else { + tipc_subscrp_delete(sub); + } + spin_lock_bh(>lock); + } + spin_unlock_bh(>lock); +} + static struct tipc_subscriber *tipc_subscrb_create(int conid) { struct tipc_subscriber *subscriber; @@ -177,8 +220,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) pr_warn("Subscriber rejected, no memory\n"); return NULL; } - kref_init(>kref); INIT_LIST_HEAD(>subscrp_list); + kref_init(>kref); subscriber->conid = conid; spin_lock_init(>lock); @@ -187,55 +230,21 @@ stati
[tipc-discussion] [PATCH net-next v6 6/6] tipc: fix cleanup at module unload
In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 04ff441b8065..3cd6402e812c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -619,14 +619,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(>idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(>conn_idr, id); if (con) { - total++; spin_unlock_bh(>idr_lock); tipc_close_conn(con); spin_lock_bh(>idr_lock); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v6 4/6] tipc: fix nametbl_lock soft lockup at module exit
Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying....@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 2e803601aa99..826cde2c401e 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -194,19 +192,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(>sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, >flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v6 5/6] tipc: ignore requests when the connection state is not CONNECTED
In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 826cde2c401e..04ff441b8065 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -453,6 +453,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, >flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -466,12 +471,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(>list, >outqueue); spin_unlock_bh(>outqueue_lock); - if (test_bit(CF_CONNECTED, >flags)) { - if (!queue_work(s->send_wq, >swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, >swork)) conn_put(con); - } return 0; } @@ -495,7 +496,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(>outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, >flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == >outqueue) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v6 1/6] tipc: fix nametbl_lock soft lockup at node/link events
We trigger a soft lockup as we grab nametbl_lock twice if the node has a pending node up/down or link up/down event while: - we process an incoming named message in tipc_named_rcv() and perform an tipc_update_nametbl(). - we have pending backlog items in the name distributor queue during a nametable update using tipc_nametbl_publish() or tipc_nametbl_withdraw(). The following are the call chain associated: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_node_subscribe()/unsubscribe() tipc_node_write_unlock() << lockup occurs if an outstanding node/link event exits, as we grabs nametbl_lock again >> tipc_nametbl_withdraw() Grab nametbl_lock tipc_named_process_backlog() tipc_update_nametbl() << rest as above >> The function tipc_node_write_unlock(), in addition to releasing the lock processes the outstanding node/link up/down events. To do this, we need to grab the nametbl_lock again leading to the lockup. In this commit we fix the soft lockup by introducing a fast variant of node_unlock(), where we just release the lock. We adapt the node_subscribe()/node_unsubscribe() to use the fast variants. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2883f6a0ed98..796dec65944a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n) write_lock_bh(>lock); } +static void tipc_node_write_unlock_fast(struct tipc_node *n) +{ + write_unlock_bh(>lock); +} + static void tipc_node_write_unlock(struct tipc_node *n) { struct net *net = n->net; @@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_add_tail(subscr, >publ_list); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } @@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v6 0/6] topology server fixes for nametable soft lockup
In this series, we revert the commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") and provide an alternate solution to fix the race conditions in commits 2-4. We have to do this as the above commit introduced a nametbl soft lockup at module exit as described by patch#4. --- v6: Address krefcount warning for John Thompson in Patch#3 v5: Address Ying's comment in Patch #2 to remove del_timer_sync(). v4: Address Ying's comment by introducing subscription refcount. v3: introduce cleanup workqueue to fix nametbl soft lockup. Parthasarathy Bhuvaragan (6): tipc: fix nametbl_lock soft lockup at node/link events tipc: add subscription refcount to avoid invalid delete tipc: fix connection refcount error tipc: fix nametbl_lock soft lockup at module exit tipc: ignore requests when the connection state is not CONNECTED tipc: fix cleanup at module unload net/tipc/node.c | 9 +++- net/tipc/server.c | 48 ++ net/tipc/subscr.c | 120 +- net/tipc/subscr.h | 1 + 4 files changed, 94 insertions(+), 84 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [PATCH net-next v5 2/6] tipc: add subscription refcount to avoid invalid delete
On 01/18/2017 11:06 AM, Ying Xue wrote: > On 01/16/2017 11:41 PM, Parthasarathy Bhuvaragan wrote: >> Until now, the subscribers keep track of the subscriptions using >> reference count at subscriber level. At subscription cancel or >> subscriber delete, we delete the subscription by checking for >> pending timer using del_timer(). del_timer() is not SMP safe, if >> on CPU0 the check for pending timer returns true but CPU1 might >> schedule the timer callback thereby deleting the subscription. >> Thus when CPU0 is scheduled, it deletes an invalid subscription. >> >> We can fix the above issue using either a synchronous timer function >> or by introducing subscription reference count. The later solution >> is preferred as it's consistent with the asynchronous design model >> used by the rest of the code. >> >> In this commit, we introduce subscription refcount to avoid deleting >> an invalid subscription. >> >> Signed-off-by: Parthasarathy Bhuvaragan >> <parthasarathy.bhuvara...@ericsson.com> >> --- >> v2: Address comments from Ying Xue, to avoid race conditions by using >> refcount only instead of del_timer_sync(). >> --- >> net/tipc/subscr.c | 120 >> +- >> net/tipc/subscr.h | 1 + >> 2 files changed, 66 insertions(+), 55 deletions(-) >> >> diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c >> index 0dd02244e21d..d6c1f35f6518 100644 >> --- a/net/tipc/subscr.c >> +++ b/net/tipc/subscr.c >> @@ -54,6 +54,7 @@ struct tipc_subscriber { >> >> static void tipc_subscrp_delete(struct tipc_subscription *sub); >> static void tipc_subscrb_put(struct tipc_subscriber *subscriber); >> +static void tipc_subscrp_put(struct tipc_subscription *subscription); >> >> /** >> * htohl - convert value to endianness used by destination >> @@ -137,25 +138,17 @@ void tipc_subscrp_report_overlap(struct >> tipc_subscription *sub, u32 found_lower, >> static void tipc_subscrp_timeout(unsigned long data) >> { >> struct tipc_subscription *sub = (struct tipc_subscription *)data; >> -struct tipc_subscriber *subscriber = sub->subscriber; >> >> /* Notify subscriber of timeout */ >> tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, >> sub->evt.s.seq.upper, >> TIPC_SUBSCR_TIMEOUT, 0, 0); >> >> -spin_lock_bh(>lock); >> -tipc_subscrp_delete(sub); >> -spin_unlock_bh(>lock); >> - >> -tipc_subscrb_put(subscriber); >> +tipc_subscrp_put(sub); >> } >> >> static void tipc_subscrb_kref_release(struct kref *kref) >> { >> -struct tipc_subscriber *subcriber = container_of(kref, >> -struct tipc_subscriber, kref); >> - >> -kfree(subcriber); >> +kfree(container_of(kref,struct tipc_subscriber, kref)); >> } >> >> static void tipc_subscrb_put(struct tipc_subscriber *subscriber) >> @@ -168,6 +161,56 @@ static void tipc_subscrb_get(struct >> tipc_subscriber *subscriber) >> kref_get(>kref); >> } >> >> +static void tipc_subscrp_kref_release(struct kref *kref) >> +{ >> +struct tipc_subscription *sub = container_of(kref, >> + struct tipc_subscription, >> + kref); >> +struct tipc_net *tn = net_generic(sub->net, tipc_net_id); >> +struct tipc_subscriber *subscriber = sub->subscriber; >> + >> +spin_lock_bh(>lock); >> +tipc_nametbl_unsubscribe(sub); >> +list_del(>subscrp_list); >> +atomic_dec(>subscription_count); >> +spin_unlock_bh(>lock); >> +kfree(sub); >> +tipc_subscrb_put(subscriber); >> +} >> + >> +static void tipc_subscrp_put(struct tipc_subscription *subscription) >> +{ >> +kref_put(>kref, tipc_subscrp_kref_release); >> +} >> + >> +static void tipc_subscrp_get(struct tipc_subscription *subscription) >> +{ >> +kref_get(>kref); >> +} >> + >> +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all >> + * subscriptions for a given subscriber. >> + */ >> +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber >> *subscriber, >> +struct tipc_subscr *s) >> +{ >> +struct tipc_subscription *sub, *temp; >> + >> +spin_lock_bh(>lock); >> +list_for_each_entry_safe(sub, temp, >subscrp_list, >> + subscrp_list) { >>
Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast as transport option for multicast
On 01/16/2017 03:13 PM, Jon Maloy wrote: > > >> -Original Message----- >> From: Parthasarathy Bhuvaragan >> Sent: Monday, 16 January, 2017 05:20 >> To: Jon Maloy <jon.ma...@ericsson.com>; >> tipc-discussion@lists.sourceforge.net; >> Ying Xue <ying@windriver.com> >> Subject: Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast as >> transport option for multicast >> >> On 01/13/2017 04:18 PM, Jon Maloy wrote: >>> >>> >>>> -Original Message- >>>> From: Parthasarathy Bhuvaragan >>>> Sent: Friday, 13 January, 2017 04:24 >>>> To: Jon Maloy <jon.ma...@ericsson.com>; tipc- >> discuss...@lists.sourceforge.net; >>>> Ying Xue <ying@windriver.com> >>>> Subject: Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast >>>> as >>>> transport option for multicast >>>> >>>> On 01/04/2017 06:05 PM, Parthasarathy Bhuvaragan wrote: >>>>> Hi Jon, >>>>> >>>>> Added some minor comments inline in this patch, apart from that the >>>>> major concern is the following: >>>>> >>>>> All my tests which passed before this patch, fails while sending >>>>> multicast to a receiver on own node. >>>>> >>>>> With this patch, we increase the likelyhood of receive buffer overflow >>>>> if the sender & receivers are running on the same host as we bypass the >>>>> link layer completely. I confirmed this with some traces in filter_rcv(). >>>>> >>>>> If I add another multicast listener running on another node, this >>>>> pacifies the sender (put the sender to sleep at link congestion) and >>>>> relatively slow link layer reduces the buffer overflow. >>>>> >>>>> We need to find a way reduce the aggressiveness of the sender. >>>>> We want users to be transparent about the location of the services, so >>>>> we should to provide similar charecteristics regardless of the service >>>>> location. >>>>> >>>> Jon, running ptts sever and client on a standalone node without your >>>> updates failed. So in that aspect, iam ok with this patch. >>>> >>>> If the ethernet bearer lacks broadcast ability, then neighbor discovery >>>> will not work. So do we intend to introduce support to add ethernet >>>> peers manually as we do for udp bearers? otherwise we can never use >>>> replicast for non udp bearers. >>> >>> I believe all Ethernet implementations, even overlay networks, provide some >> form of broadcast, or in lack thereof, an emulated broadcast. >>> So, discovery should work, but it will be very inefficient when we do link >> broadcast, because tipc will think that genuine Ethernet broadcast is >> supported. >>> We actually need some way to find out what kind of "Ethernet" we are >> attached to, e.g. VXLAN, so that the "bcast supported" flag can be set >> correctly. >>> I wonder if that if possible, or if it has to be configured. >>> >> I assumed that, but thanks for the clarification. I infer from your >> statement that its the User, who shall configure this per socket in case >> tipc is running over some kind of overlay networks. Tipc has no >> knowledge about the tunnel mechanisms used under the exposed bearers. > > No, I don't think that is a good option. I think it will be in only very > special cases the user will want to enforce replicast, (e.g., security, or if > he know there will always be very few destinations), and those would not be > related to the deployment. > >> >> In that case, its more logical if we set this as a node attribute and >> the sockets inherit them? >> Since the applications itself usually are agnostic of the deployment >> environments, it's not correct to place the platform specific logic >> inside the application. > > I was thinking more of setting this as a configured bearer attribute, unless > we find a way to deduce it from the interface attributes. I just didn't see > this as part of this series, but it should be done. > >> >> I think you need to rephrase the commit message. I read the commit >> message as if we are introducing a complete link level broadcast service >> replacement. But that's incorrect, as still broadcast link for neighbor >> discovery. > > Yes. This is only an optimization for broadcast traffic,
[tipc-discussion] [PATCH net-next v5 2/6] tipc: add subscription refcount to avoid invalid delete
Until now, the subscribers keep track of the subscriptions using reference count at subscriber level. At subscription cancel or subscriber delete, we delete the subscription by checking for pending timer using del_timer(). del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 is scheduled, it deletes an invalid subscription. We can fix the above issue using either a synchronous timer function or by introducing subscription reference count. The later solution is preferred as it's consistent with the asynchronous design model used by the rest of the code. In this commit, we introduce subscription refcount to avoid deleting an invalid subscription. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- v2: Address comments from Ying Xue, to avoid race conditions by using refcount only instead of del_timer_sync(). --- net/tipc/subscr.c | 120 +- net/tipc/subscr.h | 1 + 2 files changed, 66 insertions(+), 55 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..d6c1f35f6518 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,7 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -137,25 +138,17 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, static void tipc_subscrp_timeout(unsigned long data) { struct tipc_subscription *sub = (struct tipc_subscription *)data; - struct tipc_subscriber *subscriber = sub->subscriber; /* Notify subscriber of timeout */ tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(>lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(>lock); - - tipc_subscrb_put(subscriber); + tipc_subscrp_put(sub); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +161,56 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(>kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub = container_of(kref, +struct tipc_subscription, +kref); + struct tipc_net *tn = net_generic(sub->net, tipc_net_id); + struct tipc_subscriber *subscriber = sub->subscriber; + + spin_lock_bh(>lock); + tipc_nametbl_unsubscribe(sub); + list_del(>subscrp_list); + atomic_dec(>subscription_count); + spin_unlock_bh(>lock); + kfree(sub); + tipc_subscrb_put(subscriber); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(>kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(>kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. + */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct tipc_subscription *sub, *temp; + + spin_lock_bh(>lock); + list_for_each_entry_safe(sub, temp, >subscrp_list, +subscrp_list) { + spin_unlock_bh(>lock); + if (s && !memcmp(s, >evt.s, sizeof(struct tipc_subscr))) { + tipc_subscrp_delete(sub); + return; + } else { + tipc_subscrp_delete(sub); + } + spin_lock_bh(>lock); + } + spin_unlock_bh(>lock); +} + static struct tipc_subscriber *tipc_subscrb_create(int conid) { struct tipc_subscriber *subscriber; @@ -177,8 +220,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) pr_warn("Subscriber rejected, no memory\n"); return NULL; } - kref_init(>kref); INIT_LIST_HEAD(>subscrp_list); + kref_init(>kref); subscriber->conid = conid; spin_lock_init(>lock); @@ -187,55 +230,21 @@ stati
[tipc-discussion] [PATCH net-next v5 3/6] tipc: fix connection refcount error
Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to an new subscriber leading to inconsistent reference count. In this commit, we remove the connection from the server list at recount cleanup. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..e178d41e1a68 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(>idr_lock); + idr_remove(>conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(>idr_lock); } tipc_clean_outqueues(con); @@ -198,15 +204,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, >flags)) { - spin_lock_bh(>idr_lock); - idr_remove(>conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(>idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v5 4/6] tipc: fix nametbl_lock soft lockup at module exit
Commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") reveals a soft lockup while acquiring nametbl_lock. Before commit 333f796235a527, we call tipc_conn_shutdown() from tipc_close_conn() in the context of tipc_topsrv_stop(). In that context, we are allowed to grab the nametbl_lock. Commit 333f796235a527, moved tipc_conn_release (renamed from tipc_conn_shutdown) to the connection refcount cleanup. This allows either tipc_nametbl_withdraw() or tipc_topsrv_stop() to the cleanup. Since tipc_exit_net() first calls tipc_topsrv_stop() and then tipc_nametble_withdraw() increases the chances for the later to perform the connection cleanup. The soft lockup occurs in the call chain of tipc_nametbl_withdraw(), when it performs the tipc_conn_kref_release() as it tries to grab nametbl_lock again while holding it already. tipc_nametbl_withdraw() grabs nametbl_lock tipc_nametbl_remove_publ() tipc_subscrp_report_overlap() tipc_subscrp_send_event() tipc_conn_sendmsg() << if (con->flags != CF_CONNECTED) we do conn_put(), triggering the cleanup as refcount=0. >> tipc_conn_kref_release tipc_sock_release tipc_conn_release tipc_subscrb_delete tipc_subscrp_delete tipc_nametbl_unsubscribe << Soft Lockup >> The previous changes in this series fixes the race conditions fixed by commit 333f796235a527. Hence we can now revert the commit. Fixes: 333f796235a52727 ("tipc: fix a race condition leading to subscriber refcnt bug") Reported-by: John Thompson <thompa@gmail.com> Acked-by: Ying Xue <ying....@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index e178d41e1a68..e206b8fb826f 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -86,7 +86,6 @@ struct outqueue_entry { static void tipc_recv_work(struct work_struct *work); static void tipc_send_work(struct work_struct *work); static void tipc_clean_outqueues(struct tipc_conn *con); -static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { @@ -104,7 +103,6 @@ static void tipc_conn_kref_release(struct kref *kref) } saddr->scope = -TIPC_NODE_SCOPE; kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); - tipc_sock_release(con); sock_release(sock); con->sock = NULL; @@ -192,19 +190,15 @@ static void tipc_unregister_callbacks(struct tipc_conn *con) write_unlock_bh(>sk_callback_lock); } -static void tipc_sock_release(struct tipc_conn *con) +static void tipc_close_conn(struct tipc_conn *con) { struct tipc_server *s = con->server; - if (con->conid) - s->tipc_conn_release(con->conid, con->usr_data); - - tipc_unregister_callbacks(con); -} - -static void tipc_close_conn(struct tipc_conn *con) -{ if (test_and_clear_bit(CF_CONNECTED, >flags)) { + tipc_unregister_callbacks(con); + + if (con->conid) + s->tipc_conn_release(con->conid, con->usr_data); /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v5 6/6] tipc: fix cleanup at module unload
In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index fa54a2760ee0..ad455f128925 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -617,14 +617,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(>idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(>conn_idr, id); if (con) { - total++; spin_unlock_bh(>idr_lock); tipc_close_conn(con); spin_lock_bh(>idr_lock); -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v5 0/6] topology server fixes for nametable soft lockup
In this series, we revert the commit 333f796235a527 ("tipc: fix a race condition leading to subscriber refcnt bug") and provide an alternate solution to fix the race conditions in commits 2-4. We have to do this as the above commit introduced a nametbl soft lockup at module exit as described by patch#4. --- v5: Address Ying's comment in Patch #2 to remove del_timer_sync(). v4: Address Ying's comment by introducing subscription refcount. v3: introduce cleanup workqueue to fix nametbl soft lockup. Parthasarathy Bhuvaragan (6): tipc: fix nametbl_lock soft lockup at node/link events tipc: add subscription refcount to avoid invalid delete tipc: fix connection refcount error tipc: fix nametbl_lock soft lockup at module exit tipc: ignore requests when the connection state is not CONNECTED tipc: fix cleanup at module unload net/tipc/node.c | 9 +++- net/tipc/server.c | 44 net/tipc/subscr.c | 120 +- net/tipc/subscr.h | 1 + 4 files changed, 91 insertions(+), 83 deletions(-) -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v5 1/6] tipc: fix nametbl_lock soft lockup at node/link events
We trigger a soft lockup as we grab nametbl_lock twice if the node has a pending node up/down or link up/down event while: - we process an incoming named message in tipc_named_rcv() and perform an tipc_update_nametbl(). - we have pending backlog items in the name distributor queue during a nametable update using tipc_nametbl_publish() or tipc_nametbl_withdraw(). The following are the call chain associated: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_node_subscribe()/unsubscribe() tipc_node_write_unlock() << lockup occurs if an outstanding node/link event exits, as we grabs nametbl_lock again >> tipc_nametbl_withdraw() Grab nametbl_lock tipc_named_process_backlog() tipc_update_nametbl() << rest as above >> The function tipc_node_write_unlock(), in addition to releasing the lock processes the outstanding node/link up/down events. To do this, we need to grab the nametbl_lock again leading to the lockup. In this commit we fix the soft lockup by introducing a fast variant of node_unlock(), where we just release the lock. We adapt the node_subscribe()/node_unsubscribe() to use the fast variants. Acked-by: Ying Xue <ying@windriver.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2883f6a0ed98..796dec65944a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n) write_lock_bh(>lock); } +static void tipc_node_write_unlock_fast(struct tipc_node *n) +{ + write_unlock_bh(>lock); +} + static void tipc_node_write_unlock(struct tipc_node *n) { struct net *net = n->net; @@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_add_tail(subscr, >publ_list); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } @@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } -- 2.1.4 -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast as transport option for multicast
On 01/13/2017 04:18 PM, Jon Maloy wrote: > > >> -Original Message----- >> From: Parthasarathy Bhuvaragan >> Sent: Friday, 13 January, 2017 04:24 >> To: Jon Maloy <jon.ma...@ericsson.com>; >> tipc-discussion@lists.sourceforge.net; >> Ying Xue <ying@windriver.com> >> Subject: Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast as >> transport option for multicast >> >> On 01/04/2017 06:05 PM, Parthasarathy Bhuvaragan wrote: >>> Hi Jon, >>> >>> Added some minor comments inline in this patch, apart from that the >>> major concern is the following: >>> >>> All my tests which passed before this patch, fails while sending >>> multicast to a receiver on own node. >>> >>> With this patch, we increase the likelyhood of receive buffer overflow >>> if the sender & receivers are running on the same host as we bypass the >>> link layer completely. I confirmed this with some traces in filter_rcv(). >>> >>> If I add another multicast listener running on another node, this >>> pacifies the sender (put the sender to sleep at link congestion) and >>> relatively slow link layer reduces the buffer overflow. >>> >>> We need to find a way reduce the aggressiveness of the sender. >>> We want users to be transparent about the location of the services, so >>> we should to provide similar charecteristics regardless of the service >>> location. >>> >> Jon, running ptts sever and client on a standalone node without your >> updates failed. So in that aspect, iam ok with this patch. >> >> If the ethernet bearer lacks broadcast ability, then neighbor discovery >> will not work. So do we intend to introduce support to add ethernet >> peers manually as we do for udp bearers? otherwise we can never use >> replicast for non udp bearers. > > I believe all Ethernet implementations, even overlay networks, provide some > form of broadcast, or in lack thereof, an emulated broadcast. > So, discovery should work, but it will be very inefficient when we do link > broadcast, because tipc will think that genuine Ethernet broadcast is > supported. > We actually need some way to find out what kind of "Ethernet" we are attached > to, e.g. VXLAN, so that the "bcast supported" flag can be set correctly. > I wonder if that if possible, or if it has to be configured. > I assumed that, but thanks for the clarification. I infer from your statement that its the User, who shall configure this per socket in case tipc is running over some kind of overlay networks. Tipc has no knowledge about the tunnel mechanisms used under the exposed bearers. In that case, its more logical if we set this as a node attribute and the sockets inherit them? Since the applications itself usually are agnostic of the deployment environments, it's not correct to place the platform specific logic inside the application. I think you need to rephrase the commit message. I read the commit message as if we are introducing a complete link level broadcast service replacement. But that's incorrect, as still broadcast link for neighbor discovery. Thanks for your patience, probably its just me getting confused with the purpose of the change-sets. /Partha > ///jon > >> >> /Partha >> >>> /Partha >>> >>> On 01/02/2017 03:34 PM, Parthasarathy Bhuvaragan wrote: >>>> Hi jon, >>>> >>>> When I include this patch, ptts case 12 (multicast) fails when the >>>> client and server are running on the same node. >>>> >>>> /Partha >>>> >>>> On 12/22/2016 04:15 PM, Jon Maloy wrote: >>>>> TIPC multicast messages are currently carried over a reliable >>>>> 'broadcast link', making use of the underlying media's ability to >>>>> transport packets as L2 broadcast or IP multicast to all nodes in >>>>> the cluster. >>>>> >>>>> When the used bearer is lacking that ability, we can instead emulate >>>>> the broadcast service by relicating and sending the packets over as >>>>> many unicast links as needed to reach all identified destinations. >>>>> We now introduce a new TIPC link-level 'replicast' service that does >>>>> this. >>>>> >>>>> Signed-off-by: Jon Maloy <jon.ma...@ericsson.com> >>>>> --- >>>>> net/tipc/bcast.c | 105 >> ++ >>>>> net/tipc/bcast.h | 3 +- >>>>> ne
[tipc-discussion] [PATCH net v1 1/1] tipc: allocate user memory with GFP_KERNEL flag
Until now, we allocate memory always with GFP_ATOMIC flag. When the system is under memory pressure and a user tries to send, the send fails due to low memory. However, the user application can wait for free memory if we allocate it using GFP_KERNEL flag. In this commit, we use allocate memory with GFP_KERNEL for all user allocation. Reported-by: Rune Torgersen <ru...@innovsys.com> Acked-by: Jon Maloy <jon.ma...@ericsson.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/discover.c | 4 ++-- net/tipc/link.c | 2 +- net/tipc/msg.c| 16 net/tipc/msg.h| 2 +- net/tipc/name_distr.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 6b109a808d4c..02462d67d191 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rskb = tipc_buf_acquire(MAX_H_SIZE); + rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!rskb) return; tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); @@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - req->buf = tipc_buf_acquire(MAX_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/link.c b/net/tipc/link.c index b758ca8b2f79..b0f8646e0631 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1384,7 +1384,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); msg_set_size(, pktlen + INT_H_SIZE); - tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a22be502f1bd..ab02d0742476 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -58,12 +58,12 @@ static unsigned int align(unsigned int i) * NOTE: Headroom is reserved to allow prepending of a data link header. * There may also be unrequested tailroom present at the buffer's end. */ -struct sk_buff *tipc_buf_acquire(u32 size) +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + skb = alloc_skb_fclone(buf_size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, struct tipc_msg *msg; struct sk_buff *buf; - buf = tipc_buf_acquire(hdr_sz + data_sz); + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); if (unlikely(!buf)) return NULL; @@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { - skb = tipc_buf_acquire(msz); + skb = tipc_buf_acquire(msz, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; skb_orphan(skb); @@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, msg_set_importance(, msg_importance(mhdr)); /* Prepare first fragment */ - skb = tipc_buf_acquire(pktmax); + skb = tipc_buf_acquire(pktmax, GFP_KERNEL); if (!skb) return -ENOMEM; skb_orphan(skb); @@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, pktsz = drem + INT_H_SIZE; else pktsz = pktmax; - skb = tipc_buf_acquire(pktsz); + skb = tipc_buf_acquire(pktsz, GFP_KERNEL); if (!skb) { rc = -ENOMEM; goto error; @@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, if (msz > (max / 2)) return false; - _skb = tipc_buf_acquire(max); + _skb = tipc_buf_acquire(max, GFP_ATOMIC); if (!_skb) return false; @@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Never return SHORT header; expand by rep
[tipc-discussion] [PATCH net v1 1/1] tipc: allocate user memory with GFP_KERNEL flag
Until now, we allocate memory always with GFP_ATOMIC flag. When the system is under memory pressure and a user tries to send, the send fails due to low memory. However, the user application can wait for free memory if we allocate it using GFP_KERNEL flag. In this commit, we use allocate memory with GFP_KERNEL for all user allocation. Reported-by: Rune Torgersen <ru...@innovsys.com> Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/discover.c | 4 ++-- net/tipc/link.c | 2 +- net/tipc/msg.c| 16 net/tipc/msg.h| 2 +- net/tipc/name_distr.c | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 6b109a808d4c..02462d67d191 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -169,7 +169,7 @@ void tipc_disc_rcv(struct net *net, struct sk_buff *skb, /* Send response, if necessary */ if (respond && (mtyp == DSC_REQ_MSG)) { - rskb = tipc_buf_acquire(MAX_H_SIZE); + rskb = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!rskb) return; tipc_disc_init_msg(net, rskb, DSC_RESP_MSG, bearer); @@ -278,7 +278,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req = kmalloc(sizeof(*req), GFP_ATOMIC); if (!req) return -ENOMEM; - req->buf = tipc_buf_acquire(MAX_H_SIZE); + req->buf = tipc_buf_acquire(MAX_H_SIZE, GFP_ATOMIC); if (!req->buf) { kfree(req); return -ENOMEM; diff --git a/net/tipc/link.c b/net/tipc/link.c index b758ca8b2f79..b0f8646e0631 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -1384,7 +1384,7 @@ void tipc_link_tnl_prepare(struct tipc_link *l, struct tipc_link *tnl, msg_set_seqno(hdr, seqno++); pktlen = msg_size(hdr); msg_set_size(, pktlen + INT_H_SIZE); - tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE); + tnlskb = tipc_buf_acquire(pktlen + INT_H_SIZE, GFP_ATOMIC); if (!tnlskb) { pr_warn("%sunable to send packet\n", link_co_err); return; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index a22be502f1bd..ab02d0742476 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -58,12 +58,12 @@ static unsigned int align(unsigned int i) * NOTE: Headroom is reserved to allow prepending of a data link header. * There may also be unrequested tailroom present at the buffer's end. */ -struct sk_buff *tipc_buf_acquire(u32 size) +struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp) { struct sk_buff *skb; unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; - skb = alloc_skb_fclone(buf_size, GFP_ATOMIC); + skb = alloc_skb_fclone(buf_size, gfp); if (skb) { skb_reserve(skb, BUF_HEADROOM); skb_put(skb, size); @@ -95,7 +95,7 @@ struct sk_buff *tipc_msg_create(uint user, uint type, struct tipc_msg *msg; struct sk_buff *buf; - buf = tipc_buf_acquire(hdr_sz + data_sz); + buf = tipc_buf_acquire(hdr_sz + data_sz, GFP_ATOMIC); if (unlikely(!buf)) return NULL; @@ -261,7 +261,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, /* No fragmentation needed? */ if (likely(msz <= pktmax)) { - skb = tipc_buf_acquire(msz); + skb = tipc_buf_acquire(msz, GFP_KERNEL); if (unlikely(!skb)) return -ENOMEM; skb_orphan(skb); @@ -282,7 +282,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, msg_set_importance(, msg_importance(mhdr)); /* Prepare first fragment */ - skb = tipc_buf_acquire(pktmax); + skb = tipc_buf_acquire(pktmax, GFP_KERNEL); if (!skb) return -ENOMEM; skb_orphan(skb); @@ -313,7 +313,7 @@ int tipc_msg_build(struct tipc_msg *mhdr, struct msghdr *m, pktsz = drem + INT_H_SIZE; else pktsz = pktmax; - skb = tipc_buf_acquire(pktsz); + skb = tipc_buf_acquire(pktsz, GFP_KERNEL); if (!skb) { rc = -ENOMEM; goto error; @@ -448,7 +448,7 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, if (msz > (max / 2)) return false; - _skb = tipc_buf_acquire(max); + _skb = tipc_buf_acquire(max, GFP_ATOMIC); if (!_skb) return false; @@ -496,7 +496,7 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) /* Never return SHORT header; expand by replacing buffer if necessary */ if (msg_short(hdr)) { -
[tipc-discussion] [PATCH net-next v4 5/6] tipc: ignore requests when the connection state is not CONNECTED
In tipc_conn_sendmsg(), we first queue the request to the outqueue followed by the connection state check. If the connection is not connected, we should not queue this message. In this commit, we reject the messages if the connection state is not CF_CONNECTED. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 13 +++-- 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index e206b8fb826f..fa54a2760ee0 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -451,6 +451,11 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, if (!con) return -EINVAL; + if (!test_bit(CF_CONNECTED, >flags)) { + conn_put(con); + return 0; + } + e = tipc_alloc_entry(data, len); if (!e) { conn_put(con); @@ -464,12 +469,8 @@ int tipc_conn_sendmsg(struct tipc_server *s, int conid, list_add_tail(>list, >outqueue); spin_unlock_bh(>outqueue_lock); - if (test_bit(CF_CONNECTED, >flags)) { - if (!queue_work(s->send_wq, >swork)) - conn_put(con); - } else { + if (!queue_work(s->send_wq, >swork)) conn_put(con); - } return 0; } @@ -493,7 +494,7 @@ static void tipc_send_to_sock(struct tipc_conn *con) int ret; spin_lock_bh(>outqueue_lock); - while (1) { + while (test_bit(CF_CONNECTED, >flags)) { e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == >outqueue) -- 2.1.4 -- Developer Access Program for Intel Xeon Phi Processors Access to Intel Xeon Phi processor-based developer platforms. With one year of Intel Parallel Studio XE. Training and support from Colfax. Order your platform today. http://sdm.link/xeonphi ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v4 6/6] tipc: fix cleanup at module unload
In tipc_server_stop(), we iterate over the connections with limiting factor as server's idr_in_use. We ignore the fact that this variable is decremented in tipc_close_conn(), leading to premature exit. In this commit, we iterate until the we have no connections left. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index fa54a2760ee0..ad455f128925 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -617,14 +617,12 @@ int tipc_server_start(struct tipc_server *s) void tipc_server_stop(struct tipc_server *s) { struct tipc_conn *con; - int total = 0; int id; spin_lock_bh(>idr_lock); - for (id = 0; total < s->idr_in_use; id++) { + for (id = 0; s->idr_in_use; id++) { con = idr_find(>conn_idr, id); if (con) { - total++; spin_unlock_bh(>idr_lock); tipc_close_conn(con); spin_lock_bh(>idr_lock); -- 2.1.4 -- Developer Access Program for Intel Xeon Phi Processors Access to Intel Xeon Phi processor-based developer platforms. With one year of Intel Parallel Studio XE. Training and support from Colfax. Order your platform today. http://sdm.link/xeonphi ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v4 1/6] tipc: fix nametbl_lock soft lockup at node/link events
We trigger a soft lockup as we grab nametbl_lock twice if the node has a pending node up/down or link up/down event while: - we process an incoming named message in tipc_named_rcv() and perform an tipc_update_nametbl(). - we have pending backlog items in the name distributor queue during a nametable update using tipc_nametbl_publish() or tipc_nametbl_withdraw(). The following are the call chain associated: tipc_named_rcv() Grabs nametbl_lock tipc_update_nametbl() (publish/withdraw) tipc_node_subscribe()/unsubscribe() tipc_node_write_unlock() << lockup occurs if an outstanding node/link event exits, as we grabs nametbl_lock again >> tipc_nametbl_withdraw() Grab nametbl_lock tipc_named_process_backlog() tipc_update_nametbl() << rest as above >> The function tipc_node_write_unlock(), in addition to releasing the lock processes the outstanding node/link up/down events. To do this, we need to grab the nametbl_lock again leading to the lockup. In this commit we fix the soft lockup by introducing a fast variant of node_unlock(), where we just release the lock. We adapt the node_subscribe()/node_unsubscribe() to use the fast variants. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 9 +++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index 2883f6a0ed98..796dec65944a 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -263,6 +263,11 @@ static void tipc_node_write_lock(struct tipc_node *n) write_lock_bh(>lock); } +static void tipc_node_write_unlock_fast(struct tipc_node *n) +{ + write_unlock_bh(>lock); +} + static void tipc_node_write_unlock(struct tipc_node *n) { struct net *net = n->net; @@ -417,7 +422,7 @@ void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_add_tail(subscr, >publ_list); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } @@ -435,7 +440,7 @@ void tipc_node_unsubscribe(struct net *net, struct list_head *subscr, u32 addr) } tipc_node_write_lock(n); list_del_init(subscr); - tipc_node_write_unlock(n); + tipc_node_write_unlock_fast(n); tipc_node_put(n); } -- 2.1.4 -- Developer Access Program for Intel Xeon Phi Processors Access to Intel Xeon Phi processor-based developer platforms. With one year of Intel Parallel Studio XE. Training and support from Colfax. Order your platform today. http://sdm.link/xeonphi ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v4 3/6] tipc: fix connection refcount error
Until now, the generic server framework maintains the connection id's per subscriber in server's conn_idr. At tipc_close_conn, we remove the connection id from the server list, but the connection is valid until we call the refcount cleanup. Hence we have a window where the server allocates the same connection to an new subscriber leading to inconsistent reference count. In this commit, we remove the connection from the server list at recount cleanup. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/server.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/net/tipc/server.c b/net/tipc/server.c index 215849ce453d..e178d41e1a68 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -91,7 +91,8 @@ static void tipc_sock_release(struct tipc_conn *con); static void tipc_conn_kref_release(struct kref *kref) { struct tipc_conn *con = container_of(kref, struct tipc_conn, kref); - struct sockaddr_tipc *saddr = con->server->saddr; + struct tipc_server *s = con->server; + struct sockaddr_tipc *saddr = s->saddr; struct socket *sock = con->sock; struct sock *sk; @@ -106,6 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) tipc_sock_release(con); sock_release(sock); con->sock = NULL; + + spin_lock_bh(>idr_lock); + idr_remove(>conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(>idr_lock); } tipc_clean_outqueues(con); @@ -198,15 +204,8 @@ static void tipc_sock_release(struct tipc_conn *con) static void tipc_close_conn(struct tipc_conn *con) { - struct tipc_server *s = con->server; - if (test_and_clear_bit(CF_CONNECTED, >flags)) { - spin_lock_bh(>idr_lock); - idr_remove(>conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(>idr_lock); - /* We shouldn't flush pending works as we may be in the * thread. In fact the races with pending rx/tx work structs * are harmless for us here as we have already deleted this -- 2.1.4 -- Developer Access Program for Intel Xeon Phi Processors Access to Intel Xeon Phi processor-based developer platforms. With one year of Intel Parallel Studio XE. Training and support from Colfax. Order your platform today. http://sdm.link/xeonphi ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
[tipc-discussion] [PATCH net-next v4 2/6] tipc: add subscription refcount
Until now, the subscriptions are deleted at cancel or subscriber delete by checking for pending timer using del_timer(). del_timer() is not SMP safe, if on CPU0 the check for pending timer returns true but CPU1 might schedule the timer callback thereby deleting the subscription. Thus when CPU0 proceeds to delete the invalid subscription. In this commit, we do the following updates to fix it: 1. perform refcount per subscription in addition to subscriber. 2. use SMP safe del_timer_sync() to remove pending timers. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/subscr.c | 116 -- net/tipc/subscr.h | 1 + 2 files changed, 69 insertions(+), 48 deletions(-) diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0dd02244e21d..d3f450037ad5 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -54,6 +54,7 @@ struct tipc_subscriber { static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); +static void tipc_subscrp_put(struct tipc_subscription *subscription); /** * htohl - convert value to endianness used by destination @@ -143,19 +144,14 @@ static void tipc_subscrp_timeout(unsigned long data) tipc_subscrp_send_event(sub, sub->evt.s.seq.lower, sub->evt.s.seq.upper, TIPC_SUBSCR_TIMEOUT, 0, 0); - spin_lock_bh(>lock); - tipc_subscrp_delete(sub); - spin_unlock_bh(>lock); - + tipc_nametbl_unsubscribe(sub); + tipc_subscrp_put(sub); tipc_subscrb_put(subscriber); } static void tipc_subscrb_kref_release(struct kref *kref) { - struct tipc_subscriber *subcriber = container_of(kref, - struct tipc_subscriber, kref); - - kfree(subcriber); + kfree(container_of(kref,struct tipc_subscriber, kref)); } static void tipc_subscrb_put(struct tipc_subscriber *subscriber) @@ -168,6 +164,58 @@ static void tipc_subscrb_get(struct tipc_subscriber *subscriber) kref_get(>kref); } +static void tipc_subscrp_kref_release(struct kref *kref) +{ + struct tipc_subscription *sub; + struct tipc_net *tn; + + sub = container_of(kref, struct tipc_subscription, kref); + tn = net_generic(sub->net, tipc_net_id); + + spin_lock_bh(>subscriber->lock); + list_del(>subscrp_list); + atomic_dec(>subscription_count); + spin_unlock_bh(>subscriber->lock); + kfree(sub); +} + +static void tipc_subscrp_put(struct tipc_subscription *subscription) +{ + kref_put(>kref, tipc_subscrp_kref_release); +} + +static void tipc_subscrp_get(struct tipc_subscription *subscription) +{ + kref_get(>kref); +} + +/* tipc_subscrb_subscrp_delete - delete a specific subscription or all + * subscriptions for a given subscriber. + */ +static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, + struct tipc_subscr *s) +{ + struct tipc_subscription *sub, *temp; + + spin_lock_bh(>lock); + list_for_each_entry_safe(sub, temp, >subscrp_list, +subscrp_list) { + if (s && memcmp(s, >evt.s, sizeof(struct tipc_subscr))) + continue; + /* Delete subscriptions with pending timers */ + if (timer_pending(>timer)) { + tipc_subscrp_get(sub); + spin_unlock_bh(>lock); + tipc_subscrp_delete(sub); + tipc_subscrp_put(sub); + spin_lock_bh(>lock); + } + if (s) + break; + } + spin_unlock_bh(>lock); +} + static struct tipc_subscriber *tipc_subscrb_create(int conid) { struct tipc_subscriber *subscriber; @@ -177,8 +225,8 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) pr_warn("Subscriber rejected, no memory\n"); return NULL; } - kref_init(>kref); INIT_LIST_HEAD(>subscrp_list); + kref_init(>kref); subscriber->conid = conid; spin_lock_init(>lock); @@ -187,55 +235,26 @@ static struct tipc_subscriber *tipc_subscrb_create(int conid) static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) { - struct tipc_subscription *sub, *temp; - u32 timeout; - - spin_lock_bh(>lock); - /* Destroy any existing subscriptions for subscriber */ - list_for_each_entry_safe(sub, temp, >subscrp_list, -subscrp_list) { - timeout = htohl(sub->evt.s.timeout, sub->swap); - if ((timeout == TIPC_WAIT_FOREVER) || del_timer(>timer)) { - tipc_subscrp_delete
Re: [tipc-discussion] [net-next v3 3/4] tipc: introduce replicast as transport option for multicast
Hi Jon, Added some minor comments inline in this patch, apart from that the major concern is the following: All my tests which passed before this patch, fails while sending multicast to a receiver on own node. With this patch, we increase the likelyhood of receive buffer overflow if the sender & receivers are running on the same host as we bypass the link layer completely. I confirmed this with some traces in filter_rcv(). If I add another multicast listener running on another node, this pacifies the sender (put the sender to sleep at link congestion) and relatively slow link layer reduces the buffer overflow. We need to find a way reduce the aggressiveness of the sender. We want users to be transparent about the location of the services, so we should to provide similar charecteristics regardless of the service location. /Partha On 01/02/2017 03:34 PM, Parthasarathy Bhuvaragan wrote: > Hi jon, > > When I include this patch, ptts case 12 (multicast) fails when the > client and server are running on the same node. > > /Partha > > On 12/22/2016 04:15 PM, Jon Maloy wrote: >> TIPC multicast messages are currently carried over a reliable >> 'broadcast link', making use of the underlying media's ability to >> transport packets as L2 broadcast or IP multicast to all nodes in >> the cluster. >> >> When the used bearer is lacking that ability, we can instead emulate >> the broadcast service by replicating and sending the packets over as >> many unicast links as needed to reach all identified destinations. >> We now introduce a new TIPC link-level 'replicast' service that does >> this. >> >> Signed-off-by: Jon Maloy <jon.ma...@ericsson.com> >> --- >> net/tipc/bcast.c | 105 >> ++ >> net/tipc/bcast.h | 3 +- >> net/tipc/link.c | 8 - >> net/tipc/msg.c| 17 + >> net/tipc/msg.h| 9 +++-- >> net/tipc/node.c | 27 +- >> net/tipc/socket.c | 27 +- >> 7 files changed, 149 insertions(+), 47 deletions(-) >> >> diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c >> index 412d335..672e6ef 100644 >> --- a/net/tipc/bcast.c >> +++ b/net/tipc/bcast.c >> @@ -70,7 +70,7 @@ static struct tipc_bc_base *tipc_bc_base(struct net *net) >> >> int tipc_bcast_get_mtu(struct net *net) >> { >> -return tipc_link_mtu(tipc_bc_sndlink(net)); >> +return tipc_link_mtu(tipc_bc_sndlink(net)) - INT_H_SIZE; >> } >> >> /* tipc_bcbase_select_primary(): find a bearer with links to all >> destinations, >> @@ -175,42 +175,101 @@ static void tipc_bcbase_xmit(struct net *net, struct >> sk_buff_head *xmitq) >> __skb_queue_purge(&_xmitq); >> } >> >> -/* tipc_bcast_xmit - deliver buffer chain to all nodes in cluster >> - *and to identified node local sockets >> +/* tipc_bcast_xmit - broadcast the buffer chain to all external nodes >> * @net: the applicable net namespace >> - * @list: chain of buffers containing message >> + * @pkts: chain of buffers containing message >> + * @cong_link_cnt: set to 1 if broadcast link is congested, otherwise 0 >> * Consumes the buffer chain. >> - * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE >> + * Returns 0 if success, otherwise errno: -EHOSTUNREACH,-EMSGSIZE >> */ >> -int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) >> +static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, >> + u16 *cong_link_cnt) >> { >> struct tipc_link *l = tipc_bc_sndlink(net); >> -struct sk_buff_head xmitq, inputq, rcvq; >> +struct sk_buff_head xmitq; >> int rc = 0; >> >> -__skb_queue_head_init(); >> __skb_queue_head_init(); >> -skb_queue_head_init(); >> - >> -/* Prepare message clone for local node */ >> -if (unlikely(!tipc_msg_reassemble(list, ))) >> -return -EHOSTUNREACH; >> - >> tipc_bcast_lock(net); >> if (tipc_link_bc_peers(l)) >> -rc = tipc_link_xmit(l, list, ); >> +rc = tipc_link_xmit(l, pkts, ); >> tipc_bcast_unlock(net); >> +tipc_bcbase_xmit(net, ); >> +__skb_queue_purge(pkts); >> +if (rc == -ELINKCONG) { >> +*cong_link_cnt = 1; >> +rc = 0; >> +} >> +return rc; >> +} >> >> -/* Don't send to local node if adding to link failed */ >> -if (unlikely(rc && (rc != -ELINKCONG))) { >> -__skb_q
Re: [tipc-discussion] [net-next v4 0/3] tipc: improve interaction socket-link
On 12/22/2016 03:50 PM, Jon Maloy wrote: > We fix a very real starvation problem that may occur when a link > encounters send buffer congestion. At the same time we make the > interaction between the socket and link layer simpler and more > consistent. > > v2: - Simplified link congestion check to only check against own > importance limit. This reduces the risk of higher levels > starving out lower levels. > v3: - Adding one sent message to to link backlog queue even if there is > congestion, as suggested by Partha. > - Allowing link_wakeup() loop to continue adding messages to the > backlog queue even if one or more levels are congested. This > seems to have a positive effect on performance. > v4: - Added Partha's fixes, except for #4. I think having a multicast > being blocked after unicast link congestion is an acceptable > behavior when weighed against the risks of just purging the > congestion list. > > Jon Maloy (3): > tipc: unify tipc_wait_for_sndpkt() and tipc_wait_for_sndmsg() > functions > tipc: modify struct tipc_plist to be more versatile > tipc: reduce risk of user starvation during link congestion > > net/tipc/bcast.c | 6 +- > net/tipc/link.c | 75 - > net/tipc/msg.h| 2 - > net/tipc/name_table.c | 100 +++ > net/tipc/name_table.h | 21 +-- > net/tipc/node.c | 15 +- > net/tipc/socket.c | 449 > ++ > 7 files changed, 319 insertions(+), 349 deletions(-) > Nice work Jon. For these 3 patchsets, please feel free to add Reviewed-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> /Partha -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] ENOMEM
On 12/28/2016 09:46 PM, Rune Torgersen wrote: > What will cause a sendto() call to a Tipc RDM socket to return a ENOMEM? > I am sending a roughly 30K message, and occasionally get that error. > (Ubuntu 16.04 LTS kernel) If your message receiver is the same local node, then tipc will try avoid fragmentation/reassembly by allocating an skb large enough to fix the message. The memory (if not cached) will be allocated from the buddy pool. The current memory situation for the buddypool can be listed using: cat /proc/buddyinfo When the system runs out of buffers of size >= 32K (2^3 => 8 * 4K page size) in buddypool, the memory allocation will fail for intranode. To fix this, we need to send this message chaining several smaller buffers without inserting the fragment header only for intranode. This will surely impact the latency and throughputs for intra-node communication for sizes > MTU. /Partha > > -- > Check out the vibrant tech community on one of the world's most > engaging tech sites, SlashDot.org! http://sdm.link/slashdot > ___ > tipc-discussion mailing list > tipc-discussion@lists.sourceforge.net > https://lists.sourceforge.net/lists/listinfo/tipc-discussion > -- Check out the vibrant tech community on one of the world's most engaging tech sites, SlashDot.org! http://sdm.link/slashdot ___ tipc-discussion mailing list tipc-discussion@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/tipc-discussion
Re: [tipc-discussion] [net-next v3 3/3] tipc: reduce risk of user starvation during link congestion
Hi Jon, Iam adding one more patch, where i allow only user sockets to sleep. The protocol messages should never be in the wakeupq. "tipc: check for user before making congestion decision" /Partha On 12/20/2016 02:21 PM, Jon Maloy wrote: > Thank you for your thorough testing. This is really the kind of work that > helps us keeping out of trouble. > See my comments below. > > ///jon --- From f18c49d672eb991396a234165b534f9cfdeca5b3 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Wed, 21 Dec 2016 10:53:44 +0100 Subject: [PATCH net-next 1/5] tipc: check for user before making congestion decision In this commit, we test link backlog limits only for users since they are forced to sleep during congestion. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/link.c | 23 --- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 5f2b478ac720..a3819613fe92 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -783,24 +783,17 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) */ static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) { - int imp = msg_importance(hdr); u32 dnode = tipc_own_addr(l->net); u32 dport = msg_origport(hdr); struct sk_buff *skb; - /* This really cannot happen... */ - if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, l->name); - return -ENOBUFS; - } - /* Create and schedule wakeup pseudo message */ skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, dnode, l->addr, dport, 0, 0); if (!skb) return -ENOBUFS; msg_set_dest_droppable(buf_msg(skb), true); - TIPC_SKB_CB(skb)->chain_imp = imp; + TIPC_SKB_CB(skb)->chain_imp = msg_importance(hdr); skb_queue_tail(>wakeupq, skb); l->stats.link_congs++; return -ELINKCONG; @@ -889,9 +882,17 @@ int tipc_link_xmit(struct tipc_link *l, struct sk_buff_head *list, return -EMSGSIZE; } - /* Accept oversubscription of one message per source at congestion */ - if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) - rc = link_schedule_user(l, hdr); + if (unlikely(l->backlog[imp].len >= l->backlog[imp].limit)) { + if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { + pr_warn("%s<%s>, send queue full", + link_rst_msg, l->name); + return -ENOBUFS; + } + + /* Let oversubscription of one message per user at congestion */ + if (msg_importance(hdr) < TIPC_SYSTEM_IMPORTANCE) + rc = link_schedule_user(l, hdr); + } if (pkt_cnt > 1) { l->stats.sent_fragmented++; -- 2.1.4 From 3d7023fe53b85db6010539d6b05e1b3acbaaef37 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Thu, 15 Dec 2016 16:02:20 +0100 Subject: [PATCH net-next 2/5] tipc: fix node error handling at link congestion Since the link can return ELINKCONG, but still transmits the frame on the link, we need to adjust the return code handling in the node. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index f51d360dac77..edfb05746131 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1206,10 +1206,10 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, spin_unlock_bh(>lock); tipc_node_read_unlock(n); - if (likely(rc == 0)) - tipc_bearer_xmit(net, bearer_id, , >maddr); - else if (rc == -ENOBUFS) + if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); + else + tipc_bearer_xmit(net, bearer_id, , >maddr); tipc_node_put(n); @@ -1219,22 +1219,16 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, /* tipc_node_xmit_skb(): send single buffer to destination * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected - * The only exception is datagram messages rerouted after secondary - * lookup, which are rare and safe to dispose of anyway. - * TODO: Return real return value, and let callers use - * tipc_wait_for_sendpkt() where applicable */ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) {
Re: [tipc-discussion] [net-next v3 3/3] tipc: reduce risk of user starvation during link congestion
On 12/20/2016 12:32 PM, Parthasarathy Bhuvaragan wrote: > Hi Jon, > > This patch in the series caused multiple stability issues in my test. I > spent couple of days to fix all of them in the patches attached. > > If you are ok with the fixes, then > Reviewed-by: Parthasarathy Bhuvaragan > <parthasarathy.bhuvara...@ericsson.com> > > /Partha I see that the attachments were not delivered. So sending all the 4 patches inline. /Partha --- From 5e1d48de1e441c5583872d2c2d95cc9353ddd63e Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Thu, 15 Dec 2016 16:02:20 +0100 Subject: [PATCH net-next v1 1/4] tipc: fix node error handling at link congestion Since the link can return ELINKCONG, but still transmits the frame on the link, we need to adjust the return code handling in the node. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/node.c | 16 +--- 1 file changed, 5 insertions(+), 11 deletions(-) diff --git a/net/tipc/node.c b/net/tipc/node.c index f51d360dac77..edfb05746131 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1206,10 +1206,10 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, spin_unlock_bh(>lock); tipc_node_read_unlock(n); - if (likely(rc == 0)) - tipc_bearer_xmit(net, bearer_id, , >maddr); - else if (rc == -ENOBUFS) + if (unlikely(rc == -ENOBUFS)) tipc_node_link_down(n, bearer_id, false); + else + tipc_bearer_xmit(net, bearer_id, , >maddr); tipc_node_put(n); @@ -1219,22 +1219,16 @@ int tipc_node_xmit(struct net *net, struct sk_buff_head *list, /* tipc_node_xmit_skb(): send single buffer to destination * Buffers sent via this functon are generally TIPC_SYSTEM_IMPORTANCE * messages, which will not be rejected - * The only exception is datagram messages rerouted after secondary - * lookup, which are rare and safe to dispose of anyway. - * TODO: Return real return value, and let callers use - * tipc_wait_for_sendpkt() where applicable */ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, u32 selector) { struct sk_buff_head head; - int rc; skb_queue_head_init(); __skb_queue_tail(, skb); - rc = tipc_node_xmit(net, , dnode, selector); - if (rc == -ELINKCONG) - kfree_skb(skb); + tipc_node_xmit(net, , dnode, selector); + return 0; } -- 2.1.4 From 2c8a355536bf2089b9e216fae919150acc357b40 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Tue, 20 Dec 2016 12:04:28 +0100 Subject: [PATCH net-next v1 2/4] tipc: fix sending to own node even at link congestion We let the broadcast layer to transmit a copy to the own node even if the link returns ELINKCONG. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/bcast.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 1a56cabbf389..247f87518565 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -196,8 +196,10 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) rc = tipc_link_xmit(l, list, ); tipc_bcast_unlock(net); - /* Don't send to local node if adding to link failed */ - if (unlikely(rc)) { + /* Don't send to local node if adding to link failed +* for reasons other than -ELINKCONG. +*/ + if (unlikely(rc) && (rc != -ELINKCONG)) { __skb_queue_purge(); return rc; } -- 2.1.4 From e69d41d9836062429069364244191c15442fca43 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Tue, 20 Dec 2016 08:33:21 +0100 Subject: [PATCH net-next v1 3/4] tipc: fix return code from bcast_xmit We must send the return code from tipc_bcast_xmit() to the caller tipc_sendmcast() to detect link congestion in socket layer. Signed-off-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> --- net/tipc/bcast.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 247f87518565..c91b276f6831 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -208,7 +208,7 @@ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) tipc_bcbase_xmit(net, ); tipc_sk_mcast_rcv(net, , ); __skb_queue_purge(list); - return 0; + return rc; } /* tipc_bcast_rcv - receive a broadcast packet, and deliver to rcv link -- 2.1.4 From b3c1ad38d0fa710630cf6df02d1ec627277b5144 Mon Sep 17 00:00:00 2001 From: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> Date: Tue, 20 Dec 2016 10
Re: [tipc-discussion] [net-next v3 3/3] tipc: reduce risk of user starvation during link congestion
Hi Jon, This patch in the series caused multiple stability issues in my test. I spent couple of days to fix all of them in the patches attached. If you are ok with the fixes, then Reviewed-by: Parthasarathy Bhuvaragan <parthasarathy.bhuvara...@ericsson.com> /Partha On 12/12/2016 11:42 PM, Jon Maloy wrote: The socket code currently handles link congestion by either blocking and trying to send again when the congestion has abated, or just returning to the user with -EAGAIN and let him re-try later. This mechanism is prone to starvation, because the wakeup algorithm is non-atomic. During the time the link issues a wakeup signal, until the socket wakes up and re-attempts sending, other senders may have come in between and occupied the free buffer space in the link. This in turn may lead to a socket having to make many send attempts before it is successful. In extremely loaded systems we have observed latency times of several seconds before a low-priority socket is able to send out a message. In this commit, we simplify this mechanism and reduce the risk of the described scenario happening. When a message is attempted sent via a congested link, we now let it be added to the link's backlog queue anyway, thus permitting an oversubscription of one message per source socket. We still create a wakeup item and return an error code, hence instructing the sender to block or stop sending. Only when enough space has been freed up in the link's backlog queue do we issue a wakeup event that allows the sender to continue with the next message, if any. The fact that a socket now can consider a message sent even when the link returns a congestion code means that the sending socket code can be simplified. Also, since this is a good opportunity to get rid of the obsolete 'mtu change' condition in the three socket send functions, we now choose to refactor those functions completely. Signed-off-by: Jon Maloy <jon.ma...@ericsson.com> --- net/tipc/bcast.c | 2 +- net/tipc/link.c | 66 +-- net/tipc/msg.h| 2 - net/tipc/node.c | 2 +- net/tipc/socket.c | 346 -- 5 files changed, 184 insertions(+), 234 deletions(-) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index aa1babb..1a56cab 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -174,7 +174,7 @@ static void tipc_bcbase_xmit(struct net *net, struct sk_buff_head *xmitq) *and to identified node local sockets * @net: the applicable net namespace * @list: chain of buffers containing message - * Consumes the buffer chain, except when returning -ELINKCONG + * Consumes the buffer chain. * Returns 0 if success, otherwise errno: -ELINKCONG,-EHOSTUNREACH,-EMSGSIZE */ int tipc_bcast_xmit(struct net *net, struct sk_buff_head *list) diff --git a/net/tipc/link.c b/net/tipc/link.c index bda89bf..5f2b478 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -776,60 +776,55 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) /** * link_schedule_user - schedule a message sender for wakeup after congestion - * @link: congested link - * @list: message that was attempted sent + * @l: congested link + * @hdr: header of message that is being sent * Create pseudo msg to send back to user when congestion abates - * Does not consume buffer list + * Consumes buffer list */ -static int link_schedule_user(struct tipc_link *link, struct sk_buff_head *list) +static int link_schedule_user(struct tipc_link *l, struct tipc_msg *hdr) { - struct tipc_msg *msg = buf_msg(skb_peek(list)); - int imp = msg_importance(msg); - u32 oport = msg_origport(msg); - u32 addr = tipc_own_addr(link->net); + int imp = msg_importance(hdr); + u32 dnode = tipc_own_addr(l->net); + u32 dport = msg_origport(hdr); struct sk_buff *skb; /* This really cannot happen... */ if (unlikely(imp > TIPC_CRITICAL_IMPORTANCE)) { - pr_warn("%s<%s>, send queue full", link_rst_msg, link->name); + pr_warn("%s<%s>, send queue full", link_rst_msg, l->name); return -ENOBUFS; } - /* Non-blocking sender: */ - if (TIPC_SKB_CB(skb_peek(list))->wakeup_pending) - return -ELINKCONG; /* Create and schedule wakeup pseudo message */ skb = tipc_msg_create(SOCK_WAKEUP, 0, INT_H_SIZE, 0, - addr, addr, oport, 0, 0); + dnode, l->addr, dport, 0, 0); if (!skb) return -ENOBUFS; - TIPC_SKB_CB(skb)->chain_sz = skb_queue_len(list); + msg_set_dest_droppable(buf_msg(skb), true); TIPC_SKB_CB(skb)->chain_imp = imp; - skb_queue_tail(>wakeupq, skb); - link->stats.link_congs++; + skb_queue_tail(>wakeupq, skb); + l->stats.link_congs++; return -ELIN