I will check it and return to you. - Amir
On 07/15/2009 03:08 PM, Maarten van Malland wrote: > Hi Amir, > > We tried your patch and unfortunately it didn't help much. Actually, it made > it worse. Here is the dmesg output when we ran nttcp over sdp: > > inftsttwin03 ~ # dmesg > sdp_reset_sk:447 sdp_sock( 4453:0 5038:51621): setting state to error > BUG: unable to handle kernel paging request at 0000000000001398 > IP: [<ffffffffa0104ff0>] sdp_post_recv+0x299/0x66b [ib_sdp] > PGD 11a961067 PUD 12dcac067 PMD 0 > Oops: 0002 [#1] PREEMPT SMP > last sysfs file: /sys/devices/system/cpu/cpu7/cache/index2/shared_cpu_map > CPU 5 > Modules linked in: ib_sdp w83793 hwmon_vid rdma_ucm rdma_cm iw_cm ib_addr > mlx4_ib ib_ipoib ib_cm ib_sa ib_ > uverbs ib_umad ib_mthca ib_mad i2c_i801 ib_core mlx4_core e1000e i2c_core > i5k_amb hwmon [last unloaded: ib > _sdp] > Pid: 12072, comm: nttcp Not tainted 2.6.30.1 #1 X7DWT > RIP: 0010:[<ffffffffa0104ff0>] [<ffffffffa0104ff0>] > sdp_post_recv+0x299/0x66b [ib_sdp] > RSP: 0018:ffff88011b4d1ab8 EFLAGS: 00010202 > RAX: 0000000000001398 RBX: 0000000000000008 RCX: ffffe20003df9d40 > RDX: 0000000000000000 RSI: 0000000000000000 RDI: ffffffff80277789 > RBP: ffff88011b4d1bf8 R08: 0000000000000000 R09: ffff880000015e00 > R10: 0000000000000000 R11: ffffe20003df9d40 R12: ffff88011099f500 > R13: ffff880110474080 R14: 00000000000000d2 R15: 0000000000000000 > FS: 00007f337b97c6f0(0000) GS:ffff880028186000(0000) knlGS:0000000000000000 > CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b > CR2: 0000000000001398 CR3: 000000012d8bb000 CR4: 00000000000406e0 > DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 > DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 > Process nttcp (pid: 12072, threadinfo ffff88011b4d0000, task > ffff88011b49e640) > Stack: > ffff88011b4d1af8 ffffffff802308a8 00000000ffffffff ffff88012d4b1000 > 0000000000001398 ffff88012d965800 0000000000000139 0000000000011a80 > ffff88011b4d1b98 ffffffff80553141 000000011b4c8000 0000000000000001 > Call Trace: > [<ffffffff802308a8>] ? finish_task_switch+0xb5/0xc4 > [<ffffffff80553141>] ? thread_return+0x4e/0x9d > [<ffffffff8023c49e>] ? local_bh_disable+0xd/0xf > [<ffffffff80554ffa>] ? _spin_lock_bh+0x11/0x34 > [<ffffffff80554f99>] ? _spin_lock_irqsave+0x1d/0x39 > [<ffffffff805579d2>] ? sub_preempt_count+0x9e/0xbe > [<ffffffffa01060e3>] sdp_do_posts+0xa71/0xa91 [ib_sdp] > [<ffffffff804c7700>] ? sk_wait_data+0xb8/0xcd > [<ffffffff8024b10f>] ? autoremove_wake_function+0x0/0x38 > [<ffffffffa00ff006>] sdp_recvmsg+0x360/0x5dc [ib_sdp] > [<ffffffff804c63ed>] sock_common_recvmsg+0x32/0x47 > [<ffffffff804c3fdb>] sock_aio_read+0xe8/0xfc > [<ffffffff8023c703>] ? irq_exit+0x4f/0x51 > [<ffffffff802a2334>] ? do_sync_read+0x62/0x126 > [<ffffffff802a23b4>] do_sync_read+0xe2/0x126 > [<ffffffff80554d46>] ? trace_hardirqs_on_thunk+0x3a/0x3c > [<ffffffff8020b8ad>] ? restore_args+0x0/0x30 > [<ffffffff8024b10f>] ? autoremove_wake_function+0x0/0x38 > [<ffffffff805579d2>] ? sub_preempt_count+0x9e/0xbe > [<ffffffff80555295>] ? _spin_unlock+0x10/0x29 > [<ffffffff802a2af3>] vfs_read+0xbe/0x134 > [<ffffffff802a2e5d>] sys_read+0x47/0x70 > [<ffffffff8020aeeb>] system_call_fastpath+0x16/0x1b > Code: b8 00 00 00 41 3b 9d 6c 05 00 00 7c 83 8b 85 f0 fe ff ff 49 8b 95 30 > 04 00 00 83 e0 3f 48 6b c0 58 4 > 8 01 d0 48 89 85 e0 fe ff ff <4c> 89 20 4d 8b bd d0 04 00 00 49 8b 87 98 02 > 00 00 48 85 c0 74 > RIP [<ffffffffa0104ff0>] sdp_post_recv+0x299/0x66b [ib_sdp] > RSP <ffff88011b4d1ab8> > CR2: 0000000000001398 > ---[ end trace f3d85b6359a569b5 ]--- > > Perhaps you can take a second look at it, > > Best regards, > > Maarten > > > -----Oorspronkelijk bericht----- > Van: [email protected] > [mailto:[email protected]] Namens Amir Vadai > Verzonden: woensdag 15 juli 2009 12:28 > Aan: [email protected] > CC: Amir Vadai > Onderwerp: [ofa-general] [PATCH] sdp: fix some warning and bugs in porting > to ofed 1.5 > > Signed-off-by: Amir Vadai <[email protected]> > --- > drivers/infiniband/ulp/sdp/sdp.h | 9 +++++++-- > drivers/infiniband/ulp/sdp/sdp_cma.c | 2 +- > drivers/infiniband/ulp/sdp/sdp_main.c | 17 ++++++++--------- > drivers/infiniband/ulp/sdp/sdp_rx.c | 14 ++++---------- > 4 files changed, 20 insertions(+), 22 deletions(-) > > diff --git a/drivers/infiniband/ulp/sdp/sdp.h > b/drivers/infiniband/ulp/sdp/sdp.h > index d03d6af..ec8dbe1 100644 > --- a/drivers/infiniband/ulp/sdp/sdp.h > +++ b/drivers/infiniband/ulp/sdp/sdp.h > @@ -11,12 +11,15 @@ > #define SDPSTATS_ON > /* #define SDP_PROFILING */ > > -#define _sdp_printk(func, line, level, sk, format, arg...) \ > +#define _sdp_printk(func, line, level, sk, format, arg...) do { > \ > + preempt_disable(); \ > printk(level "%s:%d sdp_sock(%5d:%d %d:%d): " format, \ > func, line, \ > current->pid, smp_processor_id(), \ > (sk) ? inet_sk(sk)->num : -1, \ > - (sk) ? ntohs(inet_sk(sk)->dport) : -1, ## arg) > + (sk) ? ntohs(inet_sk(sk)->dport) : -1, ## arg); \ > + preempt_enable(); \ > +} while (0) > #define sdp_printk(level, sk, format, arg...) \ > _sdp_printk(__func__, __LINE__, level, sk, format, ## arg) > #define sdp_warn(sk, format, arg...) \ > @@ -71,6 +74,7 @@ static inline unsigned long long current_nsec(void) > #define sdp_prf1(sk, s, format, arg...) ({ \ > struct sdpprf_log *l = \ > &sdpprf_log[sdpprf_log_count++ & (SDPPRF_LOG_SIZE - 1)]; \ > + preempt_disable(); \ > l->idx = sdpprf_log_count - 1; \ > l->pid = current->pid; \ > l->sk_num = (sk) ? inet_sk(sk)->num : -1; \ > @@ -81,6 +85,7 @@ static inline unsigned long long current_nsec(void) > l->time = current_nsec(); \ > l->func = __func__; \ > l->line = __LINE__; \ > + preempt_enable(); \ > 1; \ > }) > #define sdp_prf(sk, s, format, arg...) > diff --git a/drivers/infiniband/ulp/sdp/sdp_cma.c > b/drivers/infiniband/ulp/sdp/sdp_cma.c > index 2354fff..f8a7303 100644 > --- a/drivers/infiniband/ulp/sdp/sdp_cma.c > +++ b/drivers/infiniband/ulp/sdp/sdp_cma.c > @@ -308,7 +308,7 @@ int sdp_cma_handler(struct rdma_cm_id *id, struct > rdma_cm_event *event) > -EINVAL : 0; > } > > - lock_sock(sk); > + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); > sdp_dbg(sk, "%s event %d id %p\n", __func__, event->event, id); > if (!sdp_sk(sk)->id) { > sdp_dbg(sk, "socket is being torn down\n"); > diff --git a/drivers/infiniband/ulp/sdp/sdp_main.c > b/drivers/infiniband/ulp/sdp/sdp_main.c > index 48e43dc..ab49846 100644 > --- a/drivers/infiniband/ulp/sdp/sdp_main.c > +++ b/drivers/infiniband/ulp/sdp/sdp_main.c > @@ -181,7 +181,6 @@ static int sdp_get_port(struct sock *sk, unsigned short > snum) > static void sdp_destroy_qp(struct sdp_sock *ssk) > { > struct ib_pd *pd = NULL; > - unsigned long flags; > > > sdp_dbg(&ssk->isk.sk, "destroying qp\n"); > @@ -189,7 +188,6 @@ static void sdp_destroy_qp(struct sdp_sock *ssk) > > del_timer(&ssk->tx_ring.timer); > > - rx_ring_lock(ssk, flags); > > sdp_rx_ring_destroy(ssk); > sdp_tx_ring_destroy(ssk); > @@ -209,9 +207,6 @@ static void sdp_destroy_qp(struct sdp_sock *ssk) > ib_dealloc_pd(pd); > > sdp_remove_large_sock(ssk); > - > - rx_ring_unlock(ssk, flags); > - > } > > static void sdp_reset_keepalive_timer(struct sock *sk, unsigned long len) > @@ -453,10 +448,6 @@ void sdp_reset_sk(struct sock *sk, int rc) > sdp_set_error(sk, rc); > } > > - sdp_destroy_qp(ssk); > - > - memset((void *)&ssk->id, 0, sizeof(*ssk) - offsetof(typeof(*ssk), > id)); > - > sk->sk_state_change(sk); > > /* Don't destroy socket before destroy work does its job */ > @@ -976,6 +967,10 @@ void sdp_destroy_work(struct work_struct *work) > struct sock *sk = &ssk->isk.sk; > sdp_dbg(sk, "%s: refcnt %d\n", __func__, > atomic_read(&sk->sk_refcnt)); > > + sdp_destroy_qp(ssk); > + > + memset((void *)&ssk->id, 0, sizeof(*ssk) - offsetof(typeof(*ssk), > id)); > + > sdp_cancel_dreq_wait_timeout(ssk); > > if (sk->sk_state == TCP_TIME_WAIT) > @@ -2559,6 +2554,10 @@ static void __exit sdp_exit(void) > sdp_proc_unregister(); > > ib_unregister_client(&sdp_client); > + > + percpu_counter_destroy(sockets_allocated); > + percpu_counter_destroy(orphan_count); > + > kfree(orphan_count); > kfree(sockets_allocated); > } > diff --git a/drivers/infiniband/ulp/sdp/sdp_rx.c > b/drivers/infiniband/ulp/sdp/sdp_rx.c > index 749a83a..38417c4 100644 > --- a/drivers/infiniband/ulp/sdp/sdp_rx.c > +++ b/drivers/infiniband/ulp/sdp/sdp_rx.c > @@ -447,12 +447,12 @@ static int sdp_process_rx_ctl_skb(struct sdp_sock > *ssk, struct sk_buff *skb) > break; > case SDP_MID_CHRCVBUF: > sdp_dbg_data(sk, "Handling RX CHRCVBUF\n"); > - sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf *)h); > + sdp_handle_resize_request(ssk, (struct sdp_chrecvbuf > *)(h+1)); > __kfree_skb(skb); > break; > case SDP_MID_CHRCVBUF_ACK: > sdp_dbg_data(sk, "Handling RX CHRCVBUF_ACK\n"); > - sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)h); > + sdp_handle_resize_ack(ssk, (struct sdp_chrecvbuf *)(h+1)); > __kfree_skb(skb); > break; > default: > @@ -787,9 +787,6 @@ int sdp_rx_ring_create(struct sdp_sock *ssk, struct > ib_device *device) > { > struct ib_cq *rx_cq; > int rc = 0; > - unsigned long flags; > - > - rx_ring_lock(ssk, flags); > > atomic_set(&ssk->rx_ring.head, 1); > atomic_set(&ssk->rx_ring.tail, 1); > @@ -797,12 +794,11 @@ int sdp_rx_ring_create(struct sdp_sock *ssk, struct > ib_device *device) > ssk->rx_ring.buffer = kmalloc( > sizeof *ssk->rx_ring.buffer * SDP_RX_SIZE, > GFP_KERNEL); > if (!ssk->rx_ring.buffer) { > - rc = -ENOMEM; > sdp_warn(&ssk->isk.sk, > "Unable to allocate RX Ring size %zd.\n", > sizeof(*ssk->rx_ring.buffer) * SDP_RX_SIZE); > > - goto out; > + return -ENOMEM; > } > > /* TODO: use vector=IB_CQ_VECTOR_LEAST_ATTACHED when implemented > @@ -822,13 +818,11 @@ int sdp_rx_ring_create(struct sdp_sock *ssk, struct > ib_device *device) > > sdp_arm_rx_cq(&ssk->isk.sk); > > - goto out; > + return 0; > > err_cq: > kfree(ssk->rx_ring.buffer); > ssk->rx_ring.buffer = NULL; > -out: > - rx_ring_unlock(ssk, flags); > return rc; > } > > _______________________________________________ general mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/general To unsubscribe, please visit http://openib.org/mailman/listinfo/openib-general
