On 08/04/2009 01:12 PM, Erez Zilber wrote: > On Tue, Aug 4, 2009 at 8:17 PM, Mike Christie<micha...@cs.wisc.edu> wrote: >> Erez Zilber wrote: >>> I'm running with open-iscsi.git HEAD + the check suspend bit patch + >>> the wake xmit on error patch. If I disconnect the cable on the >>> initiator side (even while not running IO), I see that after sending >>> the signal, the iscsi_q_XX thread reaches 100% cpu. I ran it over >>> several 1GB/ 10 GB drivers and got the same results. >>> >>> If I remove the wake xmit on error patch, I don't see this behavior. >>> >> Shoot, I have been running the xmit wakeup and suspend bit patch here >> fine. Let me do some more testing. >> >> Is this something you always hit? Could you send me the final patch you >> ended up using? > > I see this every time. Note that I'm not running with > linux-2.6-iscsi.git. I'm using the open-iscsi.git tree + the 2 patches > that I took without any change (using git-show) from the > linux-2.6-iscsi.git tree. Which tree did you test it on? > > I added some printks to the code and saw that the signal does get sent > from iscsi_sw_tcp_conn_stop, but I didn't see that (rc == -EINTR || rc > == -EAGAIN) in iscsi_sw_tcp_xmit (), even when I ran IO on that > session. >
Does r in iscsi_sw_tcp_xmit_segment == 0? If not I think you need a diffferent patch. In one of the patch versions iscsi_sw_tcp_xmit_segment could return -ENODATA (this is when I had a check for suspend_tx in there). iscsi_sw_tcp_xmit did not check this and so I think we can loop. Could you try the attached patch. It was made over open-iscsi.git for you. I dropped the suspend bit check in iscsi_sw_tcp_xmit_segment, because it is not needed. If we end up blocking the signal will wake us. --~--~---------~--~----~------------~-------~--~----~ You received this message because you are subscribed to the Google Groups "open-iscsi" group. To post to this group, send email to open-iscsi@googlegroups.com To unsubscribe from this group, send email to open-iscsi+unsubscr...@googlegroups.com For more options, visit this group at http://groups.google.com/group/open-iscsi -~----------~----~----~----~------~----~------~--~---
diff --git a/kernel/iscsi_tcp.c b/kernel/iscsi_tcp.c index bce1594..028b295 100644 --- a/kernel/iscsi_tcp.c +++ b/kernel/iscsi_tcp.c @@ -279,7 +279,7 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn) * is getting stopped. libiscsi will know so propogate err * for it to do the right thing. */ - if (rc == -EAGAIN) + if (rc == -EINTR || rc == -EAGAIN) return rc; else if (rc < 0) { rc = ISCSI_ERR_XMIT_FAILED; @@ -483,6 +483,23 @@ static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, uint8_t opcode) return 0; } +static void __iscsi_sw_tcp_prep_xmit_wq(struct work_struct *work) +{ + struct iscsi_sw_tcp_conn *tcp_sw_conn = + container_of(work, struct iscsi_sw_tcp_conn, prep_work); + + allow_signal(SIGUSR1); + tcp_sw_conn->xmit_task = current; +} + +static void iscsi_sw_tcp_prep_xmit_wq(struct iscsi_host *ihost, + struct iscsi_sw_tcp_conn *tcp_sw_conn) +{ + INIT_WORK(&tcp_sw_conn->prep_work, __iscsi_sw_tcp_prep_xmit_wq); + queue_work(ihost->workq, &tcp_sw_conn->prep_work); + flush_workqueue(ihost->workq); +} + static struct iscsi_cls_conn * iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) @@ -513,6 +530,8 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session, goto free_tx_tfm; tcp_conn->rx_hash = &tcp_sw_conn->rx_hash; + iscsi_sw_tcp_prep_xmit_wq(shost_priv(conn->session->host), + tcp_sw_conn); return cls_conn; free_tx_tfm: @@ -581,6 +600,10 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn *cls_conn, int flag) set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx); write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock); + set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx); + if (tcp_sw_conn->xmit_task) + send_sig(SIGUSR1, tcp_sw_conn->xmit_task, 0); + iscsi_conn_stop(cls_conn, flag); iscsi_sw_tcp_release_conn(conn); } diff --git a/kernel/iscsi_tcp.h b/kernel/iscsi_tcp.h index f9a4044..49393f1 100644 --- a/kernel/iscsi_tcp.h +++ b/kernel/iscsi_tcp.h @@ -36,8 +36,9 @@ struct iscsi_sw_tcp_send { }; struct iscsi_sw_tcp_conn { - struct iscsi_conn *iscsi_conn; struct socket *sock; + struct task_struct *xmit_task; + struct work_struct prep_work; struct iscsi_sw_tcp_send out; /* old values for socket callbacks */ diff --git a/kernel/libiscsi.c b/kernel/libiscsi.c index 73c4231..0ba612e 100644 --- a/kernel/libiscsi.c +++ b/kernel/libiscsi.c @@ -1212,6 +1212,9 @@ static int iscsi_xmit_task(struct iscsi_conn *conn) struct iscsi_task *task = conn->task; int rc; + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) + return -ENODATA; + __iscsi_get_task(task); spin_unlock_bh(&conn->session->lock); rc = conn->session->tt->xmit_task(task); @@ -1261,7 +1264,10 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) int rc = 0; spin_lock_bh(&conn->session->lock); - if (unlikely(conn->suspend_tx)) { + if (signal_pending(current)) + flush_signals(current); + + if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) { ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n"); spin_unlock_bh(&conn->session->lock); return -ENODATA; @@ -1270,7 +1276,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn) if (conn->task) { rc = iscsi_xmit_task(conn); if (rc) - goto again; + goto done; } /* @@ -1290,7 +1296,7 @@ check_mgmt: } rc = iscsi_xmit_task(conn); if (rc) - goto again; + goto done; } /* process pending command queue */ @@ -1311,14 +1317,14 @@ check_mgmt: list_add_tail(&conn->task->running, &conn->cmdqueue); conn->task = NULL; - goto again; + goto done; } else fail_scsi_task(conn->task, DID_ABORT); continue; } rc = iscsi_xmit_task(conn); if (rc) - goto again; + goto done; /* * we could continuously get new task requests so * we need to check the mgmt queue for nops that need to @@ -1344,16 +1350,14 @@ check_mgmt: conn->task->state = ISCSI_TASK_RUNNING; rc = iscsi_xmit_task(conn); if (rc) - goto again; + goto done; if (!list_empty(&conn->mgmtqueue)) goto check_mgmt; } spin_unlock_bh(&conn->session->lock); return -ENODATA; -again: - if (unlikely(conn->suspend_tx)) - rc = -ENODATA; +done: spin_unlock_bh(&conn->session->lock); return rc; }