On 08/04/2009 01:12 PM, Erez Zilber wrote:
> On Tue, Aug 4, 2009 at 8:17 PM, Mike Christie<micha...@cs.wisc.edu>  wrote:
>> Erez Zilber wrote:
>>> I'm running with open-iscsi.git HEAD + the check suspend bit patch +
>>> the wake xmit on error patch. If I disconnect the cable on the
>>> initiator side (even while not running IO), I see that after sending
>>> the signal, the  iscsi_q_XX thread reaches 100% cpu. I ran it over
>>> several 1GB/ 10 GB drivers and got the same results.
>>>
>>> If I remove the  wake xmit on error patch, I don't see this behavior.
>>>
>> Shoot, I have been running the xmit wakeup and suspend bit patch here
>> fine. Let me do some more testing.
>>
>> Is this something you always hit? Could you send me the final patch you
>> ended up using?
>
> I see this every time. Note that I'm not running with
> linux-2.6-iscsi.git. I'm using the open-iscsi.git tree + the 2 patches
> that I took without any change (using git-show) from the
> linux-2.6-iscsi.git tree. Which tree did you test it on?
>
> I added some printks to the code and saw that the signal does get sent
> from iscsi_sw_tcp_conn_stop, but I didn't see that (rc == -EINTR || rc
> == -EAGAIN) in  iscsi_sw_tcp_xmit (), even when I ran IO on that
> session.
>

Does r in iscsi_sw_tcp_xmit_segment == 0?

If not I think you need a diffferent patch. In one of the patch versions 
iscsi_sw_tcp_xmit_segment could return -ENODATA (this is when I had a 
check for suspend_tx in there). iscsi_sw_tcp_xmit did not check this and 
so I think  we can loop.

Could you try the attached patch. It was made over open-iscsi.git for 
you. I dropped the suspend bit check in iscsi_sw_tcp_xmit_segment, 
because it is not needed. If we end up blocking the signal will wake us.

--~--~---------~--~----~------------~-------~--~----~
You received this message because you are subscribed to the Google Groups 
"open-iscsi" group.
To post to this group, send email to open-iscsi@googlegroups.com
To unsubscribe from this group, send email to 
open-iscsi+unsubscr...@googlegroups.com
For more options, visit this group at http://groups.google.com/group/open-iscsi
-~----------~----~----~----~------~----~------~--~---

diff --git a/kernel/iscsi_tcp.c b/kernel/iscsi_tcp.c
index bce1594..028b295 100644
--- a/kernel/iscsi_tcp.c
+++ b/kernel/iscsi_tcp.c
@@ -279,7 +279,7 @@ static int iscsi_sw_tcp_xmit(struct iscsi_conn *conn)
                 * is getting stopped. libiscsi will know so propogate err
                 * for it to do the right thing.
                 */
-               if (rc == -EAGAIN)
+               if (rc == -EINTR || rc == -EAGAIN)
                        return rc;
                else if (rc < 0) {
                        rc = ISCSI_ERR_XMIT_FAILED;
@@ -483,6 +483,23 @@ static int iscsi_sw_tcp_pdu_alloc(struct iscsi_task *task, 
uint8_t opcode)
        return 0;
 }
 
+static void __iscsi_sw_tcp_prep_xmit_wq(struct work_struct *work)
+{
+       struct iscsi_sw_tcp_conn *tcp_sw_conn =
+                       container_of(work, struct iscsi_sw_tcp_conn, prep_work);
+
+       allow_signal(SIGUSR1);
+       tcp_sw_conn->xmit_task = current;
+}
+
+static void iscsi_sw_tcp_prep_xmit_wq(struct iscsi_host *ihost,
+                                    struct iscsi_sw_tcp_conn *tcp_sw_conn)
+{
+       INIT_WORK(&tcp_sw_conn->prep_work, __iscsi_sw_tcp_prep_xmit_wq);
+       queue_work(ihost->workq, &tcp_sw_conn->prep_work);
+       flush_workqueue(ihost->workq);
+}
+
 static struct iscsi_cls_conn *
 iscsi_sw_tcp_conn_create(struct iscsi_cls_session *cls_session,
                         uint32_t conn_idx)
@@ -513,6 +530,8 @@ iscsi_sw_tcp_conn_create(struct iscsi_cls_session 
*cls_session,
                goto free_tx_tfm;
        tcp_conn->rx_hash = &tcp_sw_conn->rx_hash;
 
+       iscsi_sw_tcp_prep_xmit_wq(shost_priv(conn->session->host),
+                                 tcp_sw_conn);
        return cls_conn;
 
 free_tx_tfm:
@@ -581,6 +600,10 @@ static void iscsi_sw_tcp_conn_stop(struct iscsi_cls_conn 
*cls_conn, int flag)
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
        write_unlock_bh(&tcp_sw_conn->sock->sk->sk_callback_lock);
 
+       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       if (tcp_sw_conn->xmit_task)
+               send_sig(SIGUSR1, tcp_sw_conn->xmit_task, 0);
+
        iscsi_conn_stop(cls_conn, flag);
        iscsi_sw_tcp_release_conn(conn);
 }
diff --git a/kernel/iscsi_tcp.h b/kernel/iscsi_tcp.h
index f9a4044..49393f1 100644
--- a/kernel/iscsi_tcp.h
+++ b/kernel/iscsi_tcp.h
@@ -36,8 +36,9 @@ struct iscsi_sw_tcp_send {
 };
 
 struct iscsi_sw_tcp_conn {
-       struct iscsi_conn       *iscsi_conn;
        struct socket           *sock;
+       struct task_struct      *xmit_task;
+       struct work_struct      prep_work;
 
        struct iscsi_sw_tcp_send out;
        /* old values for socket callbacks */
diff --git a/kernel/libiscsi.c b/kernel/libiscsi.c
index 73c4231..0ba612e 100644
--- a/kernel/libiscsi.c
+++ b/kernel/libiscsi.c
@@ -1212,6 +1212,9 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
        struct iscsi_task *task = conn->task;
        int rc;
 
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
+               return -ENODATA;
+
        __iscsi_get_task(task);
        spin_unlock_bh(&conn->session->lock);
        rc = conn->session->tt->xmit_task(task);
@@ -1261,7 +1264,10 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
        int rc = 0;
 
        spin_lock_bh(&conn->session->lock);
-       if (unlikely(conn->suspend_tx)) {
+       if (signal_pending(current))
+               flush_signals(current);
+
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
                ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n");
                spin_unlock_bh(&conn->session->lock);
                return -ENODATA;
@@ -1270,7 +1276,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
        if (conn->task) {
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
        }
 
        /*
@@ -1290,7 +1296,7 @@ check_mgmt:
                }
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
        }
 
        /* process pending command queue */
@@ -1311,14 +1317,14 @@ check_mgmt:
                                list_add_tail(&conn->task->running,
                                              &conn->cmdqueue);
                                conn->task = NULL;
-                               goto again;
+                               goto done;
                        } else
                                fail_scsi_task(conn->task, DID_ABORT);
                        continue;
                }
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
                /*
                 * we could continuously get new task requests so
                 * we need to check the mgmt queue for nops that need to
@@ -1344,16 +1350,14 @@ check_mgmt:
                conn->task->state = ISCSI_TASK_RUNNING;
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
        spin_unlock_bh(&conn->session->lock);
        return -ENODATA;
 
-again:
-       if (unlikely(conn->suspend_tx))
-               rc = -ENODATA;
+done:
        spin_unlock_bh(&conn->session->lock);
        return rc;
 }

Reply via email to