Yikes, my git-send-email settings suppressed the important CCs.  Sorry!

Guilherme and Ilkka, can you comment on your testing results, or review the
patch, please?

- Chris Leech

----- Original Message -----
> There's a rather long-standing regression from commit
> 659743b [SCSI] libiscsi: Reduce locking contention in fast path
> 
> Depending on iSCSI target behavior, it's possible to hit the case in
> iscsi_complete_task where the task is still on a pending list
> (!list_empty(&task->running)).  When that happens the task is removed
> from the list while holding the session back_lock, but other task list
> modifications occur under the frwd_lock.  That leads to linked list
> corruption and eventually a panicked system.
> 
> Rather than back out the session lock split entirely, in order to try
> and keep some of the performance gains this patch adds another lock to
> maintain the task lists' integrity.
> 
> Major enterprise supported kernels have been backing out the lock split
> for a while now, thanks to the efforts at IBM where a lab setup has the
> most reliable reproducer I've seen on this issue.  This patch has been
> tested there successfully.
> 
> Signed-off-by: Chris Leech <cle...@redhat.com>
> ---
>  drivers/scsi/libiscsi.c | 26 +++++++++++++++++++++++++-
>  include/scsi/libiscsi.h |  1 +
>  2 files changed, 26 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
> index 834d121..acb5ef3 100644
> --- a/drivers/scsi/libiscsi.c
> +++ b/drivers/scsi/libiscsi.c
> @@ -560,8 +560,12 @@ static void iscsi_complete_task(struct iscsi_task *task,
> int state)
>       WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
>       task->state = state;
>  
> -     if (!list_empty(&task->running))
> +     spin_lock_bh(&conn->taskqueuelock);
> +     if (!list_empty(&task->running)) {
> +             WARN_ONCE(1, "iscsi_complete_task while task on list");
>               list_del_init(&task->running);
> +     }
> +     spin_unlock_bh(&conn->taskqueuelock);
>  
>       if (conn->task == task)
>               conn->task = NULL;
> @@ -783,7 +787,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct
> iscsi_hdr *hdr,
>               if (session->tt->xmit_task(task))
>                       goto free_task;
>       } else {
> +             spin_lock_bh(&conn->taskqueuelock);
>               list_add_tail(&task->running, &conn->mgmtqueue);
> +             spin_unlock_bh(&conn->taskqueuelock);
>               iscsi_conn_queue_work(conn);
>       }
>  
> @@ -1474,8 +1480,10 @@ void iscsi_requeue_task(struct iscsi_task *task)
>        * this may be on the requeue list already if the xmit_task callout
>        * is handling the r2ts while we are adding new ones
>        */
> +     spin_lock_bh(&conn->taskqueuelock);
>       if (list_empty(&task->running))
>               list_add_tail(&task->running, &conn->requeue);
> +     spin_unlock_bh(&conn->taskqueuelock);
>       iscsi_conn_queue_work(conn);
>  }
>  EXPORT_SYMBOL_GPL(iscsi_requeue_task);
> @@ -1512,22 +1520,26 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>        * only have one nop-out as a ping from us and targets should not
>        * overflow us with nop-ins
>        */
> +     spin_lock_bh(&conn->taskqueuelock);
>  check_mgmt:
>       while (!list_empty(&conn->mgmtqueue)) {
>               conn->task = list_entry(conn->mgmtqueue.next,
>                                        struct iscsi_task, running);
>               list_del_init(&conn->task->running);
> +             spin_unlock_bh(&conn->taskqueuelock);
>               if (iscsi_prep_mgmt_task(conn, conn->task)) {
>                       /* regular RX path uses back_lock */
>                       spin_lock_bh(&conn->session->back_lock);
>                       __iscsi_put_task(conn->task);
>                       spin_unlock_bh(&conn->session->back_lock);
>                       conn->task = NULL;
> +                     spin_lock_bh(&conn->taskqueuelock);
>                       continue;
>               }
>               rc = iscsi_xmit_task(conn);
>               if (rc)
>                       goto done;
> +             spin_lock_bh(&conn->taskqueuelock);
>       }
>  
>       /* process pending command queue */
> @@ -1535,19 +1547,24 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>               conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
>                                       running);
>               list_del_init(&conn->task->running);
> +             spin_unlock_bh(&conn->taskqueuelock);
>               if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
>                       fail_scsi_task(conn->task, DID_IMM_RETRY);
> +                     spin_lock_bh(&conn->taskqueuelock);
>                       continue;
>               }
>               rc = iscsi_prep_scsi_cmd_pdu(conn->task);
>               if (rc) {
>                       if (rc == -ENOMEM || rc == -EACCES) {
> +                             spin_lock_bh(&conn->taskqueuelock);
>                               list_add_tail(&conn->task->running,
>                                             &conn->cmdqueue);
>                               conn->task = NULL;
> +                             spin_unlock_bh(&conn->taskqueuelock);
>                               goto done;
>                       } else
>                               fail_scsi_task(conn->task, DID_ABORT);
> +                     spin_lock_bh(&conn->taskqueuelock);
>                       continue;
>               }
>               rc = iscsi_xmit_task(conn);
> @@ -1558,6 +1575,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>                * we need to check the mgmt queue for nops that need to
>                * be sent to aviod starvation
>                */
> +             spin_lock_bh(&conn->taskqueuelock);
>               if (!list_empty(&conn->mgmtqueue))
>                       goto check_mgmt;
>       }
> @@ -1577,12 +1595,15 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
>               conn->task = task;
>               list_del_init(&conn->task->running);
>               conn->task->state = ISCSI_TASK_RUNNING;
> +             spin_unlock_bh(&conn->taskqueuelock);
>               rc = iscsi_xmit_task(conn);
>               if (rc)
>                       goto done;
> +             spin_lock_bh(&conn->taskqueuelock);
>               if (!list_empty(&conn->mgmtqueue))
>                       goto check_mgmt;
>       }
> +     spin_unlock_bh(&conn->taskqueuelock);
>       spin_unlock_bh(&conn->session->frwd_lock);
>       return -ENODATA;
>  
> @@ -1738,7 +1759,9 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct
> scsi_cmnd *sc)
>                       goto prepd_reject;
>               }
>       } else {
> +             spin_lock_bh(&conn->taskqueuelock);
>               list_add_tail(&task->running, &conn->cmdqueue);
> +             spin_unlock_bh(&conn->taskqueuelock);
>               iscsi_conn_queue_work(conn);
>       }
>  
> @@ -2896,6 +2919,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session,
> int dd_size,
>       INIT_LIST_HEAD(&conn->mgmtqueue);
>       INIT_LIST_HEAD(&conn->cmdqueue);
>       INIT_LIST_HEAD(&conn->requeue);
> +     spin_lock_init(&conn->taskqueuelock);
>       INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
>  
>       /* allocate login_task used for the login/text sequences */
> diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h
> index b0e275d..583875e 100644
> --- a/include/scsi/libiscsi.h
> +++ b/include/scsi/libiscsi.h
> @@ -196,6 +196,7 @@ struct iscsi_conn {
>       struct iscsi_task       *task;          /* xmit task in progress */
>  
>       /* xmit */
> +     spinlock_t              taskqueuelock;  /* protects the next three 
> lists */
>       struct list_head        mgmtqueue;      /* mgmt (control) xmit queue */
>       struct list_head        cmdqueue;       /* data-path cmd queue */
>       struct list_head        requeue;        /* tasks needing another run */
> --
> 2.9.3
> 
> 

Reply via email to