From: Linas Vepstas <[EMAIL PROTECTED]>
This patch adds the PCI error recovery callbacks to the Symbios SCSI device
driver. It includes support for First Failure Data Capture.
Signed-off-by: Linas Vepstas <[EMAIL PROTECTED]>
Assorted changes to initial patches, including returning IRQ_NONE from the
interrupt handler if the device is offline and re-using the eh_done completion
in the SCSI error handler.
Signed-off-by: Matthew Wilcox <[EMAIL PROTECTED]>
---
drivers/scsi/sym53c8xx_2/sym_glue.c | 179 ++-
drivers/scsi/sym53c8xx_2/sym_glue.h | 3 +
drivers/scsi/sym53c8xx_2/sym_hipd.c | 25 -
3 files changed, 200 insertions(+), 7 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 6bc8789..fec9c9c 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -134,7 +134,7 @@ static struct scsi_transport_template *sym2_transport_template = NULL;
* Driver private area in the SCSI command structure.
*/
struct sym_ucmd { /* Override the SCSI pointer structure */
- struct completion *eh_done; /* For error handling */
+ struct completion *eh_done; /* SCSI error handling */
};
#define SYM_UCMD_PTR(cmd) ((struct sym_ucmd *)(&(cmd)->SCp))
@@ -556,6 +556,10 @@ static irqreturn_t sym53c8xx_intr(int irq, void *dev_id)
{
struct sym_hcb *np = dev_id;
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if (pci_channel_offline(np->s.device))
+ return IRQ_NONE;
+
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
spin_lock(np->s.host->host_lock);
@@ -598,6 +602,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
struct sym_hcb *np = SYM_SOFTC_PTR(cmd);
struct sym_ucmd *ucmd = SYM_UCMD_PTR(cmd);
struct Scsi_Host *host = cmd->device->host;
+ struct pci_dev *pdev = np->s.device;
SYM_QUEHEAD *qp;
int cmd_queued = 0;
int sts = -1;
@@ -605,6 +610,38 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
+ /* We may be in an error condition because the PCI bus
+  * went down. In this case, we need to wait until the
+  * PCI bus is reset, the card is reset, and only then
+  * proceed with the scsi error recovery. There's no
+  * point in hurrying; take a leisurely wait.
+  */
+#define WAIT_FOR_PCI_RECOVERY 35
+ if (pci_channel_offline(pdev)) {
+ struct host_data *hostdata = shost_priv(host);
+ struct completion *io_reset;
+ int finished_reset = 0;
+ init_completion(&eh_done);
+ spin_lock_irq(host->host_lock);
+ /* Make sure we didn't race */
+ if (pci_channel_offline(pdev)) {
+ if (!hostdata->io_reset)
+ hostdata->io_reset = &eh_done;
+ io_reset = hostdata->io_reset;
+ } else {
+ io_reset = NULL;
+ }
+
+ if (!pci_channel_offline(pdev))
+ finished_reset = 1;
+ spin_unlock_irq(host->host_lock);
+ if (!finished_reset)
+ finished_reset = wait_for_completion_timeout(io_reset,
+ WAIT_FOR_PCI_RECOVERY*HZ);
+ if (!finished_reset)
+ return SCSI_FAILED;
+ }
+
spin_lock_irq(host->host_lock);
/* This one is queued in some place -> to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -630,7 +667,7 @@ static int sym_eh_handler(int op, char *opname, struct scsi_cmnd *cmd)
break;
case SYM_EH_HOST_RESET:
sym_reset_scsi_bus(np, 0);
- sym_start_up (np, 1);
+ sym_start_up(np, 1);
sts = 0;
break;
default:
@@ -1435,7 +1472,7 @@ static struct Scsi_Host * __devinit sym_attach(struct scsi_host_template *tpnt,
/*
* Start the SCRIPTS.
*/
- sym_start_up (np, 1);
+ sym_start_up(np, 1);
/*
* Start the timer daemon
@@ -1822,6 +1859,134 @@ static void __devexit sym2_remove(struct pci_dev *pdev)
attach_count--;
}
+/**
+ * sym2_io_error_detected() - called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ /* If slot is permanently frozen, turn everything off */
+ if (state == pci_channel_io_perm_failure) {
+ sym2_remove(pdev);
+ retur