Various PCI bus errors can be signaled by newer PCI controllers.
This patch adds the PCI error recovery callbacks to the Symbios
SCSI device driver. The patch has been tested, and appears to
work well.
Signed-off-by: Linas Vepstas [EMAIL PROTECTED]
Hi,
This patch has been bouncing around for a long time, and has made
appearances in various -mm trees since 2.6.something-teen. However,
it has never made it into mainline, and I'm starting to get concerned
that it will miss 2.6.23 as well.
There was some discussion, and I think I addressed all of the various
issues that came up. I'd really like to get this patch in, but am unclear
on exactly who to pester at this point. Matt Wilcox seems to be looking
for a job (???) and I am unable to git-clone James Bottomley's
git://kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
git tree; there's some error on the server side.
Linas.
drivers/scsi/sym53c8xx_2/sym_glue.c | 136
drivers/scsi/sym53c8xx_2/sym_glue.h |4 +
drivers/scsi/sym53c8xx_2/sym_hipd.c |6 +
3 files changed, 146 insertions(+)
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c
===
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2007-04-25
22:08:32.0 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c2007-05-14
17:31:44.0 -0500
@@ -657,6 +657,10 @@ static irqreturn_t sym53c8xx_intr(int ir
unsigned long flags;
struct sym_hcb *np = (struct sym_hcb *)dev_id;
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if (pci_channel_offline(np->s.device))
+ return IRQ_HANDLED;
+
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
spin_lock_irqsave(np->s.host->host_lock, flags);
@@ -726,6 +730,20 @@ static int sym_eh_handler(int op, char *
dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
+ /* We may be in an error condition because the PCI bus
+* went down. In this case, we need to wait until the
+* PCI bus is reset, the card is reset, and only then
+* proceed with the scsi error recovery. There's no
+* point in hurrying; take a leisurely wait.
+*/
+#define WAIT_FOR_PCI_RECOVERY 35
+ if (pci_channel_offline(np->s.device)) {
+ int finished_reset = wait_for_completion_timeout(
+ &np->s.io_reset_wait, WAIT_FOR_PCI_RECOVERY*HZ);
+ if (!finished_reset)
+ return SCSI_FAILED;
+ }
+
spin_lock_irq(host-host_lock);
/* This one is queued in some place - to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
np->maxoffs = dev->chip.offset_max;
np->maxburst= dev->chip.burst_max;
np->myaddr = dev->host_id;
+ init_completion(&np->s.io_reset_wait);
/*
* Edit its name.
@@ -1948,6 +1967,116 @@ static void __devexit sym2_remove(struct
attach_count--;
}
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* If slot is permanently frozen, turn everything off */
+ if (state == pci_channel_io_perm_failure) {
+ sym2_remove(pdev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ init_completion(&np->s.io_reset_wait);
+ disable_irq(pdev->irq);
+ pci_disable_device(pdev);
+
+ /* Request a slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_reset_workarounds -- hardware-specific work-arounds
+ *
+ * This routine is similar to sym_set_workarounds(), except
+ * that, at this point, we already know that the device was
+ * successfully initialized at least once before, and so most
+ * of the steps taken there are un-needed here.
+ */
+static void sym2_reset_workarounds(struct pci_dev *pdev)
+{
+ u_char revision;
+ u_short status_reg;
+ struct sym_chip *chip;
+
+ pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
+ chip = sym_lookup_chip_table(pdev->device, revision);
+
+ /* Work around for errant bit in 895A, in a fashion
+* similar to what is done in sym_set_workarounds().
+*/
+ pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+ if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) {
+ status_reg = PCI_STATUS_66MHZ;
+ pci_write_config_word(pdev, PCI_STATUS, status_reg);
+ pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+ }
+}
+
+/**
+ *