ohci1394 has basically no protection against concurrent access to
registers.  I suspect this as a reason for bug "Host disappears on 1394
bus reset". http://bugzilla.kernel.org/show_bug.cgi?id=7569

The following changes are done here:
  - Use the ohci->event_lock to serialize the trigger for software-
    enforced bus resets against a large portion of the IRQ handler.
  - To accomplish this, cover much more regions of ohci_irq_handler
    by event_lock.  Needless to say, this has to be tested for a lot
    of scenarios.
  - Use spin_lock instead of spin_lock_irqsave in ohci_irq_handler.

It is somehow disturbing to see how the event_lock was originally taken
and released without discernable reason.

Todo:
  - Test.
  - Audit *all* of ohci1394's register accesses for the need of
    serialization by spinlocks + mmiowb.
  - Look into unifying ohci1394's internal locking to a single lock
    per host.
  - Audit the selfID complete event handling, including what happens
    higher up in ieee1394 core, for the possibility to move workload
    into tasklets or workqueue jobs.

Signed-off-by: Stefan Richter <[EMAIL PROTECTED]>
---

Robert, does this do anything for your setup?  So far I only did a quick
test with a UP PC (kernel configured for SMP PREEMPT) and a FireWire HDD
and at least it didn't lock up...


 drivers/ieee1394/ohci1394.c |   51 ++++++++++++++++--------------------
 1 file changed, 23 insertions(+), 28 deletions(-)


Index: linux-2.6.20-rc6/drivers/ieee1394/ohci1394.c
===================================================================
--- linux-2.6.20-rc6.orig/drivers/ieee1394/ohci1394.c   2007-01-26 
16:59:45.000000000 +0100
+++ linux-2.6.20-rc6/drivers/ieee1394/ohci1394.c        2007-01-26 
18:21:01.000000000 +0100
@@ -943,6 +943,7 @@ static int ohci_devctl(struct hpsb_host 
 
        switch (cmd) {
        case RESET_BUS:
+               spin_lock_irqsave(&ohci->event_lock, flags);
                switch (arg) {
                case SHORT_RESET:
                        phy_reg = get_phy_reg(ohci, 5);
@@ -990,6 +991,8 @@ static int ohci_devctl(struct hpsb_host 
                default:
                        retval = -1;
                }
+               mmiowb();
+               spin_unlock_irqrestore(&ohci->event_lock, flags);
                break;
 
        case GET_CYCLE_COUNTER:
@@ -2304,27 +2307,21 @@ static irqreturn_t ohci_irq_handler(int 
        quadlet_t event, node_id;
        struct ti_ohci *ohci = (struct ti_ohci *)dev_id;
        struct hpsb_host *host = ohci->host;
-       int phyid = -1, isroot = 0;
-       unsigned long flags;
+       int phyid = -1, isroot = 0, call_selfid_complete = 0;
 
-       /* Read and clear the interrupt event register.  Don't clear
-        * the busReset event, though. This is done when we get the
-        * selfIDComplete interrupt. */
-       spin_lock_irqsave(&ohci->event_lock, flags);
-       event = reg_read(ohci, OHCI1394_IntEventClear);
-       reg_write(ohci, OHCI1394_IntEventClear, event & ~OHCI1394_busReset);
-       spin_unlock_irqrestore(&ohci->event_lock, flags);
-
-       if (!event)
-               return IRQ_NONE;
+       spin_lock(&ohci->event_lock);
 
-       /* If event is ~(u32)0 cardbus card was ejected.  In this case
-        * we just return, and clean up in the ohci1394_pci_remove
-        * function. */
-       if (event == ~(u32) 0) {
-               DBGMSG("Device removed.");
+       /* Read and clear the interrupt event register.  Don't clear the
+        * busReset event though.  This is done when we get the selfIDComplete
+        * interrupt.
+        * If event is ~0, the CardBus card was ejected.  In this case we just
+        * return and clean up in ohci1394_pci_remove. */
+       event = reg_read(ohci, OHCI1394_IntEventClear);
+       if (!event || !~event) {
+               spin_unlock(&ohci->event_lock);
                return IRQ_NONE;
        }
+       reg_write(ohci, OHCI1394_IntEventClear, event & ~OHCI1394_busReset);
 
        DBGMSG("IntEvent: %08x", event);
 
@@ -2399,20 +2396,17 @@ static irqreturn_t ohci_irq_handler(int 
                /* The busReset event bit can't be cleared during the
                 * selfID phase, so we disable busReset interrupts, to
                 * avoid burying the cpu in interrupt requests. */
-               spin_lock_irqsave(&ohci->event_lock, flags);
                reg_write(ohci, OHCI1394_IntMaskClear, OHCI1394_busReset);
 
                if (ohci->check_busreset) {
                        int loop_count = 0;
 
+                       /* Remember kids: Don't do this at home. */
+                       spin_unlock(&ohci->event_lock);
                        udelay(10);
-
                        while (reg_read(ohci, OHCI1394_IntEventSet) & 
OHCI1394_busReset) {
                                reg_write(ohci, OHCI1394_IntEventClear, 
OHCI1394_busReset);
-
-                               spin_unlock_irqrestore(&ohci->event_lock, 
flags);
                                udelay(10);
-                               spin_lock_irqsave(&ohci->event_lock, flags);
 
                                /* The loop counter check is to prevent the 
driver
                                 * from remaining in this state forever. For the
@@ -2428,8 +2422,8 @@ static irqreturn_t ohci_irq_handler(int 
 
                                loop_count++;
                        }
+                       spin_lock(&ohci->event_lock);
                }
-               spin_unlock_irqrestore(&ohci->event_lock, flags);
                if (!host->in_bus_reset) {
                        DBGMSG("irq_handler: Bus reset requested");
 
@@ -2520,10 +2514,8 @@ static irqreturn_t ohci_irq_handler(int 
 
                        /* Clear the bus reset event and re-enable the
                         * busReset interrupt.  */
-                       spin_lock_irqsave(&ohci->event_lock, flags);
                        reg_write(ohci, OHCI1394_IntEventClear, 
OHCI1394_busReset);
                        reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
-                       spin_unlock_irqrestore(&ohci->event_lock, flags);
 
                        /* Turn on phys dma reception.
                         *
@@ -2540,7 +2532,7 @@ static irqreturn_t ohci_irq_handler(int 
                               reg_read(ohci, OHCI1394_PhyReqFilterHiSet),
                               reg_read(ohci, OHCI1394_PhyReqFilterLoSet));
 
-                       hpsb_selfid_complete(host, phyid, isroot);
+                       call_selfid_complete = 1;
                } else
                        PRINT(KERN_ERR,
                              "SelfID received outside of bus reset sequence");
@@ -2552,9 +2544,12 @@ selfid_not_valid:
        /* Make sure we handle everything, just in case we accidentally
         * enabled an interrupt that we didn't write a handler for.  */
        if (event)
-               PRINT(KERN_ERR, "Unhandled interrupt(s) 0x%08x",
-                     event);
+               PRINT(KERN_ERR, "Unhandled interrupt(s) 0x%08x", event);
 
+       mmiowb();
+       spin_unlock(&ohci->event_lock);
+       if (call_selfid_complete)
+               hpsb_selfid_complete(host, phyid, isroot);
        return IRQ_HANDLED;
 }
 

-- 
Stefan Richter
-=====-=-=== ---= ==-=-
http://arcgraph.de/sr/

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to