Because both MSI-X interrupt messages and MSI-X table writes are posted,
it's possible for them to cross while in-flight.  This results in
interrupts being received long after the kernel thinks they're disabled,
and in interrupts being sent to stale vectors after rebalancing.

This patch performs a read flush after writes to the MSI-X table for
enable/disable and rebalancing operations.  Because a read flush is an
expensive operation, we do not perform it after mask/unmask
operations.  Hardware which supports MSI-X typically also supports some
sort of interrupt moderation, so a read flush is not necessary for
mask/unmask operations.

This patch has been validated with (unreleased) network hardware which
uses MSI-X.

Signed-off-by: Mitch Williams <[EMAIL PROTECTED]>

diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/i386/kernel/io_apic.c linux-2.6.20.3/arch/i386/kernel/io_apic.c
--- linux-2.6.20.3-clean/arch/i386/kernel/io_apic.c     2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/arch/i386/kernel/io_apic.c   2007-03-22 10:33:47.000000000 -0700
@@ -2597,6 +2597,8 @@ static void set_msi_irq_affinity(unsigne
  */
 static struct irq_chip msi_chip = {
        .name           = "PCI-MSI",
+       .enable         = enable_msi_irq,
+       .disable        = disable_msi_irq,
        .unmask         = unmask_msi_irq,
        .mask           = mask_msi_irq,
        .ack            = ack_ioapic_irq,
diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/ia64/kernel/msi_ia64.c linux-2.6.20.3/arch/ia64/kernel/msi_ia64.c
--- linux-2.6.20.3-clean/arch/ia64/kernel/msi_ia64.c    2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/arch/ia64/kernel/msi_ia64.c  2007-03-22 10:33:47.000000000 -0700
@@ -116,6 +116,8 @@ static int ia64_msi_retrigger_irq(unsign
  */
 static struct irq_chip ia64_msi_chip = {
        .name           = "PCI-MSI",
+       .enable         = enable_msi_irq,
+       .disable        = disable_msi_irq,
        .mask           = mask_msi_irq,
        .unmask         = unmask_msi_irq,
        .ack            = ia64_ack_msi_irq,
diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/ia64/sn/kernel/msi_sn.c linux-2.6.20.3/arch/ia64/sn/kernel/msi_sn.c
--- linux-2.6.20.3-clean/arch/ia64/sn/kernel/msi_sn.c   2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/arch/ia64/sn/kernel/msi_sn.c 2007-03-22 10:33:47.000000000 -0700
@@ -216,6 +216,8 @@ static int sn_msi_retrigger_irq(unsigned
 
 static struct irq_chip sn_msi_chip = {
        .name           = "PCI-MSI",
+       .enable         = enable_msi_irq,
+       .disable        = disable_msi_irq,
        .mask           = mask_msi_irq,
        .unmask         = unmask_msi_irq,
        .ack            = sn_ack_msi_irq,
diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/x86_64/kernel/io_apic.c linux-2.6.20.3/arch/x86_64/kernel/io_apic.c
--- linux-2.6.20.3-clean/arch/x86_64/kernel/io_apic.c   2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/arch/x86_64/kernel/io_apic.c 2007-03-22 10:36:03.000000000 -0700
@@ -1923,6 +1923,7 @@ static void set_msi_irq_affinity(unsigne
 
        cpus_and(mask, tmp, CPU_MASK_ALL);
 
+       msix_flush_writes(irq);
        vector = assign_irq_vector(irq, mask, &tmp);
        if (vector < 0)
                return;
@@ -1937,6 +1938,7 @@ static void set_msi_irq_affinity(unsigne
        msg.address_lo |= MSI_ADDR_DEST_ID(dest);
 
        write_msi_msg(irq, &msg);
+       msix_flush_writes(irq);
        set_native_irq_info(irq, mask);
 }
 #endif /* CONFIG_SMP */
@@ -1947,6 +1949,8 @@ static void set_msi_irq_affinity(unsigne
  */
 static struct irq_chip msi_chip = {
        .name           = "PCI-MSI",
+       .enable         = enable_msi_irq,
+       .disable        = disable_msi_irq,
        .unmask         = unmask_msi_irq,
        .mask           = mask_msi_irq,
        .ack            = ack_apic_edge,
diff -urpN -X dontdiff linux-2.6.20.3-clean/drivers/pci/msi.c linux-2.6.20.3/drivers/pci/msi.c
--- linux-2.6.20.3-clean/drivers/pci/msi.c      2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/drivers/pci/msi.c    2007-03-22 10:33:47.000000000 -0700
@@ -40,6 +40,29 @@ static int msi_cache_init(void)
        return 0;
 }
 
+void msix_flush_writes(unsigned int irq)
+{
+       struct msi_desc *entry;
+
+       entry = msi_desc[irq];
+       BUG_ON(!entry || !entry->dev);
+       switch (entry->msi_attrib.type) {
+       case PCI_CAP_ID_MSI:
+               /* nothing to do */
+               break;
+       case PCI_CAP_ID_MSIX:
+       {
+               int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
+                       PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+               readl(entry->mask_base + offset);
+               break;
+       }
+       default:
+               BUG();
+               break;
+       }
+}
+
 static void msi_set_mask_bit(unsigned int irq, int flag)
 {
        struct msi_desc *entry;
@@ -161,6 +184,17 @@ void unmask_msi_irq(unsigned int irq)
        msi_set_mask_bit(irq, 0);
 }
 
+void disable_msi_irq(unsigned int irq)
+{
+       msi_set_mask_bit(irq, 1);
+       msix_flush_writes(irq);
+}
+
+void enable_msi_irq(unsigned int irq)
+{
+       msi_set_mask_bit(irq, 0);
+       msix_flush_writes(irq);
+}
 static int msi_free_irq(struct pci_dev* dev, int irq);
 static int msi_init(void)
 {
diff -urpN -X dontdiff linux-2.6.20.3-clean/include/linux/msi.h linux-2.6.20.3/include/linux/msi.h
--- linux-2.6.20.3-clean/include/linux/msi.h    2007-02-04 10:44:54.000000000 -0800
+++ linux-2.6.20.3/include/linux/msi.h  2007-03-22 10:33:47.000000000 -0700
@@ -10,9 +10,12 @@ struct msi_msg {
 /* Heper functions */
 extern void mask_msi_irq(unsigned int irq);
 extern void unmask_msi_irq(unsigned int irq);
+extern void disable_msi_irq(unsigned int irq);
+extern void enable_msi_irq(unsigned int irq);
 extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
 
 extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+extern void msix_flush_writes(unsigned int irq);
 
 struct msi_desc {
        struct {
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to