Because both MSI-X interrupt messages and MSI-X table writes are posted, nothing orders one against the other and they can cross while in flight. As a result, an interrupt can be received long after the kernel believes it has been disabled, or can be delivered to a stale vector after rebalancing.
This patch performs a read flush after writes to the MSI-X table for enable/disable and rebalancing operations. Because this is an expensive operation, we do not perform the read flush after mask/unmask operations. Hardware which supports MSI-X typically also supports some sort of interrupt moderation, so a read-flush is not necessary for mask/unmask operations. This patch has been validated with (unreleased) network hardware which uses MSI-X. Signed-off-by: Mitch Williams <[EMAIL PROTECTED]> diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/i386/kernel/io_apic.c linux-2.6.20.3/arch/i386/kernel/io_apic.c --- linux-2.6.20.3-clean/arch/i386/kernel/io_apic.c 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/arch/i386/kernel/io_apic.c 2007-03-22 10:33:47.000000000 -0700 @@ -2597,6 +2597,8 @@ static void set_msi_irq_affinity(unsigne */ static struct irq_chip msi_chip = { .name = "PCI-MSI", + .enable = enable_msi_irq, + .disable = disable_msi_irq, .unmask = unmask_msi_irq, .mask = mask_msi_irq, .ack = ack_ioapic_irq, diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/ia64/kernel/msi_ia64.c linux-2.6.20.3/arch/ia64/kernel/msi_ia64.c --- linux-2.6.20.3-clean/arch/ia64/kernel/msi_ia64.c 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/arch/ia64/kernel/msi_ia64.c 2007-03-22 10:33:47.000000000 -0700 @@ -116,6 +116,8 @@ static int ia64_msi_retrigger_irq(unsign */ static struct irq_chip ia64_msi_chip = { .name = "PCI-MSI", + .enable = enable_msi_irq, + .disable = disable_msi_irq, .mask = mask_msi_irq, .unmask = unmask_msi_irq, .ack = ia64_ack_msi_irq, diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/ia64/sn/kernel/msi_sn.c linux-2.6.20.3/arch/ia64/sn/kernel/msi_sn.c --- linux-2.6.20.3-clean/arch/ia64/sn/kernel/msi_sn.c 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/arch/ia64/sn/kernel/msi_sn.c 2007-03-22 10:33:47.000000000 -0700 @@ -216,6 +216,8 @@ static int sn_msi_retrigger_irq(unsigned static struct irq_chip sn_msi_chip = { .name = "PCI-MSI", + .enable = 
enable_msi_irq, + .disable = disable_msi_irq, .mask = mask_msi_irq, .unmask = unmask_msi_irq, .ack = sn_ack_msi_irq, diff -urpN -X dontdiff linux-2.6.20.3-clean/arch/x86_64/kernel/io_apic.c linux-2.6.20.3/arch/x86_64/kernel/io_apic.c --- linux-2.6.20.3-clean/arch/x86_64/kernel/io_apic.c 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/arch/x86_64/kernel/io_apic.c 2007-03-22 10:36:03.000000000 -0700 @@ -1923,6 +1923,7 @@ static void set_msi_irq_affinity(unsigne cpus_and(mask, tmp, CPU_MASK_ALL); + msix_flush_writes(irq); vector = assign_irq_vector(irq, mask, &tmp); if (vector < 0) return; @@ -1937,6 +1938,7 @@ static void set_msi_irq_affinity(unsigne msg.address_lo |= MSI_ADDR_DEST_ID(dest); write_msi_msg(irq, &msg); + msix_flush_writes(irq); set_native_irq_info(irq, mask); } #endif /* CONFIG_SMP */ @@ -1947,6 +1949,8 @@ static void set_msi_irq_affinity(unsigne */ static struct irq_chip msi_chip = { .name = "PCI-MSI", + .enable = enable_msi_irq, + .disable = disable_msi_irq, .unmask = unmask_msi_irq, .mask = mask_msi_irq, .ack = ack_apic_edge, diff -urpN -X dontdiff linux-2.6.20.3-clean/drivers/pci/msi.c linux-2.6.20.3/drivers/pci/msi.c --- linux-2.6.20.3-clean/drivers/pci/msi.c 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/drivers/pci/msi.c 2007-03-22 10:33:47.000000000 -0700 @@ -40,6 +40,29 @@ static int msi_cache_init(void) return 0; } +void msix_flush_writes(unsigned int irq) +{ + struct msi_desc *entry; + + entry = msi_desc[irq]; + BUG_ON(!entry || !entry->dev); + switch (entry->msi_attrib.type) { + case PCI_CAP_ID_MSI: + /* nothing to do */ + break; + case PCI_CAP_ID_MSIX: + { + int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET; + readl(entry->mask_base + offset); + break; + } + default: + BUG(); + break; + } +} + static void msi_set_mask_bit(unsigned int irq, int flag) { struct msi_desc *entry; @@ -161,6 +184,17 @@ void unmask_msi_irq(unsigned int irq) msi_set_mask_bit(irq, 0); } +void 
disable_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 1); + msix_flush_writes(irq); +} + +void enable_msi_irq(unsigned int irq) +{ + msi_set_mask_bit(irq, 0); + msix_flush_writes(irq); +} static int msi_free_irq(struct pci_dev* dev, int irq); static int msi_init(void) { diff -urpN -X dontdiff linux-2.6.20.3-clean/include/linux/msi.h linux-2.6.20.3/include/linux/msi.h --- linux-2.6.20.3-clean/include/linux/msi.h 2007-02-04 10:44:54.000000000 -0800 +++ linux-2.6.20.3/include/linux/msi.h 2007-03-22 10:33:47.000000000 -0700 @@ -10,9 +10,12 @@ struct msi_msg { /* Heper functions */ extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); +extern void disable_msi_irq(unsigned int irq); +extern void enable_msi_irq(unsigned int irq); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); +extern void msix_flush_writes(unsigned int irq); struct msi_desc { struct { - To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/