On 11/11/2022 09:44, Jan Beulich wrote:
The idea of the WARN() / BUG_ON() is to
- not leave failed unmasking unrecorded,
- not continue after failure to mask an entry.
Then lets make msi_set_mask_bit() unable to fail with something like
this (untested) patch. Would this be acceptable?
David
From 837649a70d44455f4fd98e2eaa46dcf35a56d00a Mon Sep 17 00:00:00 2001
From: David Vrabel <dvra...@amazon.co.uk>
Date: Fri, 11 Nov 2022 14:30:16 +0000
Subject: [PATCH] x86: Always enable memory space decodes when using MSI-X
Instead of the numerous (racy) checks for memory space accesses being
enabled before writing the the MSI-X table, force Memory Space Enable
to be set in the Command register if MSI-X is used.
This allows the memory_decoded() function and the associated error
paths to be removed (since it will always return true). In particular,
msi_set_mask_bit() can no longer fail and its return value is removed.
Note that if the PCI device is a virtual function, the relevant
command register is in the corresponding physical function.
Signed-off-by: David Vrabel <dvra...@amazon.co.uk>
---
xen/arch/x86/include/asm/pci.h | 3 +
xen/arch/x86/msi.c | 116 +++++++++------------------------
xen/arch/x86/pci.c | 39 ++++++++++-
3 files changed, 71 insertions(+), 87 deletions(-)
diff --git a/xen/arch/x86/include/asm/pci.h b/xen/arch/x86/include/asm/pci.h
index f4a58c8acf..4f59b70959 100644
--- a/xen/arch/x86/include/asm/pci.h
+++ b/xen/arch/x86/include/asm/pci.h
@@ -32,8 +32,11 @@ struct arch_pci_dev {
domid_t pseudo_domid;
mfn_t leaf_mfn;
struct page_list_head pgtables_list;
+ uint16_t host_command;
+ uint16_t guest_command;
};
+void pci_command_override(struct pci_dev *pdev, uint16_t val);
int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
unsigned int reg, unsigned int size,
uint32_t *data);
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index d0bf63df1d..2f8667aa7b 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -124,27 +124,11 @@ static void msix_put_fixmap(struct arch_msix
*msix, int idx)
spin_unlock(&msix->table_lock);
}
-static bool memory_decoded(const struct pci_dev *dev)
-{
- pci_sbdf_t sbdf = dev->sbdf;
-
- if ( dev->info.is_virtfn )
- {
- sbdf.bus = dev->info.physfn.bus;
- sbdf.devfn = dev->info.physfn.devfn;
- }
-
- return pci_conf_read16(sbdf, PCI_COMMAND) & PCI_COMMAND_MEMORY;
-}
-
static bool msix_memory_decoded(const struct pci_dev *dev, unsigned
int pos)
{
uint16_t control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
- if ( !(control & PCI_MSIX_FLAGS_ENABLE) )
- return false;
-
- return memory_decoded(dev);
+ return control & PCI_MSIX_FLAGS_ENABLE;
}
/*
@@ -314,7 +298,7 @@ int msi_maskable_irq(const struct msi_desc *entry)
|| entry->msi_attrib.maskbit;
}
-static bool msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
+static void msi_set_mask_bit(struct irq_desc *desc, bool host, bool guest)
{
struct msi_desc *entry = desc->msi_desc;
struct pci_dev *pdev;
@@ -354,45 +338,26 @@ static bool msi_set_mask_bit(struct irq_desc
*desc, bool host, bool guest)
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
}
- if ( likely(memory_decoded(pdev)) )
- {
- writel(flag, entry->mask_base +
PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- if ( likely(control & PCI_MSIX_FLAGS_ENABLE) )
- break;
-
- entry->msi_attrib.host_masked = host;
- entry->msi_attrib.guest_masked = guest;
+ writel(flag, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ readl(entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- flag = true;
- }
- else if ( flag && !(control & PCI_MSIX_FLAGS_MASKALL) )
+ if ( unlikely(!(control & PCI_MSIX_FLAGS_ENABLE)) )
{
- domid_t domid = pdev->domain->domain_id;
-
- maskall = true;
- if ( pdev->msix->warned != domid )
- {
- pdev->msix->warned = domid;
- printk(XENLOG_G_WARNING
- "cannot mask IRQ %d: masking MSI-X on Dom%d's
%pp\n",
- desc->irq, domid, &pdev->sbdf);
- }
+ pdev->msix->host_maskall = maskall;
+ if ( maskall || pdev->msix->guest_maskall )
+ control |= PCI_MSIX_FLAGS_MASKALL;
+ pci_conf_write16(pdev->sbdf,
+ msix_control_reg(entry->msi_attrib.pos),
control);
}
- pdev->msix->host_maskall = maskall;
- if ( maskall || pdev->msix->guest_maskall )
- control |= PCI_MSIX_FLAGS_MASKALL;
- pci_conf_write16(pdev->sbdf,
- msix_control_reg(entry->msi_attrib.pos), control);
- return flag;
+ break;
default:
- return 0;
+ ASSERT_UNREACHABLE();
+ break;
}
+
entry->msi_attrib.host_masked = host;
entry->msi_attrib.guest_masked = guest;
-
- return 1;
}
static int msi_get_mask_bit(const struct msi_desc *entry)
@@ -418,16 +383,12 @@ static int msi_get_mask_bit(const struct msi_desc
*entry)
void cf_check mask_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 1,
-
desc->msi_desc->msi_attrib.guest_masked)) )
- BUG_ON(!(desc->status & IRQ_DISABLED));
+ msi_set_mask_bit(desc, 1, desc->msi_desc->msi_attrib.guest_masked);
}
void cf_check unmask_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 0,
-
desc->msi_desc->msi_attrib.guest_masked)) )
- WARN();
+ msi_set_mask_bit(desc, 0, desc->msi_desc->msi_attrib.guest_masked);
}
void guest_mask_msi_irq(struct irq_desc *desc, bool mask)
@@ -437,15 +398,13 @@ void guest_mask_msi_irq(struct irq_desc *desc,
bool mask)
static unsigned int cf_check startup_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 0, !!(desc->status &
IRQ_GUEST))) )
- WARN();
+ msi_set_mask_bit(desc, 0, !!(desc->status & IRQ_GUEST));
return 0;
}
static void cf_check shutdown_msi_irq(struct irq_desc *desc)
{
- if ( unlikely(!msi_set_mask_bit(desc, 1, 1)) )
- BUG_ON(!(desc->status & IRQ_DISABLED));
+ msi_set_mask_bit(desc, 1, 1);
}
void cf_check ack_nonmaskable_msi_irq(struct irq_desc *desc)
@@ -785,6 +744,12 @@ static int msix_capability_init(struct pci_dev *dev,
ASSERT(pcidevs_locked());
+ /*
+ * Force enable access to the MSI-X tables, so access to the
+ * per-vector mask bits always works.
+ */
+ pci_command_override(dev, PCI_COMMAND_MEMORY);
+
control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
/*
* Ensure MSI-X interrupts are masked during setup. Some devices
require
@@ -797,13 +762,6 @@ static int msix_capability_init(struct pci_dev *dev,
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
- if ( unlikely(!memory_decoded(dev)) )
- {
- pci_conf_write16(dev->sbdf, msix_control_reg(pos),
- control & ~PCI_MSIX_FLAGS_ENABLE);
- return -ENXIO;
- }
-
if ( desc )
{
entry = alloc_msi_entry(1);
@@ -1122,19 +1080,15 @@ static void __pci_disable_msix(struct msi_desc
*entry)
BUG_ON(list_empty(&dev->msi_list));
- if ( likely(memory_decoded(dev)) )
- writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- else if ( !(control & PCI_MSIX_FLAGS_MASKALL) )
- {
- printk(XENLOG_WARNING "cannot disable IRQ %d: masking MSI-X on
%pp\n",
- entry->irq, &dev->sbdf);
- maskall = true;
- }
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+
dev->msix->host_maskall = maskall;
if ( maskall || dev->msix->guest_maskall )
control |= PCI_MSIX_FLAGS_MASKALL;
pci_conf_write16(dev->sbdf, msix_control_reg(pos), control);
+ pci_command_override(dev, 0);
+
_pci_cleanup_msix(dev->msix);
}
@@ -1353,13 +1307,6 @@ int pci_restore_msi_state(struct pci_dev *pdev)
pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
control | (PCI_MSIX_FLAGS_ENABLE |
PCI_MSIX_FLAGS_MASKALL));
- if ( unlikely(!memory_decoded(pdev)) )
- {
- spin_unlock_irqrestore(&desc->lock, flags);
- pci_conf_write16(pdev->sbdf, msix_control_reg(pos),
- control & ~PCI_MSIX_FLAGS_ENABLE);
- return -ENXIO;
- }
}
type = entry->msi_attrib.type;
@@ -1368,10 +1315,9 @@ int pci_restore_msi_state(struct pci_dev *pdev)
for ( i = 0; ; )
{
- if ( unlikely(!msi_set_mask_bit(desc,
-
entry[i].msi_attrib.host_masked,
-
entry[i].msi_attrib.guest_masked)) )
- BUG();
+ msi_set_mask_bit(desc,
+ entry[i].msi_attrib.host_masked,
+ entry[i].msi_attrib.guest_masked);
if ( !--nr )
break;
diff --git a/xen/arch/x86/pci.c b/xen/arch/x86/pci.c
index 97b792e578..0c4b49f042 100644
--- a/xen/arch/x86/pci.c
+++ b/xen/arch/x86/pci.c
@@ -69,6 +69,24 @@ void pci_conf_write(uint32_t cf8, uint8_t offset,
uint8_t bytes, uint32_t data)
spin_unlock_irqrestore(&pci_config_lock, flags);
}
+void pci_command_override(struct pci_dev *pdev, uint16_t val)
+{
+ pci_sbdf_t sbdf = pdev->sbdf;
+
+ ASSERT(pcidevs_locked());
+
+ if ( pdev->info.is_virtfn )
+ {
+ sbdf.bus = pdev->info.physfn.bus;
+ sbdf.devfn = pdev->info.physfn.devfn;
+
+ pdev = pci_get_pdev(NULL, sbdf);
+ }
+
+ pdev->arch.host_command = val;
+ pci_conf_write16(sbdf, PCI_COMMAND, pdev->arch.host_command |
pdev->arch.guest_command);
+}
+
int pci_conf_write_intercept(unsigned int seg, unsigned int bdf,
unsigned int reg, unsigned int size,
uint32_t *data)
@@ -85,14 +103,31 @@ int pci_conf_write_intercept(unsigned int seg,
unsigned int bdf,
* Avoid expensive operations when no hook is going to do anything
* for the access anyway.
*/
- if ( reg < 64 || reg >= 256 )
+ if ( reg != PCI_COMMAND && (reg < 64 || reg >= 256) )
return 0;
pcidevs_lock();
pdev = pci_get_pdev(NULL, PCI_SBDF(seg, bdf));
if ( pdev )
- rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+ {
+ switch ( reg )
+ {
+ case PCI_COMMAND:
+ if ( size == 2 )
+ {
+ pdev->arch.guest_command = *data;
+ *data |= pdev->arch.host_command;
+ }
+ else
+ rc = -EACCESS;
+ break;
+
+ default:
+ rc = pci_msi_conf_write_intercept(pdev, reg, size, data);
+ break;
+ }
+ }
pcidevs_unlock();
--
2.37.1