From: Oleksandr Andrushchenko <oleksandr_andrushche...@epam.com>

Currently pcidevs lock is a global recursive spinlock which is fine for
the existing use cases. It is used to both protect pdev instances
themselves from being removed while in use and to make sure the update
of the relevant pdev properties is synchronized.

As we move towards vPCI being used for guests, this becomes problematic
in terms of lock contention. For example, during vpci_{read|write} the
access to pdev must be protected to prevent pdev disappearing under our
feet. This needs to be done with the help of pcidevs_{lock|unlock}.
On the other hand it is highly undesirable to lock all other pdev accesses
which only use pdevs in read mode, e.g. those which do not remove or
add pdevs.

For the above reasons introduce a read/write lock which will help
prevent lock contention between pdev readers and writers. This
read/write lock replaces the current recursive spinlock.

By default all existing code uses pcidevs_lock() which takes write
lock. This ensures that all existing locking logic stays the same.

To justify this change, replace locks in (V)MSI code with read
locks. This code is a perfect target, as (V)MSI code only requires
that pdevs do not disappear during handling, while (V)MSI state
is protected by its own locks.

This is in preparation for vPCI to be used for guests.

Signed-off-by: Oleksandr Andrushchenko <oleksandr_andrushche...@epam.com>
Signed-off-by: Volodymyr Babchuk <volodymyr_babc...@epam.com>

---
Since v1:
- use a per-CPU variable to track recursive readers and writers
- use read locks in vmsi code
---
 xen/arch/x86/hvm/vmsi.c       | 22 ++++++------
 xen/arch/x86/irq.c            |  8 ++---
 xen/arch/x86/msi.c            | 16 ++++-----
 xen/drivers/passthrough/pci.c | 65 +++++++++++++++++++++++++++++++----
 xen/include/xen/pci.h         | 10 +++++-
 5 files changed, 91 insertions(+), 30 deletions(-)

diff --git a/xen/arch/x86/hvm/vmsi.c b/xen/arch/x86/hvm/vmsi.c
index d4a8c953e2..c1ede676d0 100644
--- a/xen/arch/x86/hvm/vmsi.c
+++ b/xen/arch/x86/hvm/vmsi.c
@@ -466,7 +466,7 @@ int msixtbl_pt_register(struct domain *d, struct pirq 
*pirq, uint64_t gtable)
     struct msixtbl_entry *entry, *new_entry;
     int r = -EINVAL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
@@ -536,7 +536,7 @@ void msixtbl_pt_unregister(struct domain *d, struct pirq 
*pirq)
     struct pci_dev *pdev;
     struct msixtbl_entry *entry;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     if ( !msixtbl_initialised(d) )
@@ -682,7 +682,7 @@ static int vpci_msi_update(const struct pci_dev *pdev, 
uint32_t data,
 {
     unsigned int i;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( (address & MSI_ADDR_BASE_MASK) != MSI_ADDR_HEADER )
     {
@@ -724,7 +724,7 @@ void vpci_msi_arch_update(struct vpci_msi *msi, const 
struct pci_dev *pdev)
 
     ASSERT(msi->arch.pirq != INVALID_PIRQ);
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     for ( i = 0; i < msi->vectors && msi->arch.bound; i++ )
     {
         struct xen_domctl_bind_pt_irq unbind = {
@@ -743,7 +743,7 @@ void vpci_msi_arch_update(struct vpci_msi *msi, const 
struct pci_dev *pdev)
 
     msi->arch.bound = !vpci_msi_update(pdev, msi->data, msi->address,
                                        msi->vectors, msi->arch.pirq, 
msi->mask);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 }
 
 static int vpci_msi_enable(const struct pci_dev *pdev, unsigned int nr,
@@ -783,10 +783,10 @@ int vpci_msi_arch_enable(struct vpci_msi *msi, const 
struct pci_dev *pdev,
         return rc;
     msi->arch.pirq = rc;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     msi->arch.bound = !vpci_msi_update(pdev, msi->data, msi->address, vectors,
                                        msi->arch.pirq, msi->mask);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return 0;
 }
@@ -798,7 +798,7 @@ static void vpci_msi_disable(const struct pci_dev *pdev, 
int pirq,
 
     ASSERT(pirq != INVALID_PIRQ);
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     for ( i = 0; i < nr && bound; i++ )
     {
         struct xen_domctl_bind_pt_irq bind = {
@@ -814,7 +814,7 @@ static void vpci_msi_disable(const struct pci_dev *pdev, 
int pirq,
     spin_lock(&pdev->domain->event_lock);
     unmap_domain_pirq(pdev->domain, pirq);
     spin_unlock(&pdev->domain->event_lock);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 }
 
 void vpci_msi_arch_disable(struct vpci_msi *msi, const struct pci_dev *pdev)
@@ -861,7 +861,7 @@ int vpci_msix_arch_enable_entry(struct vpci_msix_entry 
*entry,
 
     entry->arch.pirq = rc;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     rc = vpci_msi_update(pdev, entry->data, entry->addr, 1, entry->arch.pirq,
                          entry->masked);
     if ( rc )
@@ -869,7 +869,7 @@ int vpci_msix_arch_enable_entry(struct vpci_msix_entry 
*entry,
         vpci_msi_disable(pdev, entry->arch.pirq, 1, false);
         entry->arch.pirq = INVALID_PIRQ;
     }
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return rc;
 }
diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c
index de30ee7779..7b4832ffb1 100644
--- a/xen/arch/x86/irq.c
+++ b/xen/arch/x86/irq.c
@@ -2156,7 +2156,7 @@ int map_domain_pirq(
         struct pci_dev *pdev;
         unsigned int nr = 0;
 
-        ASSERT(pcidevs_locked());
+        ASSERT(pcidevs_read_locked());
 
         ret = -ENODEV;
         if ( !cpu_has_apic )
@@ -2313,7 +2313,7 @@ int unmap_domain_pirq(struct domain *d, int pirq)
     if ( (pirq < 0) || (pirq >= d->nr_pirqs) )
         return -EINVAL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(spin_is_locked(&d->event_lock));
 
     info = pirq_info(d, pirq);
@@ -2907,7 +2907,7 @@ int allocate_and_map_msi_pirq(struct domain *d, int 
index, int *pirq_p,
 
     msi->irq = irq;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     /* Verify or get pirq. */
     spin_lock(&d->event_lock);
     pirq = allocate_pirq(d, index, *pirq_p, irq, type, &msi->entry_nr);
@@ -2923,7 +2923,7 @@ int allocate_and_map_msi_pirq(struct domain *d, int 
index, int *pirq_p,
 
  done:
     spin_unlock(&d->event_lock);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
     if ( ret )
     {
         switch ( type )
diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c
index 6be81e6c3b..6ce7b5523a 100644
--- a/xen/arch/x86/msi.c
+++ b/xen/arch/x86/msi.c
@@ -613,7 +613,7 @@ static int msi_capability_init(struct pci_dev *dev,
     u8 slot = PCI_SLOT(dev->devfn);
     u8 func = PCI_FUNC(dev->devfn);
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pos = pci_find_cap_offset(seg, bus, slot, func, PCI_CAP_ID_MSI);
     if ( !pos )
         return -ENODEV;
@@ -782,7 +782,7 @@ static int msix_capability_init(struct pci_dev *dev,
     if ( !pos )
         return -ENODEV;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     control = pci_conf_read16(dev->sbdf, msix_control_reg(pos));
     /*
@@ -999,7 +999,7 @@ static int __pci_enable_msi(struct msi_info *msi, struct 
msi_desc **desc)
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev )
         return -ENODEV;
@@ -1054,7 +1054,7 @@ static int __pci_enable_msix(struct msi_info *msi, struct 
msi_desc **desc)
     struct pci_dev *pdev;
     struct msi_desc *old_desc;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     pdev = pci_get_pdev(msi->seg, msi->bus, msi->devfn);
     if ( !pdev || !pdev->msix )
         return -ENODEV;
@@ -1145,7 +1145,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
     if ( !use_msi )
         return 0;
 
-    pcidevs_lock();
+    pcidevs_read_lock();
     pdev = pci_get_pdev(seg, bus, devfn);
     if ( !pdev )
         rc = -ENODEV;
@@ -1158,7 +1158,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
     }
     else
         rc = msix_capability_init(pdev, NULL, NULL);
-    pcidevs_unlock();
+    pcidevs_read_unlock();
 
     return rc;
 }
@@ -1169,7 +1169,7 @@ int pci_prepare_msix(u16 seg, u8 bus, u8 devfn, bool off)
  */
 int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
 {
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( !use_msi )
         return -EPERM;
@@ -1305,7 +1305,7 @@ int pci_restore_msi_state(struct pci_dev *pdev)
     unsigned int type = 0, pos = 0;
     u16 control = 0;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
 
     if ( !use_msi )
         return -EOPNOTSUPP;
diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c
index 938821e593..f93922acc8 100644
--- a/xen/drivers/passthrough/pci.c
+++ b/xen/drivers/passthrough/pci.c
@@ -50,21 +50,74 @@ struct pci_seg {
     } bus2bridge[MAX_BUSES];
 };
 
-static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED;
+static DEFINE_RWLOCK(_pcidevs_rwlock);
+static DEFINE_PER_CPU(unsigned int, pcidevs_read_cnt);
+static DEFINE_PER_CPU(unsigned int, pcidevs_write_cnt);
 
 void pcidevs_lock(void)
 {
-    spin_lock_recursive(&_pcidevs_lock);
+    pcidevs_write_lock();
 }
 
 void pcidevs_unlock(void)
 {
-    spin_unlock_recursive(&_pcidevs_lock);
+    pcidevs_write_unlock();
 }
 
-bool_t pcidevs_locked(void)
+bool pcidevs_locked(void)
 {
-    return !!spin_is_locked(&_pcidevs_lock);
+    return pcidevs_write_locked();
+}
+
+void pcidevs_read_lock(void)
+{
+    if ( this_cpu(pcidevs_read_cnt)++ == 0 )
+        read_lock(&_pcidevs_rwlock);
+}
+
+int pcidevs_read_trylock(void)
+{
+    int ret = 1;
+
+    if ( this_cpu(pcidevs_read_cnt) == 0 )
+        ret = read_trylock(&_pcidevs_rwlock);
+    if ( ret )
+        this_cpu(pcidevs_read_cnt)++;
+
+    return ret;
+}
+
+void pcidevs_read_unlock(void)
+{
+    ASSERT(this_cpu(pcidevs_read_cnt));
+
+    if ( --this_cpu(pcidevs_read_cnt) == 0 )
+        read_unlock(&_pcidevs_rwlock);
+}
+
+bool pcidevs_read_locked(void)
+{
+    /* Write lock implies read lock */
+    return this_cpu(pcidevs_read_cnt) || this_cpu(pcidevs_write_cnt);
+}
+
+void pcidevs_write_lock(void)
+{
+    if ( this_cpu(pcidevs_write_cnt)++ == 0 )
+        write_lock(&_pcidevs_rwlock);
+}
+
+void pcidevs_write_unlock(void)
+{
+    ASSERT(this_cpu(pcidevs_write_cnt));
+
+    if ( --this_cpu(pcidevs_write_cnt) == 0 )
+        write_unlock(&_pcidevs_rwlock);
+}
+
+bool pcidevs_write_locked(void)
+{
+    return rw_is_write_locked(&_pcidevs_rwlock);
 }
 
 static struct radix_tree_root pci_segments;
@@ -581,7 +634,7 @@ struct pci_dev *pci_get_pdev(int seg, int bus, int devfn)
     struct pci_seg *pseg = get_pseg(seg);
     struct pci_dev *pdev = NULL;
 
-    ASSERT(pcidevs_locked());
+    ASSERT(pcidevs_read_locked());
     ASSERT(seg != -1 || bus == -1);
     ASSERT(bus != -1 || devfn == -1);
 
diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h
index f34368643c..052b2ddf9f 100644
--- a/xen/include/xen/pci.h
+++ b/xen/include/xen/pci.h
@@ -158,7 +158,15 @@ struct pci_dev {
 
 void pcidevs_lock(void);
 void pcidevs_unlock(void);
-bool_t __must_check pcidevs_locked(void);
+bool __must_check pcidevs_locked(void);
+
+void pcidevs_read_lock(void);
+void pcidevs_read_unlock(void);
+bool __must_check pcidevs_read_locked(void);
+
+void pcidevs_write_lock(void);
+void pcidevs_write_unlock(void);
+bool __must_check pcidevs_write_locked(void);
 
 bool_t pci_known_segment(u16 seg);
 bool_t pci_device_detect(u16 seg, u8 bus, u8 dev, u8 func);
-- 
2.36.1

Reply via email to