Thanks to Marcelo's observation, the following code has a potential issue:

if (cancel_work_sync(&assigned_dev->interrupt_work))
        kvm_put_kvm(kvm);

In fact, cancel_work_sync() returns true either when the work struct is only
scheduled or when the callback of the work struct is executed. This code only
considers the former situation.

Also, we have a window between cancel_work_sync() and free_irq(). This patch
fixes both issues.

Signed-off-by: Sheng Yang <[email protected]>
---
 include/linux/kvm_host.h |    1 +
 virt/kvm/kvm_main.c      |   34 ++++++++++++++++++++++++++++++----
 2 files changed, 31 insertions(+), 4 deletions(-)

diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 58e4b7e..e0775b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@ struct kvm_assigned_dev_kernel {
 #define KVM_ASSIGNED_DEV_HOST_MSI      (1 << 9)
        unsigned long irq_requested_type;
 #define KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED     (1 << 0)
+#define KVM_ASSIGNED_DEV_IRQ_GOT_KVM           (1 << 1)
        unsigned long state;
        int irq_source_id;
        struct pci_dev *dev;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 065af2d..9ffa601 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -119,6 +119,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct 
work_struct *work)
 
        mutex_unlock(&assigned_dev->kvm->lock);
        kvm_put_kvm(assigned_dev->kvm);
+       assigned_dev->state &= ~KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
 }
 
 static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -126,7 +127,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void 
*dev_id)
        struct kvm_assigned_dev_kernel *assigned_dev =
                (struct kvm_assigned_dev_kernel *) dev_id;
 
+       /*
+        * In kvm_free_device_irq, cancel_work_sync() returns true if:
+        * 1. work is scheduled, and then cancelled.
+        * 2. work callback is executed.
+        *
+        * We need to call kvm_put_kvm() for the former, but not the latter.
+        */
        kvm_get_kvm(assigned_dev->kvm);
+       assigned_dev->state |= KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
 
        schedule_work(&assigned_dev->interrupt_work);
 
@@ -173,10 +182,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
        if (!assigned_dev->irq_requested_type)
                return;
 
-       if (cancel_work_sync(&assigned_dev->interrupt_work))
-               /* We had pending work. That means we will have to take
-                * care of kvm_put_kvm.
-                */
+       /*
+        * We need to ensure: kvm_put_kvm() paired with kvm_get_kvm() in
+        * kvm_assigned_dev_intr, and no more interrupt after we cancelled
+        * current one.
+        *
+        * There are two cases in which cancel_work_sync() returns true:
+        * 1. The work is scheduled, but the callback hasn't been called. We
+        * need to call kvm_put_kvm() here. And the IRQ is already disabled
+        * without doubt.
+        *
+        * 2. The callback has executed; here we don't need to call
+        * kvm_put_kvm(), but we may need to disable the irq (e.g. for MSI).
+        *
+        * We distinguish the two cases via assigned_dev->state. We disable the
+        * irq here anyway; this may result in a nested IRQ disable, but that's
+        * fine, for we are going to free it.
+        */
+       disable_irq_nosync(assigned_dev->host_irq);
+
+       if (cancel_work_sync(&assigned_dev->interrupt_work) &&
+           assigned_dev->state & KVM_ASSIGNED_DEV_IRQ_GOT_KVM)
                kvm_put_kvm(kvm);
 
        free_irq(assigned_dev->host_irq, (void *)assigned_dev);
-- 
1.5.4.5

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to