Thanks to Marcelo's observation, The following code have potential issue:
if (cancel_work_sync(&assigned_dev->interrupt_work))
kvm_put_kvm(kvm);
In fact, cancel_work_sync() would return true either work struct is only
scheduled or the callback of work struct is executed. This code only
consider the former situation.
Also, we have a window between cancel_work_sync() and free_irq. This patch fixs
them two.
Signed-off-by: Sheng Yang <[email protected]>
---
include/linux/kvm_host.h | 1 +
virt/kvm/kvm_main.c | 34 ++++++++++++++++++++++++++++++----
2 files changed, 31 insertions(+), 4 deletions(-)
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index 58e4b7e..e0775b9 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -318,6 +318,7 @@ struct kvm_assigned_dev_kernel {
#define KVM_ASSIGNED_DEV_HOST_MSI (1 << 9)
unsigned long irq_requested_type;
#define KVM_ASSIGNED_DEV_HOST_IRQ_DISABLED (1 << 0)
+#define KVM_ASSIGNED_DEV_IRQ_GOT_KVM (1 << 1)
unsigned long state;
int irq_source_id;
struct pci_dev *dev;
diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
index 065af2d..9ffa601 100644
--- a/virt/kvm/kvm_main.c
+++ b/virt/kvm/kvm_main.c
@@ -119,6 +119,7 @@ static void kvm_assigned_dev_interrupt_work_handler(struct
work_struct *work)
mutex_unlock(&assigned_dev->kvm->lock);
kvm_put_kvm(assigned_dev->kvm);
+ assigned_dev->state &= ~KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
}
static irqreturn_t kvm_assigned_dev_intr(int irq, void *dev_id)
@@ -126,7 +127,15 @@ static irqreturn_t kvm_assigned_dev_intr(int irq, void
*dev_id)
struct kvm_assigned_dev_kernel *assigned_dev =
(struct kvm_assigned_dev_kernel *) dev_id;
+ /*
+ * In kvm_free_device_irq, cancel_work_sync return true if:
+ * 1. work is scheduled, and then cancelled.
+ * 2. work callback is executed.
+ *
+ * We need to call kvm_put_kvm() for the former, but not the later.
+ */
kvm_get_kvm(assigned_dev->kvm);
+ assigned_dev->state |= KVM_ASSIGNED_DEV_IRQ_GOT_KVM;
schedule_work(&assigned_dev->interrupt_work);
@@ -173,10 +182,27 @@ static void kvm_free_assigned_irq(struct kvm *kvm,
if (!assigned_dev->irq_requested_type)
return;
- if (cancel_work_sync(&assigned_dev->interrupt_work))
- /* We had pending work. That means we will have to take
- * care of kvm_put_kvm.
- */
+ /*
+ * We need to ensure: kvm_put_kvm() paired with kvm_get_kvm() in
+ * kvm_assigned_dev_intr, and no more interrupt after we cancelled
+ * current one.
+ *
+ * Here we have two possiblities for cancel_work_sync() return true:
+ * 1. The work is scheduled, but callback haven't been called. We need
+ * to call kvm_put_kvm() here. And IRQ is already disabled without
+ * doubt.
+ *
+ * 2. The callback have executed, here we don't need to call
+ * kvm_put_kvm(), but we may need to disable irq(e.g. for MSI).
+ *
+ * We judge the two condition according assigned_dev->state. And we
+ * disable irq here anyway, and it may resulted in IRQ nested disable,
+ * but it's fine, for we are going to free it.
+ */
+ disable_irq_nosync(assigned_dev->host_irq);
+
+ if (cancel_work_sync(&assigned_dev->interrupt_work) &&
+ assigned_dev->state & KVM_ASSIGNED_DEV_IRQ_GOT_KVM)
kvm_put_kvm(kvm);
free_irq(assigned_dev->host_irq, (void *)assigned_dev);
--
1.5.4.5
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html