mshv_irqfd_update() writes both irqfd_girq_ent and irqfd_lapic_irq as a logical unit under seqcount write protection. Readers must snapshot these fields inside the seqcount begin/retry loop to obtain a consistent point-in-time view — otherwise a concurrent update can produce a torn read where one field comes from the old state and the other from the new.
Both mshv_assert_irq_slow() and mshv_irqfd_wakeup() get this wrong: the seqcount loop bodies are empty (just spinning until a stable sequence is observed), and all reads of the protected fields happen after the loop with no protection from concurrent writes. If mshv_irqfd_update() races with interrupt assertion, the caller may use a stale or mixed vector/apic_id/control combination — delivering an interrupt to the wrong vCPU, with the wrong vector, or with the wrong trigger mode. This can cause spurious or lost interrupts in the guest, or a stuck interrupt line in the level-triggered case. Fix mshv_assert_irq_slow() by snapshotting both irqfd_girq_ent and irqfd_lapic_irq into local variables inside the seqcount loop, then using those locals for the validity check and the hypercall. Fix mshv_irqfd_wakeup() by snapshotting irqfd_lapic_irq inside its seqcount loop and passing the snapshot to mshv_try_assert_irq_fast(), so the fast path operates on the consistent copy rather than reading the field directly outside seqcount protection. Signed-off-by: Stanislav Kinsburskii <[email protected]> --- drivers/hv/mshv_eventfd.c | 47 +++++++++++++++++++++++++-------------------- 1 file changed, 26 insertions(+), 21 deletions(-) diff --git a/drivers/hv/mshv_eventfd.c b/drivers/hv/mshv_eventfd.c index 704c229ee3b19..d9491a14f30f1 100644 --- a/drivers/hv/mshv_eventfd.c +++ b/drivers/hv/mshv_eventfd.c @@ -151,10 +151,10 @@ static int mshv_vp_irq_set_vector(struct mshv_vp *vp, u32 vector) * Try to raise irq for guest via shared vector array. hyp does the actual * inject of the interrupt. */ -static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) +static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd, + const struct mshv_lapic_irq *irq) { struct mshv_partition *partition = irqfd->irqfd_partn; - struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq; struct mshv_vp *vp; if (!(ms_hyperv.ext_features & @@ -186,7 +186,8 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) return 0; } #else /* CONFIG_X86_64 */ -static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) +static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd, + const struct mshv_lapic_irq *irq) { return -EOPNOTSUPP; } @@ -195,30 +196,32 @@ static int mshv_try_assert_irq_fast(struct mshv_irqfd *irqfd) static void mshv_assert_irq_slow(struct mshv_irqfd *irqfd) { struct mshv_partition *partition = irqfd->irqfd_partn; - struct mshv_lapic_irq *irq = &irqfd->irqfd_lapic_irq; + struct mshv_guest_irq_ent girq_ent; + struct mshv_lapic_irq irq; unsigned int seq; int idx; -#if IS_ENABLED(CONFIG_X86) - WARN_ON(irqfd->irqfd_resampler && - !irq->lapic_control.level_triggered); -#endif - idx = srcu_read_lock(&partition->pt_irq_srcu); - if (irqfd->irqfd_girq_ent.guest_irq_num) { - if (!irqfd->irqfd_girq_ent.girq_entry_valid) { - srcu_read_unlock(&partition->pt_irq_srcu, idx); - return; - } - do { - seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc); - } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq)); + do { + seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc); + girq_ent = irqfd->irqfd_girq_ent; + irq = irqfd->irqfd_lapic_irq; + } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq)); + + if (girq_ent.guest_irq_num && !girq_ent.girq_entry_valid) { + srcu_read_unlock(&partition->pt_irq_srcu, idx); + return; } - hv_call_assert_virtual_interrupt(irqfd->irqfd_partn->pt_id, - irq->lapic_vector, irq->lapic_apic_id, - irq->lapic_control); +#if IS_ENABLED(CONFIG_X86) + WARN_ON(irqfd->irqfd_resampler && + !irq.lapic_control.level_triggered); +#endif + + hv_call_assert_virtual_interrupt(partition->pt_id, + irq.lapic_vector, irq.lapic_apic_id, + irq.lapic_control); srcu_read_unlock(&partition->pt_irq_srcu, idx); } @@ -304,16 +307,18 @@ static int mshv_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode, int ret = 0; if (flags & EPOLLIN) { + struct mshv_lapic_irq irq; u64 cnt; eventfd_ctx_do_read(irqfd->irqfd_eventfd_ctx, &cnt); idx = srcu_read_lock(&pt->pt_irq_srcu); do { seq = read_seqcount_begin(&irqfd->irqfd_irqe_sc); + irq = irqfd->irqfd_lapic_irq; } while (read_seqcount_retry(&irqfd->irqfd_irqe_sc, seq)); /* An event has been signaled, raise an interrupt */ - ret = mshv_try_assert_irq_fast(irqfd); + ret = mshv_try_assert_irq_fast(irqfd, &irq); if (ret) mshv_assert_irq_slow(irqfd);

