On Thu, Mar 23, 2017 at 06:24:19PM +0100, David Hildenbrand wrote: > No caller currently checks the return value of > kvm_io_bus_unregister_dev(). This is evil, as all callers silently go on > freeing their device. A stale reference will remain in the io_bus, > getting at least used again, when the iobus gets teared down on > kvm_destroy_vm() - leading to use after free errors. > > There is nothing the callers could do, except retrying over and over > again. > > So let's simply remove the bus altogether, print an error and make > sure no one can access this broken bus again (returning -ENOMEM on any > attempt to access it). > > Fixes: e93f8a0f821e ("KVM: convert io_bus to SRCU") > Cc: sta...@vger.kernel.org # 3.4+ > Reported-by: Dmitry Vyukov <dvyu...@google.com> > Signed-off-by: David Hildenbrand <da...@redhat.com> > --- > > Based on kvm/queue, where we just got 2a108a4e7c1 ("KVM: x86: clear bus > pointer when destroyed"), which added a check we need here. > > v1 -> v2: > - added a check in kvm_destroy_vm() > - added a check in virt/kvm/eventfd.c > > Using 'git grep -C 4 "kvm->buses"' should help to find all users. The other > user in virt/kvm/eventfd.c should be fine. > > --- > include/linux/kvm_host.h | 4 ++-- > virt/kvm/eventfd.c | 3 ++- > virt/kvm/kvm_main.c | 42 +++++++++++++++++++++++++----------------- > 3 files changed, 29 insertions(+), 20 deletions(-) > > diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h > index 2c14ad9..d025074 100644 > --- a/include/linux/kvm_host.h > +++ b/include/linux/kvm_host.h > @@ -162,8 +162,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus > bus_idx, gpa_t addr, > int len, void *val); > int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t > addr, > int len, struct kvm_io_device *dev); > -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, > - struct kvm_io_device *dev); > +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, > + struct kvm_io_device *dev); > struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus > bus_idx, > gpa_t addr); > > diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c > index a29786d..4d28a9d 100644 > --- a/virt/kvm/eventfd.c > +++ b/virt/kvm/eventfd.c > @@ -870,7 +870,8 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus > bus_idx, > continue; > > kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); > - kvm->buses[bus_idx]->ioeventfd_count--; > + if (kvm->buses[bus_idx]) > + kvm->buses[bus_idx]->ioeventfd_count--; > ioeventfd_release(p); > ret = 0; > break; > diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c > index 7445566..ef1aa7f 100644 > --- a/virt/kvm/kvm_main.c > +++ b/virt/kvm/kvm_main.c > @@ -728,7 +728,8 @@ static void kvm_destroy_vm(struct kvm *kvm) > spin_unlock(&kvm_lock); > kvm_free_irq_routing(kvm); > for (i = 0; i < KVM_NR_BUSES; i++) { > - kvm_io_bus_destroy(kvm->buses[i]); > + if (kvm->buses[i]) > + kvm_io_bus_destroy(kvm->buses[i]); > kvm->buses[i] = NULL; > } > kvm_coalesced_mmio_free(kvm); > @@ -3476,6 +3477,8 @@ int kvm_io_bus_write(struct kvm_vcpu *vcpu, enum > kvm_bus bus_idx, gpa_t addr, > }; > > bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); > + if (!bus) > + return -ENOMEM; > r = __kvm_io_bus_write(vcpu, bus, &range, val); > return r < 0 ? r : 0; > } > @@ -3493,6 +3496,8 @@ int kvm_io_bus_write_cookie(struct kvm_vcpu *vcpu, enum > kvm_bus bus_idx, > }; > > bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); > + if (!bus) > + return -ENOMEM; > > /* First try the device referenced by cookie. */ > if ((cookie >= 0) && (cookie < bus->dev_count) && > @@ -3543,6 +3548,8 @@ int kvm_io_bus_read(struct kvm_vcpu *vcpu, enum kvm_bus > bus_idx, gpa_t addr, > }; > > bus = srcu_dereference(vcpu->kvm->buses[bus_idx], &vcpu->kvm->srcu); > + if (!bus) > + return -ENOMEM; > r = __kvm_io_bus_read(vcpu, bus, &range, val); > return r < 0 ? r : 0; > } > @@ -3555,6 +3562,9 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum > kvm_bus bus_idx, gpa_t addr, > struct kvm_io_bus *new_bus, *bus; > > bus = kvm->buses[bus_idx]; > + if (!bus) > + return -ENOMEM; > + > /* exclude ioeventfd which is limited by maximum fd */ > if (bus->dev_count - bus->ioeventfd_count > NR_IOBUS_DEVS - 1) > return -ENOSPC; > @@ -3574,45 +3584,41 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum > kvm_bus bus_idx, gpa_t addr, > } > > /* Caller must hold slots_lock. */ > -int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, > - struct kvm_io_device *dev) > +void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, > + struct kvm_io_device *dev) > { > - int i, r; > + int i; > struct kvm_io_bus *new_bus, *bus; > > bus = kvm->buses[bus_idx]; > - > - /* > - * It's possible the bus being released before hand. If so, > - * we're done here. > - */ > if (!bus) > - return 0; > + return; > > - r = -ENOENT; > for (i = 0; i < bus->dev_count; i++) > if (bus->range[i].dev == dev) { > - r = 0; > break; > } > > - if (r) > - return r; > + if (i == bus->dev_count) > + return; > > new_bus = kmalloc(sizeof(*bus) + ((bus->dev_count - 1) * > sizeof(struct kvm_io_range)), GFP_KERNEL); > - if (!new_bus) > - return -ENOMEM; > + if (!new_bus) { > + pr_err("kvm: failed to shrink bus, removing it completely\n"); > + goto broken;
The guest will fail in mysterious ways, if you do this (and io_bus_unregister_dev can be called during runtime): in-kernel device accesses will fail with unknown behaviour in the guest. Can't you retry a handful of times with GFP_KERNEL before switching to GFP_ATOMIC? (which in case fails the machine is likely to be crashing soon).