There used to be a limit of 6 KVM io bus devices inside the kernel. On such a kernel, don't use ioeventfd for virtqueue host notification since the limit is reached too easily. This ensures that existing vhost-net setups (which always use ioeventfd) have ioeventfds available so they can continue to work.
Signed-off-by: Stefan Hajnoczi <stefa...@linux.vnet.ibm.com> --- hw/virtio-pci.c | 4 ++++ kvm-all.c | 49 +++++++++++++++++++++++++++++++++++++++++++++++++ kvm-stub.c | 5 +++++ kvm.h | 1 + 4 files changed, 59 insertions(+), 0 deletions(-) diff --git a/hw/virtio-pci.c b/hw/virtio-pci.c index 6b12248..b2181fc 100644 --- a/hw/virtio-pci.c +++ b/hw/virtio-pci.c @@ -690,6 +690,10 @@ static void virtio_init_pci(VirtIOPCIProxy *proxy, VirtIODevice *vdev, pci_register_bar(&proxy->pci_dev, 0, size, PCI_BASE_ADDRESS_SPACE_IO, virtio_map); + if (!kvm_has_many_ioeventfds()) { + proxy->flags &= ~VIRTIO_PCI_FLAG_USE_IOEVENTFD; + } + virtio_bind_device(vdev, &virtio_pci_bindings, proxy); proxy->host_features |= 0x1 << VIRTIO_F_NOTIFY_ON_EMPTY; proxy->host_features |= 0x1 << VIRTIO_F_BAD_FEATURE; diff --git a/kvm-all.c b/kvm-all.c index cae24bb..255b6fa 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -28,6 +28,11 @@ #include "kvm.h" #include "bswap.h" +/* This check must be after config-host.h is included */ +#ifdef CONFIG_EVENTFD +#include <sys/eventfd.h> +#endif + /* KVM uses PAGE_SIZE in it's definition of COALESCED_MMIO_MAX */ #define PAGE_SIZE TARGET_PAGE_SIZE @@ -72,6 +77,7 @@ struct KVMState int irqchip_in_kernel; int pit_in_kernel; int xsave, xcrs; + int many_ioeventfds; }; static KVMState *kvm_state; @@ -441,6 +447,39 @@ int kvm_check_extension(KVMState *s, unsigned int extension) return ret; } +static int kvm_check_many_ioeventfds(void) +{ + /* Older kernels have a 6 device limit on the KVM io bus. Find out so we + * can avoid creating too many ioeventfds. + */ +#ifdef CONFIG_EVENTFD + int ioeventfds[7]; + int i, ret = 0; + for (i = 0; i < ARRAY_SIZE(ioeventfds); i++) { + ioeventfds[i] = eventfd(0, EFD_CLOEXEC); + if (ioeventfds[i] < 0) { + break; + } + ret = kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, true); + if (ret < 0) { + close(ioeventfds[i]); + break; + } + } + + /* Decide whether many devices are supported or not */ + ret = i == ARRAY_SIZE(ioeventfds); + + while (i-- > 0) { + kvm_set_ioeventfd_pio_word(ioeventfds[i], 0, i, false); + close(ioeventfds[i]); + } + return ret; +#else + return 0; +#endif +} + static void kvm_set_phys_mem(target_phys_addr_t start_addr, ram_addr_t size, ram_addr_t phys_offset) @@ -717,6 +756,8 @@ int kvm_init(int smp_cpus) kvm_state = s; cpu_register_phys_memory_client(&kvm_cpu_phys_memory_client); + s->many_ioeventfds = kvm_check_many_ioeventfds(); + return 0; err: @@ -1046,6 +1087,14 @@ int kvm_has_xcrs(void) return kvm_state->xcrs; } +int kvm_has_many_ioeventfds(void) +{ + if (!kvm_enabled()) { + return 0; + } + return kvm_state->many_ioeventfds; +} + void kvm_setup_guest_memory(void *start, size_t size) { if (!kvm_has_sync_mmu()) { diff --git a/kvm-stub.c b/kvm-stub.c index 5384a4b..33d4476 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -99,6 +99,11 @@ int kvm_has_robust_singlestep(void) return 0; } +int kvm_has_many_ioeventfds(void) +{ + return 0; +} + void kvm_setup_guest_memory(void *start, size_t size) { } diff --git a/kvm.h b/kvm.h index 60a9b42..ce08d42 100644 --- a/kvm.h +++ b/kvm.h @@ -42,6 +42,7 @@ int kvm_has_robust_singlestep(void); int kvm_has_debugregs(void); int kvm_has_xsave(void); int kvm_has_xcrs(void); +int kvm_has_many_ioeventfds(void); #ifdef NEED_CPU_H int kvm_init_vcpu(CPUState *env); -- 1.7.2.3