Re: [Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device
On Mon, Jun 7, 2010 at 4:41 PM, Cam Macdonell wrote: > On Sat, Jun 5, 2010 at 3:44 AM, Blue Swirl wrote: >> On Fri, Jun 4, 2010 at 9:45 PM, Cam Macdonell wrote: >>> Support an inter-vm shared memory device that maps a shared-memory object >>> as a >>> PCI device in the guest. This patch also supports interrupts between guest >>> by >>> communicating over a unix domain socket. This patch applies to the qemu-kvm >>> repository. >>> >>> -device ivshmem,size=[,shm=] >>> >>> Interrupts are supported between multiple VMs by using a shared memory >>> server >>> by using a chardev socket. >>> >>> -device ivshmem,size=[,shm=] >>> [,chardev=][,msi=on][,irqfd=on][,vectors=n][,role=peer|master] >>> -chardev socket,path=,id= >>> >>> (shared memory server is qemu.git/contrib/ivshmem-server) >>> >>> Sample programs and init scripts are in a git repo here: >>> >>> www.gitorious.org/nahanni >>> --- >>> Makefile.target | 3 + >>> hw/ivshmem.c | 852 >>> +++ >>> qemu-char.c | 6 + >>> qemu-char.h | 3 + >>> qemu-doc.texi | 43 +++ >>> 5 files changed, 907 insertions(+), 0 deletions(-) >>> create mode 100644 hw/ivshmem.c >>> >>> diff --git a/Makefile.target b/Makefile.target >>> index c4ba592..4888308 100644 >>> --- a/Makefile.target >>> +++ b/Makefile.target >>> @@ -202,6 +202,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o >>> obj-y += rtl8139.o >>> obj-y += e1000.o >>> >>> +# Inter-VM PCI shared memory >>> +obj-y += ivshmem.o >>> + >> >> Can this be compiled once, simply by moving this to Makefile.objs >> instead of Makefile.target? Also, because the code seems to be KVM >> specific, it can't be compiled unconditionally but depending on at >> least CONFIG_KVM and maybe CONFIG_EVENTFD. > > The device uses eventfds for signalling, but never creates the > eventfds itself as they are passed from a server using SCM_RIGHTS. > So, it does not depend on the eventfd API. Its dependence on > irqfd/ioeventfd (the only KVM specific bits) are optional via the > command-line. > > A runtime check for KVM is done for the reasons Avi mentioned. > >> >> Why is this KVM specific BTW, Posix SHM is available on many >> platforms? What would happen if kvm_set_foobar functions were not >> called when KVM is not being used? Is host eventfd support essential? >> >>> # Hardware support >>> obj-i386-y += vga.o >>> obj-i386-y += mc146818rtc.o i8259.o pc.o >>> diff --git a/hw/ivshmem.c b/hw/ivshmem.c >>> new file mode 100644 >>> index 000..9057612 >>> --- /dev/null >>> +++ b/hw/ivshmem.c >>> @@ -0,0 +1,852 @@ >>> +/* >>> + * Inter-VM Shared Memory PCI device. >>> + * >>> + * Author: >>> + * Cam Macdonell >>> + * >>> + * Based On: cirrus_vga.c >>> + * Copyright (c) 2004 Fabrice Bellard >>> + * Copyright (c) 2004 Makoto Suzuki (suzu) >>> + * >>> + * and rtl8139.c >>> + * Copyright (c) 2006 Igor Kovalenko >>> + * >>> + * This code is licensed under the GNU GPL v2. >>> + */ >>> +#include >>> +#include >>> +#include >>> +#include >>> +#include >>> +#include "hw.h" >>> +#include "console.h" >>> +#include "pc.h" >>> +#include "pci.h" >>> +#include "sysemu.h" >>> + >>> +#include "msix.h" >>> +#include "qemu-kvm.h" >>> +#include "libkvm.h" >>> + >>> +#include >>> +#include >>> +#include >>> +#include >>> + >>> +#define IVSHMEM_IRQFD 0 >>> +#define IVSHMEM_MSI 1 >>> + >>> +//#define DEBUG_IVSHMEM >>> +#ifdef DEBUG_IVSHMEM >>> +#define IVSHMEM_DPRINTF(fmt, args...) \ >>> + do {printf("IVSHMEM: " fmt, ##args); } while (0) >> >> Please use __VA_ARGS__. >> >>> +#else >>> +#define IVSHMEM_DPRINTF(fmt, args...) >>> +#endif >>> + >>> +typedef struct Peer { >>> + int nb_eventfds; >>> + int *eventfds; >>> +} Peer; >>> + >>> +typedef struct EventfdEntry { >>> + PCIDevice *pdev; >>> + int vector; >>> +} EventfdEntry; >>> + >>> +typedef struct IVShmemState { >>> + PCIDevice dev; >>> + uint32_t intrmask; >>> + uint32_t intrstatus; >>> + uint32_t doorbell; >>> + >>> + CharDriverState ** eventfd_chr; >> >> I'd remove the space between '**' and 'eventfd_chr', it's used >> inconsistently. >> >>> + CharDriverState * server_chr; >>> + int ivshmem_mmio_io_addr; >>> + >>> + pcibus_t mmio_addr; >>> + pcibus_t shm_pci_addr; >>> + uint64_t ivshmem_offset; >>> + uint64_t ivshmem_size; /* size of shared memory region */ >>> + int shm_fd; /* shared memory file descriptor */ >>> + >>> + Peer *peers; >>> + int nb_peers; /* how many guests we have space for */ >>> + int max_peer; /* maximum numbered peer */ >>> + >>> + int vm_id; >>> + uint32_t vectors; >>> + uint32_t features; >>> + EventfdEntry *eventfd_table; >>> + >>> + char * shmobj; >>> + char * sizearg; >>> + char * role; >>> +} IVShmemState; >>> + >>> +/* registers for the Inter-VM shared memory device */ >>> +enum ivshmem_registers { >>> + IntrMask = 0, >>> + IntrStatus = 4,
Re: [Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device
On Sat, Jun 5, 2010 at 3:44 AM, Blue Swirl wrote: > On Fri, Jun 4, 2010 at 9:45 PM, Cam Macdonell wrote: >> Support an inter-vm shared memory device that maps a shared-memory object as >> a >> PCI device in the guest. This patch also supports interrupts between guest >> by >> communicating over a unix domain socket. This patch applies to the qemu-kvm >> repository. >> >> -device ivshmem,size=[,shm=] >> >> Interrupts are supported between multiple VMs by using a shared memory server >> by using a chardev socket. >> >> -device ivshmem,size=[,shm=] >> [,chardev=][,msi=on][,irqfd=on][,vectors=n][,role=peer|master] >> -chardev socket,path=,id= >> >> (shared memory server is qemu.git/contrib/ivshmem-server) >> >> Sample programs and init scripts are in a git repo here: >> >> www.gitorious.org/nahanni >> --- >> Makefile.target | 3 + >> hw/ivshmem.c | 852 >> +++ >> qemu-char.c | 6 + >> qemu-char.h | 3 + >> qemu-doc.texi | 43 +++ >> 5 files changed, 907 insertions(+), 0 deletions(-) >> create mode 100644 hw/ivshmem.c >> >> diff --git a/Makefile.target b/Makefile.target >> index c4ba592..4888308 100644 >> --- a/Makefile.target >> +++ b/Makefile.target >> @@ -202,6 +202,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o >> obj-y += rtl8139.o >> obj-y += e1000.o >> >> +# Inter-VM PCI shared memory >> +obj-y += ivshmem.o >> + > > Can this be compiled once, simply by moving this to Makefile.objs > instead of Makefile.target? Also, because the code seems to be KVM > specific, it can't be compiled unconditionally but depending on at > least CONFIG_KVM and maybe CONFIG_EVENTFD. The device uses eventfds for signalling, but never creates the eventfds itself as they are passed from a server using SCM_RIGHTS. So, it does not depend on the eventfd API. Its dependence on irqfd/ioeventfd (the only KVM specific bits) are optional via the command-line. A runtime check for KVM is done for the reasons Avi mentioned. > > Why is this KVM specific BTW, Posix SHM is available on many > platforms? What would happen if kvm_set_foobar functions were not > called when KVM is not being used? Is host eventfd support essential? > >> # Hardware support >> obj-i386-y += vga.o >> obj-i386-y += mc146818rtc.o i8259.o pc.o >> diff --git a/hw/ivshmem.c b/hw/ivshmem.c >> new file mode 100644 >> index 000..9057612 >> --- /dev/null >> +++ b/hw/ivshmem.c >> @@ -0,0 +1,852 @@ >> +/* >> + * Inter-VM Shared Memory PCI device. >> + * >> + * Author: >> + * Cam Macdonell >> + * >> + * Based On: cirrus_vga.c >> + * Copyright (c) 2004 Fabrice Bellard >> + * Copyright (c) 2004 Makoto Suzuki (suzu) >> + * >> + * and rtl8139.c >> + * Copyright (c) 2006 Igor Kovalenko >> + * >> + * This code is licensed under the GNU GPL v2. >> + */ >> +#include >> +#include >> +#include >> +#include >> +#include >> +#include "hw.h" >> +#include "console.h" >> +#include "pc.h" >> +#include "pci.h" >> +#include "sysemu.h" >> + >> +#include "msix.h" >> +#include "qemu-kvm.h" >> +#include "libkvm.h" >> + >> +#include >> +#include >> +#include >> +#include >> + >> +#define IVSHMEM_IRQFD 0 >> +#define IVSHMEM_MSI 1 >> + >> +//#define DEBUG_IVSHMEM >> +#ifdef DEBUG_IVSHMEM >> +#define IVSHMEM_DPRINTF(fmt, args...) \ >> + do {printf("IVSHMEM: " fmt, ##args); } while (0) > > Please use __VA_ARGS__. > >> +#else >> +#define IVSHMEM_DPRINTF(fmt, args...) >> +#endif >> + >> +typedef struct Peer { >> + int nb_eventfds; >> + int *eventfds; >> +} Peer; >> + >> +typedef struct EventfdEntry { >> + PCIDevice *pdev; >> + int vector; >> +} EventfdEntry; >> + >> +typedef struct IVShmemState { >> + PCIDevice dev; >> + uint32_t intrmask; >> + uint32_t intrstatus; >> + uint32_t doorbell; >> + >> + CharDriverState ** eventfd_chr; > > I'd remove the space between '**' and 'eventfd_chr', it's used inconsistently. > >> + CharDriverState * server_chr; >> + int ivshmem_mmio_io_addr; >> + >> + pcibus_t mmio_addr; >> + pcibus_t shm_pci_addr; >> + uint64_t ivshmem_offset; >> + uint64_t ivshmem_size; /* size of shared memory region */ >> + int shm_fd; /* shared memory file descriptor */ >> + >> + Peer *peers; >> + int nb_peers; /* how many guests we have space for */ >> + int max_peer; /* maximum numbered peer */ >> + >> + int vm_id; >> + uint32_t vectors; >> + uint32_t features; >> + EventfdEntry *eventfd_table; >> + >> + char * shmobj; >> + char * sizearg; >> + char * role; >> +} IVShmemState; >> + >> +/* registers for the Inter-VM shared memory device */ >> +enum ivshmem_registers { >> + IntrMask = 0, >> + IntrStatus = 4, >> + IVPosition = 8, >> + Doorbell = 12, >> +}; > > IIRC these should be uppercase. I worked from rtl8139 which doesn't have them uppercase. But doing a quick search, I can see most devices do, I'll fix that. > >> + >> +static i
Re: [Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device
On 06/05/2010 12:44 PM, Blue Swirl wrote: On Fri, Jun 4, 2010 at 9:45 PM, Cam Macdonell wrote: Support an inter-vm shared memory device that maps a shared-memory object as a PCI device in the guest. This patch also supports interrupts between guest by communicating over a unix domain socket. This patch applies to the qemu-kvm repository. -device ivshmem,size=[,shm=] Interrupts are supported between multiple VMs by using a shared memory server by using a chardev socket. -device ivshmem,size=[,shm=] [,chardev=][,msi=on][,irqfd=on][,vectors=n][,role=peer|master] -chardev socket,path=,id= (shared memory server is qemu.git/contrib/ivshmem-server) Sample programs and init scripts are in a git repo here: Why is this KVM specific BTW, Posix SHM is available on many platforms? What would happen if kvm_set_foobar functions were not called when KVM is not being used? Is host eventfd support essential? It's not kvm specific, it's atomic-ops-on-shared-memory-are-visible-as-atomic-ops specific, which is currently only available with kvm. When tcg gains true smp support (and not just against other tcg threads) this can work with tcg as well. I guess that needs a host with at least 32/64 bit CAS for 32/64 bit targets respectively, and double that if the target has DCAS. Not sure how targets with ll/sc can be implemented, especially if there are limits as to what can go in between. -- error compiling committee.c: too many arguments to function
Re: [Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device
On Fri, Jun 4, 2010 at 9:45 PM, Cam Macdonell wrote: > Support an inter-vm shared memory device that maps a shared-memory object as a > PCI device in the guest. This patch also supports interrupts between guest by > communicating over a unix domain socket. This patch applies to the qemu-kvm > repository. > > -device ivshmem,size=[,shm=] > > Interrupts are supported between multiple VMs by using a shared memory server > by using a chardev socket. > > -device ivshmem,size=[,shm=] > [,chardev=][,msi=on][,irqfd=on][,vectors=n][,role=peer|master] > -chardev socket,path=,id= > > (shared memory server is qemu.git/contrib/ivshmem-server) > > Sample programs and init scripts are in a git repo here: > > www.gitorious.org/nahanni > --- > Makefile.target | 3 + > hw/ivshmem.c | 852 > +++ > qemu-char.c | 6 + > qemu-char.h | 3 + > qemu-doc.texi | 43 +++ > 5 files changed, 907 insertions(+), 0 deletions(-) > create mode 100644 hw/ivshmem.c > > diff --git a/Makefile.target b/Makefile.target > index c4ba592..4888308 100644 > --- a/Makefile.target > +++ b/Makefile.target > @@ -202,6 +202,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o > obj-y += rtl8139.o > obj-y += e1000.o > > +# Inter-VM PCI shared memory > +obj-y += ivshmem.o > + Can this be compiled once, simply by moving this to Makefile.objs instead of Makefile.target? Also, because the code seems to be KVM specific, it can't be compiled unconditionally but depending on at least CONFIG_KVM and maybe CONFIG_EVENTFD. Why is this KVM specific BTW, Posix SHM is available on many platforms? What would happen if kvm_set_foobar functions were not called when KVM is not being used? Is host eventfd support essential? > # Hardware support > obj-i386-y += vga.o > obj-i386-y += mc146818rtc.o i8259.o pc.o > diff --git a/hw/ivshmem.c b/hw/ivshmem.c > new file mode 100644 > index 000..9057612 > --- /dev/null > +++ b/hw/ivshmem.c > @@ -0,0 +1,852 @@ > +/* > + * Inter-VM Shared Memory PCI device. > + * > + * Author: > + * Cam Macdonell > + * > + * Based On: cirrus_vga.c > + * Copyright (c) 2004 Fabrice Bellard > + * Copyright (c) 2004 Makoto Suzuki (suzu) > + * > + * and rtl8139.c > + * Copyright (c) 2006 Igor Kovalenko > + * > + * This code is licensed under the GNU GPL v2. > + */ > +#include > +#include > +#include > +#include > +#include > +#include "hw.h" > +#include "console.h" > +#include "pc.h" > +#include "pci.h" > +#include "sysemu.h" > + > +#include "msix.h" > +#include "qemu-kvm.h" > +#include "libkvm.h" > + > +#include > +#include > +#include > +#include > + > +#define IVSHMEM_IRQFD 0 > +#define IVSHMEM_MSI 1 > + > +//#define DEBUG_IVSHMEM > +#ifdef DEBUG_IVSHMEM > +#define IVSHMEM_DPRINTF(fmt, args...) \ > + do {printf("IVSHMEM: " fmt, ##args); } while (0) Please use __VA_ARGS__. > +#else > +#define IVSHMEM_DPRINTF(fmt, args...) > +#endif > + > +typedef struct Peer { > + int nb_eventfds; > + int *eventfds; > +} Peer; > + > +typedef struct EventfdEntry { > + PCIDevice *pdev; > + int vector; > +} EventfdEntry; > + > +typedef struct IVShmemState { > + PCIDevice dev; > + uint32_t intrmask; > + uint32_t intrstatus; > + uint32_t doorbell; > + > + CharDriverState ** eventfd_chr; I'd remove the space between '**' and 'eventfd_chr', it's used inconsistently. > + CharDriverState * server_chr; > + int ivshmem_mmio_io_addr; > + > + pcibus_t mmio_addr; > + pcibus_t shm_pci_addr; > + uint64_t ivshmem_offset; > + uint64_t ivshmem_size; /* size of shared memory region */ > + int shm_fd; /* shared memory file descriptor */ > + > + Peer *peers; > + int nb_peers; /* how many guests we have space for */ > + int max_peer; /* maximum numbered peer */ > + > + int vm_id; > + uint32_t vectors; > + uint32_t features; > + EventfdEntry *eventfd_table; > + > + char * shmobj; > + char * sizearg; > + char * role; > +} IVShmemState; > + > +/* registers for the Inter-VM shared memory device */ > +enum ivshmem_registers { > + IntrMask = 0, > + IntrStatus = 4, > + IVPosition = 8, > + Doorbell = 12, > +}; IIRC these should be uppercase. > + > +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) { > + return (ivs->features & (1 << feature)); > +} Since this is the first version, do we need any features at this point, can't we expect that all features are available now? Why does the user need to specify the features? To avoid a negative shift, I'd make 'feature' unsigned. > + > +static inline bool is_power_of_two(uint64_t x) { > + return (x & (x - 1)) == 0; > +} > + > +static void ivshmem_map(PCIDevice *pci_dev, int region_num, > + pcibus_t addr, pcibus_t size, int type) > +{ > + IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev); > + > + s->shm_pci_addr = addr; > + > + if (s-
[Qemu-devel] [PATCH v6 5/6] Inter-VM shared memory PCI device
Support an inter-vm shared memory device that maps a shared-memory object as a PCI device in the guest. This patch also supports interrupts between guest by communicating over a unix domain socket. This patch applies to the qemu-kvm repository. -device ivshmem,size=[,shm=] Interrupts are supported between multiple VMs by using a shared memory server by using a chardev socket. -device ivshmem,size=[,shm=] [,chardev=][,msi=on][,irqfd=on][,vectors=n][,role=peer|master] -chardev socket,path=,id= (shared memory server is qemu.git/contrib/ivshmem-server) Sample programs and init scripts are in a git repo here: www.gitorious.org/nahanni --- Makefile.target |3 + hw/ivshmem.c| 852 +++ qemu-char.c |6 + qemu-char.h |3 + qemu-doc.texi | 43 +++ 5 files changed, 907 insertions(+), 0 deletions(-) create mode 100644 hw/ivshmem.c diff --git a/Makefile.target b/Makefile.target index c4ba592..4888308 100644 --- a/Makefile.target +++ b/Makefile.target @@ -202,6 +202,9 @@ obj-$(CONFIG_USB_OHCI) += usb-ohci.o obj-y += rtl8139.o obj-y += e1000.o +# Inter-VM PCI shared memory +obj-y += ivshmem.o + # Hardware support obj-i386-y += vga.o obj-i386-y += mc146818rtc.o i8259.o pc.o diff --git a/hw/ivshmem.c b/hw/ivshmem.c new file mode 100644 index 000..9057612 --- /dev/null +++ b/hw/ivshmem.c @@ -0,0 +1,852 @@ +/* + * Inter-VM Shared Memory PCI device. + * + * Author: + * Cam Macdonell + * + * Based On: cirrus_vga.c + * Copyright (c) 2004 Fabrice Bellard + * Copyright (c) 2004 Makoto Suzuki (suzu) + * + * and rtl8139.c + * Copyright (c) 2006 Igor Kovalenko + * + * This code is licensed under the GNU GPL v2. + */ +#include +#include +#include +#include +#include +#include "hw.h" +#include "console.h" +#include "pc.h" +#include "pci.h" +#include "sysemu.h" + +#include "msix.h" +#include "qemu-kvm.h" +#include "libkvm.h" + +#include +#include +#include +#include + +#define IVSHMEM_IRQFD 0 +#define IVSHMEM_MSI 1 + +//#define DEBUG_IVSHMEM +#ifdef DEBUG_IVSHMEM +#define IVSHMEM_DPRINTF(fmt, args...)\ +do {printf("IVSHMEM: " fmt, ##args); } while (0) +#else +#define IVSHMEM_DPRINTF(fmt, args...) +#endif + +typedef struct Peer { +int nb_eventfds; +int *eventfds; +} Peer; + +typedef struct EventfdEntry { +PCIDevice *pdev; +int vector; +} EventfdEntry; + +typedef struct IVShmemState { +PCIDevice dev; +uint32_t intrmask; +uint32_t intrstatus; +uint32_t doorbell; + +CharDriverState ** eventfd_chr; +CharDriverState * server_chr; +int ivshmem_mmio_io_addr; + +pcibus_t mmio_addr; +pcibus_t shm_pci_addr; +uint64_t ivshmem_offset; +uint64_t ivshmem_size; /* size of shared memory region */ +int shm_fd; /* shared memory file descriptor */ + +Peer *peers; +int nb_peers; /* how many guests we have space for */ +int max_peer; /* maximum numbered peer */ + +int vm_id; +uint32_t vectors; +uint32_t features; +EventfdEntry *eventfd_table; + +char * shmobj; +char * sizearg; +char * role; +} IVShmemState; + +/* registers for the Inter-VM shared memory device */ +enum ivshmem_registers { +IntrMask = 0, +IntrStatus = 4, +IVPosition = 8, +Doorbell = 12, +}; + +static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) { +return (ivs->features & (1 << feature)); +} + +static inline bool is_power_of_two(uint64_t x) { +return (x & (x - 1)) == 0; +} + +static void ivshmem_map(PCIDevice *pci_dev, int region_num, +pcibus_t addr, pcibus_t size, int type) +{ +IVShmemState *s = DO_UPCAST(IVShmemState, dev, pci_dev); + +s->shm_pci_addr = addr; + +if (s->ivshmem_offset > 0) { +cpu_register_physical_memory(s->shm_pci_addr, s->ivshmem_size, +s->ivshmem_offset); +if (s->role && strncmp(s->role, "peer", 4) == 0) { +IVSHMEM_DPRINTF("marking pages no migrate\n"); +cpu_mark_pages_no_migrate(s->ivshmem_offset, s->ivshmem_size); +} +} + +IVSHMEM_DPRINTF("guest pci addr = %u, guest h/w addr = %u, size = %u\n", +(uint32_t)addr, (uint32_t)s->ivshmem_offset, (uint32_t)size); + +} + +/* accessing registers - based on rtl8139 */ +static void ivshmem_update_irq(IVShmemState *s, int val) +{ +int isr; +isr = (s->intrstatus & s->intrmask) & 0x; + +/* don't print ISR resets */ +if (isr) { +IVSHMEM_DPRINTF("Set IRQ to %d (%04x %04x)\n", + isr ? 1 : 0, s->intrstatus, s->intrmask); +} + +qemu_set_irq(s->dev.irq[0], (isr != 0)); +} + +static void ivshmem_IntrMask_write(IVShmemState *s, uint32_t val) +{ +IVSHMEM_DPRINTF("IntrMask write(w) val = 0x%04x\n", val); + +s->intrmask = val; + +ivshmem_update_irq(s, val); +} + +static uint32_t ivshm