On 04/21/2010 08:53 PM, Cam Macdonell wrote:
Support an inter-vm shared memory device that maps a shared-memory object as a
PCI device in the guest. This patch also supports interrupts between guests by
communicating over a unix domain socket. This patch applies to the qemu-kvm
repository.
-device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
Interrupts are supported between multiple VMs by using a shared memory
server, which the guests connect to over a chardev socket.
-device ivshmem,size=<size in format accepted by -m>[,shm=<shm name>]
[,chardev=<id>][,msi=on][,irqfd=on][,vectors=n]
-chardev socket,path=<path>,id=<id>
(shared memory server is qemu.git/contrib/ivshmem-server)
Sample programs and init scripts are in a git repo here:
+typedef struct EventfdEntry {
+ PCIDevice *pdev;
+ int vector;
+} EventfdEntry;
+
+typedef struct IVShmemState {
+ PCIDevice dev;
+ uint32_t intrmask;
+ uint32_t intrstatus;
+ uint32_t doorbell;
+
+ CharDriverState * chr;
+ CharDriverState ** eventfd_chr;
+ int ivshmem_mmio_io_addr;
+
+ pcibus_t mmio_addr;
+ unsigned long ivshmem_offset;
+ uint64_t ivshmem_size; /* size of shared memory region */
+ int shm_fd; /* shared memory file descriptor */
+
+ int nr_allocated_vms;
+ /* array of eventfds for each guest */
+ int ** eventfds;
+ /* keep track of # of eventfds for each guest*/
+ int * eventfds_posn_count;
More readable:
typedef struct Peer {
int nb_eventfds;
int *eventfds;
} Peer;
int nb_peers;
Peer *peers;
Does eventfd_chr need to be there as well?
+
+ int nr_alloc_guests;
+ int vm_id;
+ int num_eventfds;
+ uint32_t vectors;
+ uint32_t features;
+ EventfdEntry *eventfd_table;
+
+ char * shmobj;
+ char * sizearg;
Does this need to be part of the state?
+} IVShmemState;
+
+/* registers for the Inter-VM shared memory device */
+enum ivshmem_registers {
+ IntrMask = 0,
+ IntrStatus = 4,
+ IVPosition = 8,
+ Doorbell = 12,
+};
+
+static inline uint32_t ivshmem_has_feature(IVShmemState *ivs, int feature) {
+ return (ivs->features& (1<< feature));
+}
+
+static inline int is_power_of_two(int x) {
+ return (x& (x-1)) == 0;
+}
argument needs to be uint64_t to avoid overflow with large BARs. Return
type can be bool.
+static void ivshmem_io_writel(void *opaque, uint8_t addr, uint32_t val)
+{
+ IVShmemState *s = opaque;
+
+ u_int64_t write_one = 1;
+ u_int16_t dest = val>> 16;
+ u_int16_t vector = val& 0xff;
+
+ addr&= 0xfe;
Why 0xfe? Can understand 0xfc or 0xff.
+
+ switch (addr)
+ {
+ case IntrMask:
+ ivshmem_IntrMask_write(s, val);
+ break;
+
+ case IntrStatus:
+ ivshmem_IntrStatus_write(s, val);
+ break;
+
+ case Doorbell:
+ /* check doorbell range */
+ if ((vector>= 0)&& (vector< s->eventfds_posn_count[dest])) {
What if dest is too big? We overflow s->eventfds_posn_count.
+
+static void close_guest_eventfds(IVShmemState *s, int posn)
+{
+ int i, guest_curr_max;
+
+ guest_curr_max = s->eventfds_posn_count[posn];
+
+ for (i = 0; i< guest_curr_max; i++)
+ close(s->eventfds[posn][i]);
+
+ free(s->eventfds[posn]);
qemu_free().
+/* this function increase the dynamic storage need to store data about other
+ * guests */
+static void increase_dynamic_storage(IVShmemState *s, int new_min_size) {
+
+ int j, old_nr_alloc;
+
+ old_nr_alloc = s->nr_alloc_guests;
+
+ while (s->nr_alloc_guests< new_min_size)
+ s->nr_alloc_guests = s->nr_alloc_guests * 2;
+
+ IVSHMEM_DPRINTF("bumping storage to %d guests\n", s->nr_alloc_guests);
+ s->eventfds = qemu_realloc(s->eventfds, s->nr_alloc_guests *
+ sizeof(int *));
+ s->eventfds_posn_count = qemu_realloc(s->eventfds_posn_count,
+ s->nr_alloc_guests *
+ sizeof(int));
+ s->eventfd_table = qemu_realloc(s->eventfd_table, s->nr_alloc_guests *
+ sizeof(EventfdEntry));
+
+ if ((s->eventfds == NULL) || (s->eventfds_posn_count == NULL) ||
+ (s->eventfd_table == NULL)) {
+ fprintf(stderr, "Allocation error - exiting\n");
+ exit(1);
+ }
+
+ if (!ivshmem_has_feature(s, IVSHMEM_IRQFD)) {
+ s->eventfd_chr = (CharDriverState **)qemu_realloc(s->eventfd_chr,
+ s->nr_alloc_guests * sizeof(void *));
+ if (s->eventfd_chr == NULL) {
+ fprintf(stderr, "Allocation error - exiting\n");
+ exit(1);
+ }
+ }
+
+ /* zero out new pointers */
+ for (j = old_nr_alloc; j< s->nr_alloc_guests; j++) {
+ s->eventfds[j] = NULL;
eventfds_posn_count and eventfd_table want zeroing as well.
+ }
+}
+
+static void ivshmem_read(void *opaque, const uint8_t * buf, int flags)
+{
+ IVShmemState *s = opaque;
+ int incoming_fd, tmp_fd;
+ int guest_curr_max;
+ long incoming_posn;
+
+ memcpy(&incoming_posn, buf, sizeof(long));
+ /* pick off s->chr->msgfd and store it, posn should accompany msg */
+ tmp_fd = qemu_chr_get_msgfd(s->chr);
+ IVSHMEM_DPRINTF("posn is %ld, fd is %d\n", incoming_posn, tmp_fd);
+
+ /* make sure we have enough space for this guest */
+ if (incoming_posn>= s->nr_alloc_guests) {
+ increase_dynamic_storage(s, incoming_posn);
+ }
+
+ if (tmp_fd == -1) {
+ /* if posn is positive and unseen before then this is our posn*/
+ if ((incoming_posn>= 0)&& (s->eventfds[incoming_posn] == NULL)) {
+ /* receive our posn */
+ s->vm_id = incoming_posn;
+ return;
+ } else {
+ /* otherwise an fd == -1 means an existing guest has gone away */
+ IVSHMEM_DPRINTF("posn %ld has gone away\n", incoming_posn);
+ close_guest_eventfds(s, incoming_posn);
+ return;
+ }
+ }
+
+ /* because of the implementation of get_msgfd, we need a dup */
+ incoming_fd = dup(tmp_fd);
Error check.
+
+ /* if the position is -1, then it's shared memory region fd */
+ if (incoming_posn == -1) {
+
+ s->num_eventfds = 0;
+
+ if (check_shm_size(s, incoming_fd) == -1) {
+ exit(-1);
+ }
+
+ /* creating a BAR in qemu_chr callback may be crazy */
+ create_shared_memory_BAR(s, incoming_fd);
It probably is... why can't you create it during initialization?
+
+ return;
+ }
+
+ /* each guest has an array of eventfds, and we keep track of how many
+ * guests for each VM */
+ guest_curr_max = s->eventfds_posn_count[incoming_posn];
+ if (guest_curr_max == 0) {
+ /* one eventfd per MSI vector */
+ s->eventfds[incoming_posn] = (int *) qemu_malloc(s->vectors *
+ sizeof(int));
+ }
+
+ /* this is an eventfd for a particular guest VM */
+ IVSHMEM_DPRINTF("eventfds[%ld][%d] = %d\n", incoming_posn, guest_curr_max,
+ incoming_fd);
+ s->eventfds[incoming_posn][guest_curr_max] = incoming_fd;
+
+ /* increment count for particular guest */
+ s->eventfds_posn_count[incoming_posn]++;
Not sure I follow exactly, but perhaps this needs to be
s->eventfds_posn_count[incoming_posn] = guest_curr_max + 1;
Oh, it is.
+
+ /* allocate/initialize space for interrupt handling */
+ s->eventfds = qemu_mallocz(s->nr_alloc_guests * sizeof(int *));
+ s->eventfd_table = qemu_mallocz(s->vectors * sizeof(EventfdEntry));
+ s->eventfds_posn_count = qemu_mallocz(s->nr_alloc_guests *
sizeof(int));
+
+ pci_conf[PCI_INTERRUPT_PIN] = 1; /* we are going to support interrupts
*/
This is done by the guest BIOS.
--
error compiling committee.c: too many arguments to function
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html