This patch introduces a DMA API and plumbs support through the DMA layer. We use a mostly opaque structure, IOVector, to represent a scatter/gather list of physical memory. Associated with each IOVector is a read/write function and an opaque pointer. This allows arbitrary transformation/mapping of the data while providing an easy mechanism to short-cut the zero-copy case in the block/net backends.
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/Makefile b/Makefile index adb50a8..a8df278 100644 --- a/Makefile +++ b/Makefile @@ -34,7 +34,7 @@ recurse-all: $(patsubst %,subdir-%, $(TARGET_DIRS)) ####################################################################### # BLOCK_OBJS is code used by both qemu system emulation and qemu-img -BLOCK_OBJS=cutils.o +BLOCK_OBJS=cutils.o iovector.o BLOCK_OBJS+=block-cow.o block-qcow.o aes.o block-vmdk.o block-cloop.o BLOCK_OBJS+=block-dmg.o block-bochs.o block-vpc.o block-vvfat.o BLOCK_OBJS+=block-qcow2.o block-parallels.o diff --git a/block.c b/block.c index 0730954..eb610e0 100644 --- a/block.c +++ b/block.c @@ -570,6 +570,55 @@ int bdrv_write(BlockDriverState *bs, int64_t sector_num, } } +int bdrv_readv(BlockDriverState *bs, int64_t sector_num, + IOVector *iovec) +{ + char *buffer; + size_t size; + int ret; + + /* it's possible that we'll see a OOM condition here if the transfer size + * is sufficiently large. + */ + size = iovector_size(iovec); + buffer = qemu_malloc(size); + if (buffer == NULL) + return -ENOMEM; + + ret = bdrv_read(bs, sector_num, buffer, size / 512); + + if (ret >= 0) + memcpy_to_iovector(iovec, 0, size, buffer); + + qemu_free(buffer); + + return ret; +} + +int bdrv_writev(BlockDriverState *bs, int64_t sector_num, + const IOVector *iovec) +{ + char *buffer; + size_t size; + int ret; + + /* it's possible that we'll see a OOM condition here if the transfer size + * is sufficiently large. 
+ */ + size = iovector_size(iovec); + buffer = qemu_malloc(size); + if (buffer == NULL) + return -ENOMEM; + + memcpy_from_iovector(buffer, 0, size, iovec); + + ret = bdrv_write(bs, sector_num, buffer, size / 512); + + qemu_free(buffer); + + return ret; +} + static int bdrv_pread_em(BlockDriverState *bs, int64_t offset, uint8_t *buf, int count1) { diff --git a/block.h b/block.h index b730505..9d30db2 100644 --- a/block.h +++ b/block.h @@ -1,6 +1,8 @@ #ifndef BLOCK_H #define BLOCK_H +#include "iovector.h" + /* block.c */ typedef struct BlockDriver BlockDriver; @@ -67,6 +69,9 @@ int bdrv_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors); int bdrv_write(BlockDriverState *bs, int64_t sector_num, const uint8_t *buf, int nb_sectors); +int bdrv_readv(BlockDriverState *bs, int64_t sector_num, IOVector *iovec); +int bdrv_writev(BlockDriverState *bs, int64_t sector_num, + const IOVector *iovec); int bdrv_pread(BlockDriverState *bs, int64_t offset, void *buf, int count); int bdrv_pwrite(BlockDriverState *bs, int64_t offset, diff --git a/hw/pci.c b/hw/pci.c index bc55989..3282478 100644 --- a/hw/pci.c +++ b/hw/pci.c @@ -145,6 +145,18 @@ int pci_device_load(PCIDevice *s, QEMUFile *f) return 0; } +void pci_device_dma_write(PCIDevice *s, target_phys_addr_t addr, + const void *buffer, size_t len) +{ + cpu_physical_memory_write(addr, buffer, len); +} + +void pci_device_dma_read(PCIDevice *s, target_phys_addr_t addr, + void *buffer, size_t len) +{ + cpu_physical_memory_read(addr, buffer, len); +} + /* -1 for devfn means auto assign */ PCIDevice *pci_register_device(PCIBus *bus, const char *name, int instance_size, int devfn, diff --git a/hw/pci.h b/hw/pci.h index e870987..c885cc5 100644 --- a/hw/pci.h +++ b/hw/pci.h @@ -81,6 +81,12 @@ void pci_default_write_config(PCIDevice *d, void pci_device_save(PCIDevice *s, QEMUFile *f); int pci_device_load(PCIDevice *s, QEMUFile *f); +void pci_device_dma_write(PCIDevice *s, target_phys_addr_t addr, + const void 
*buffer, size_t len); + +void pci_device_dma_read(PCIDevice *s, target_phys_addr_t addr, + void *buffer, size_t len); + typedef void (*pci_set_irq_fn)(qemu_irq *pic, int irq_num, int level); typedef int (*pci_map_irq_fn)(PCIDevice *pci_dev, int irq_num); PCIBus *pci_register_bus(pci_set_irq_fn set_irq, pci_map_irq_fn map_irq, diff --git a/iovector.c b/iovector.c new file mode 100644 index 0000000..056a86e --- /dev/null +++ b/iovector.c @@ -0,0 +1,144 @@ +/* + * IO Vectors + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include "qemu-common.h" +#include "iovector.h" + +static size_t iovector_rw(void *buffer, size_t size, IOVector *iov, int read) +{ + uint8_t *ptr = buffer; + size_t offset = 0; + int i; + + for (i = 0; i < iov->num; i++) { + size_t len; + + len = MIN(iov->sg[i].len, size - offset); + + if (read) + iov->read(iov->opaque, iov->sg[i].base, ptr + offset, len); + else + iov->write(iov->opaque, iov->sg[i].base, ptr + offset, len); + + offset += len; + } + + return offset; +} + +size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size, + const IOVector *iov) +{ + IOVector *sg; + size_t len; + + if (offset) + sg = iovector_trim(iov, offset, size); + else + sg = (IOVector *)iov; + + len = iovector_rw(buffer, size, sg, 1); + + if (offset) + iovector_free(sg); + + return len; +} + +size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size, + const void *buffer) +{ + IOVector *sg; + size_t len; + + if (offset) + sg = iovector_trim(iovec, offset, size); + else + sg = iovec; + + len = iovector_rw((void *)buffer, size, sg, 0); + + if (offset) + iovector_free(sg); + + return len; +} + +IOVector *iovector_new(int num, DMAReadHandler *read, DMAWriteHandler *write, + void *opaque) +{ + IOVector *ret; + + ret = qemu_malloc(sizeof(IOVector) + sizeof(IOVectorElement) * 
num); + if (ret == NULL) + return NULL; + + ret->num = num; + ret->read = read; + ret->write = write; + ret->opaque = opaque; + + return ret; +} + +void iovector_free(IOVector *iov) +{ + qemu_free(iov); +} + +IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size) +{ + IOVector *ret; + size_t off, total_size; + int i; + + ret = iovector_new(iov->num, iov->read, iov->write, iov->opaque); + if (ret == NULL) + return NULL; + + total_size = 0; + ret->num = 0; + + off = 0; + for (i = 0; i < iov->num; i++) { + if (off >= offset || offset < (off + iov->sg[i].len)) { + size_t fudge = 0; + if (off < offset) + fudge = offset - off; + + ret->sg[ret->num].base = iov->sg[i].base + fudge; + ret->sg[ret->num].len = MIN(iov->sg[i].len - fudge, + size - total_size); + total_size += ret->sg[ret->num].len; + ret->num++; + + if (total_size == size) + break; + } + + off += iov->sg[i].len; + } + + return ret; +} + +size_t iovector_size(const IOVector *iov) +{ + size_t size = 0; + int i; + + for (i = 0; i < iov->num; i++) + size += iov->sg[i].len; + + return size; +} diff --git a/iovector.h b/iovector.h new file mode 100644 index 0000000..f40f0a0 --- /dev/null +++ b/iovector.h @@ -0,0 +1,63 @@ +/* + * IO Vectors + * + * Copyright IBM, Corp. 2008 + * + * Authors: + * Anthony Liguori <[EMAIL PROTECTED]> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#ifndef _QEMU_IOVECTOR_H +#define _QEMU_IOVECTOR_H + +typedef struct IOVectorElement IOVectorElement; + +typedef void (DMAReadHandler)(void *opaque, uint64_t addr, + void *data, size_t len); + +typedef void (DMAWriteHandler)(void *opaque, uint64_t addr, + const void *data, size_t len); + +typedef struct IOVector +{ + DMAWriteHandler *write; + DMAReadHandler *read; + void *opaque; + + int num; + struct IOVectorElement { + uint64_t base; + size_t len; + } sg[0]; +} IOVector; + +/* Copy from an IOVector to a flat buffer. 
Be careful to pass in a fully + * translated IOVector here. */ +size_t memcpy_from_iovector(void *buffer, size_t offset, size_t size, + const IOVector *iov); + +/* Copy to an IOVector from a flat buffer. Be careful to pass in a fully + * translated IOVector here. */ +size_t memcpy_to_iovector(IOVector *iovec, size_t offset, size_t size, + const void *buffer); + +/* Return a new IOVector that's a subset of the passed in IOVector. It should + * be freed with iovector_free when you are done with it. */ +IOVector *iovector_trim(const IOVector *iov, size_t offset, size_t size); + +/* Returns the size of an IOVector in bytes */ +size_t iovector_size(const IOVector *iov); + +/* Returns a new IOVector with num elements. iov->num will be set to num on + * return */ +IOVector *iovector_new(int num, DMAReadHandler *read, DMAWriteHandler *write, + void *opaque); + +/* Frees an IOVector */ +void iovector_free(IOVector *iov); + +#endif diff --git a/net.h b/net.h index 2dfff8d..0b3a155 100644 --- a/net.h +++ b/net.h @@ -1,6 +1,8 @@ #ifndef QEMU_NET_H #define QEMU_NET_H +#include "iovector.h" + /* VLANs support */ typedef struct VLANClientState VLANClientState; @@ -30,6 +32,7 @@ VLANClientState *qemu_new_vlan_client(VLANState *vlan, void *opaque); int qemu_can_send_packet(VLANClientState *vc); void qemu_send_packet(VLANClientState *vc, const uint8_t *buf, int size); +void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec); void qemu_handler_true(void *opaque); void do_info_network(void); diff --git a/vl.c b/vl.c index 318eb35..821c05d 100644 --- a/vl.c +++ b/vl.c @@ -3731,6 +3731,22 @@ void qemu_send_packet(VLANClientState *vc1, const uint8_t *buf, int size) } } +void qemu_sendv_packet(VLANClientState *vc, const IOVector *iovec) +{ + size_t size; + uint8_t *data; + + size = iovector_size(iovec); + data = qemu_malloc(size); + if (data == NULL) + return; + + memcpy_from_iovector(data, 0, size, iovec); + qemu_send_packet(vc, data, size); + + qemu_free(data); +} + #if 
defined(CONFIG_SLIRP) /* slirp network adapter */ ------------------------------------------------------------------------- This SF.net email is sponsored by the 2008 JavaOne(SM) Conference Don't miss this year's exciting event. There's still time to save $100. Use priority code J8TL2D2. http://ad.doubleclick.net/clk;198757673;13503038;p?http://java.sun.com/javaone _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel