Amit,
Below is the patch for the PCI passthrough tree. It enables a guest to
access a device's memory-mapped I/O regions directly, without requiring
the host to trap and emulate every MMIO access.
This patch itself contains only userspace changes, but it relies on the
kernel patch by Anthony: "Handle vma regions with no backing page". Note
that this patch requires CONFIG_NUMA to be set. It also requires the
change to VT-d that Allen sent a while ago, which avoids mapping memory
slots with no backing page.
This patch was tested with the pci-passthrough VT-d using an e1000 NIC.
Regards,
Ben
From 8fe13bcea014a3b896a79fca5d15ddd32050694c Mon Sep 17 00:00:00 2001
From: Ben-Ami Yassour <[EMAIL PROTECTED]>
Date: Tue, 3 Jun 2008 12:34:51 +0300
Subject: [PATCH] KVM: PCIPT: direct mmio
This patch for PCI passthrough devices enables a guest to access a
device's memory mapped I/O regions directly, without requiring the host to trap
and emulate every MMIO access.
Signed-off-by: Ben-Ami Yassour <[EMAIL PROTECTED]>
Signed-off-by: Muli Ben-Yehuda <[EMAIL PROTECTED]>
---
libkvm/libkvm.c | 24 ++++++++----
qemu/hw/pci-passthrough.c | 89 +++++++++++----------------------------------
qemu/hw/pci-passthrough.h | 2 +
3 files changed, 40 insertions(+), 75 deletions(-)
diff --git a/libkvm/libkvm.c b/libkvm/libkvm.c
index d1e95a4..ce062cb 100644
--- a/libkvm/libkvm.c
+++ b/libkvm/libkvm.c
@@ -400,7 +400,7 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm,
unsigned long phys_start,
{
int r;
int prot = PROT_READ;
- void *ptr;
+ void *ptr = NULL;
struct kvm_userspace_memory_region memory = {
.memory_size = len,
.guest_phys_addr = phys_start,
@@ -410,16 +410,24 @@ void *kvm_create_userspace_phys_mem(kvm_context_t kvm,
unsigned long phys_start,
if (writable)
prot |= PROT_WRITE;
- ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
- if (ptr == MAP_FAILED) {
- fprintf(stderr, "create_userspace_phys_mem: %s",
strerror(errno));
- return 0;
- }
+ if (len > 0) {
+ ptr = mmap(NULL, len, prot, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ if (ptr == MAP_FAILED) {
+ fprintf(stderr, "create_userspace_phys_mem: %s",
+ strerror(errno));
+ return 0;
+ }
- memset(ptr, 0, len);
+ memset(ptr, 0, len);
+ }
memory.userspace_addr = (unsigned long)ptr;
- memory.slot = get_free_slot(kvm);
+
+ if (len > 0)
+ memory.slot = get_free_slot(kvm);
+ else
+ memory.slot = get_slot(phys_start);
+
r = ioctl(kvm->vm_fd, KVM_SET_USER_MEMORY_REGION, &memory);
if (r == -1) {
fprintf(stderr, "create_userspace_phys_mem: %s",
strerror(errno));
diff --git a/qemu/hw/pci-passthrough.c b/qemu/hw/pci-passthrough.c
index 62953ae..1a3e50c 100644
--- a/qemu/hw/pci-passthrough.c
+++ b/qemu/hw/pci-passthrough.c
@@ -25,18 +25,6 @@ typedef __u64 resource_size_t;
extern kvm_context_t kvm_context;
extern FILE *logfile;
-CPUReadMemoryFunc *pt_mmio_read_cb[3] = {
- pt_mmio_readb,
- pt_mmio_readw,
- pt_mmio_readl
-};
-
-CPUWriteMemoryFunc *pt_mmio_write_cb[3] = {
- pt_mmio_writeb,
- pt_mmio_writew,
- pt_mmio_writel
-};
-
//#define PT_DEBUG
#ifdef PT_DEBUG
@@ -45,47 +33,6 @@ CPUWriteMemoryFunc *pt_mmio_write_cb[3] = {
#define DEBUG(fmt, args...)
#endif
-#define pt_mmio_write(suffix, type) \
-void pt_mmio_write##suffix(void *opaque, target_phys_addr_t e_phys, \
- uint32_t value) \
-{ \
- pt_region_t *r_access = (pt_region_t *)opaque; \
- void *r_virt = (u8 *)r_access->r_virtbase + \
- (e_phys - r_access->e_physbase); \
- if (r_access->debug & PT_DEBUG_MMIO) { \
- fprintf(logfile, "pt_mmio_write" #suffix \
- ": e_physbase=%p e_phys=%p r_virt=%p value=%08x\n", \
- (void *)r_access->e_physbase, (void *)e_phys, \
- r_virt, value); \
- } \
- *(type *)r_virt = (type)value; \
-}
-
-pt_mmio_write(b, u8)
-pt_mmio_write(w, u16)
-pt_mmio_write(l, u32)
-
-#define pt_mmio_read(suffix, type) \
-uint32_t pt_mmio_read##suffix(void *opaque, target_phys_addr_t e_phys) \
-{ \
- pt_region_t *r_access = (pt_region_t *)opaque; \
- void *r_virt = (u8 *)r_access->r_virtbase + \
- (e_phys - r_access->e_physbase); \
- uint32_t value = (u32) (*(type *) r_virt); \
- if (r_access->debug & PT_DEBUG_MMIO) { \
- fprintf(logfile, \
- "pt_mmio_read" #suffix ": e_physbase=%p " \
- "e_phys=%p r_virt=%p value=%08x\n", \
- (void *)r_access->e_physbase, \
- (void *)e_phys, r_virt, value); \
- } \
- return value; \
-}
-
-pt_mmio_read(b, u8)
-pt_mmio_read(w, u16)
-pt_mmio_read(l, u32)
-
#define pt_ioport_write(suffix)
\
void pt_ioport_write##suffix(void *opaque, uint32_t addr, uint32_t value) \
{ \
@@ -127,22 +74,33 @@ pt_ioport_read(b)
pt_ioport_read(w)
pt_ioport_read(l)
-static void pt_iomem_map(PCIDevice * d, int region_num,
- uint32_t e_phys, uint32_t e_size, int type)
+void pt_iomem_map(PCIDevice * pci_dev, int region_num, uint32_t e_phys,
+ uint32_t e_size, int type)
{
- pt_dev_t *r_dev = (pt_dev_t *) d;
-
- r_dev->v_addrs[region_num].e_physbase = e_phys;
+ pt_dev_t *r_dev = (pt_dev_t *) pci_dev;
+ pt_region_t *region = &r_dev->v_addrs[region_num];
+ int first_map = (region->e_size == 0);
+ int ret = 0;
DEBUG("e_phys=%08x r_virt=%p type=%d len=%08x region_num=%d \n",
e_phys, r_dev->v_addrs[region_num].r_virtbase, type, e_size,
region_num);
- cpu_register_physical_memory(e_phys,
- r_dev->dev.io_regions[region_num].size,
- r_dev->v_addrs[region_num].memory_index);
-}
+ region->e_physbase = e_phys;
+ region->e_size = e_size;
+
+ if (!first_map)
+ kvm_destroy_phys_mem(kvm_context, e_phys, e_size);
+ if (e_size > 0)
+ ret = kvm_register_userspace_phys_mem(kvm_context,
+ e_phys,
+ region->r_virtbase,
+ e_size,
+ 0);
+ if (ret != 0)
+ fprintf(logfile, "Error: create new mapping failed\n");
+}
static void pt_ioport_map(PCIDevice * pci_dev, int region_num,
uint32_t addr, uint32_t size, int type)
@@ -265,6 +223,8 @@ static int pt_register_regions(pci_region_t * io_regions,
(uint32_t) (cur_region->base_addr));
return (-1);
}
+ pci_dev->v_addrs[i].r_size = cur_region->size;
+ pci_dev->v_addrs[i].e_size = 0;
/* add offset */
pci_dev->v_addrs[i].r_virtbase +=
@@ -274,11 +234,6 @@ static int pt_register_regions(pci_region_t * io_regions,
cur_region->size, t,
pt_iomem_map);
- pci_dev->v_addrs[i].memory_index =
- cpu_register_io_memory(0, pt_mmio_read_cb,
- pt_mmio_write_cb,
- (void *)
&(pci_dev->v_addrs[i]));
-
continue;
}
/* handle port io regions */
diff --git a/qemu/hw/pci-passthrough.h b/qemu/hw/pci-passthrough.h
index 1dcf752..67ac4dc 100644
--- a/qemu/hw/pci-passthrough.h
+++ b/qemu/hw/pci-passthrough.h
@@ -54,6 +54,8 @@ typedef struct pt_region_s {
uint32_t memory_index;
void *r_virtbase; /* mmapped access address */
int num; /* our index within v_addrs[] */
+ uint32_t e_size; /* emulated size of region in bytes */
+ uint32_t r_size; /* real size of region in bytes */
uint32_t debug;
} pt_region_t;
--
1.5.5.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html