This patch attempts to clean up kvmctl so that it can be more easily made to
work for multiple architectures and to support more emulation.
It introduces an I/O dispatch mechanism. This mechanism supports sub-page
granularity but is also optimized to cover whole-page regions efficiently. It
covers a 64-bit address space and is structured as a 5-level table.
For x86, we'll have two tables, a pio_table and an mmio_table. For PPC we can
just have a single table. The IO functions can support accesses of up to 8
bytes and can handle input/output in the same function.
I tried to keep this nice and simple so as to not add too much complexity to
kvmctl.
Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
diff --git a/user/main.c b/user/main.c
index 70de8e6..2bc2182 100644
--- a/user/main.c
+++ b/user/main.c
@@ -51,6 +51,29 @@ kvm_context_t kvm;
#define IPI_SIGNAL (SIGRTMIN + 4)
+#define PAGE_SHIFT 12 /* log2 of the page size used by the I/O table */
+#define PAGE_SIZE (1ULL << PAGE_SHIFT) /* page size in bytes */
+#define PAGE_MASK (~(PAGE_SIZE - 1)) /* keeps the page number, clears the in-page offset */
+
+typedef int (io_table_handler_t)(void *, int, int, uint64_t, uint64_t *); /* I/O handler.
+ * Arguments, in the order the callers in this file pass them: the opaque
+ * pointer stored in the entry, the access size in bytes, an is_write flag
+ * (0 = read, 1 = write), the address, and a pointer to the value (the data
+ * for a write; filled in by the handler for a read). */
+
+struct io_table_entry /* one registration: a handler and the cookie handed back to it */
+{
+ io_table_handler_t *handler; /* NULL until io_table_register_address() fills it in */
+ void *opaque; /* passed to handler as its first argument */
+};
+
+struct io_table_level /* one node of the multi-level I/O table */
+{
+ struct io_table_entry full_range; /* entry io_table_lookup() returns when a handler is attached to this node as a whole */
+ struct io_table_level *table; /* child array; NULL until io_table_lookup() allocates it */
+};
+
+struct io_table /* root of an I/O dispatch table */
+{
+ struct io_table_level table; /* top-level node where io_table_lookup() starts its walk */
+};
+
static int ncpus = 1;
static sem_t init_sem;
static __thread int vcpu;
@@ -58,6 +81,8 @@ static int apic_ipi_vector = 0xff;
static sigset_t kernel_sigmask;
static sigset_t ipi_sigmask;
+static struct io_table pio_table; /* table the test_in and test_out callbacks dispatch port accesses through */
+
struct vcpu_info {
pid_t tid;
sem_t sipi_sem;
@@ -67,9 +92,98 @@ struct vcpu_info *vcpus;
static uint32_t apic_sipi_addr;
-static int apic_range(unsigned addr)
+static void *mallocz(size_t size) /* malloc() size bytes and zero them; returns NULL if malloc() fails */
{
- return (addr >= APIC_BASE) && (addr < APIC_BASE + APIC_SIZE);
+ void *ptr;
+ ptr = malloc(size);
+ if (!ptr)
+ return NULL; /* allocation failed; nothing to clear */
+ memset(ptr, 0, size);
+ return ptr;
+}
+
+/*
+ * Walk the I/O table and return the entry responsible for @addr.
+ *
+ * The 64-bit address is split into four directory indices (25 + 9 + 9 + 9
+ * bits, at shifts 39/30/21/12) followed by a 12-bit byte offset within a
+ * page.  A slot reached through a directory with shift S covers 2^S bytes:
+ * its full_range entry stands for that whole span and its child table
+ * refines it.  Below the page-level slot sits a per-byte table.
+ *
+ * alloc != 0 (registration): missing tables are created on the way down.
+ *   If @size is exactly the span of a slot and @addr is aligned to it, that
+ *   slot's full_range entry is returned; otherwise the per-byte entry for
+ *   @addr is returned.  NULL is returned only when an allocation fails.
+ *
+ * alloc == 0 (dispatch): nothing is allocated.  The first entry on the path
+ *   that has a handler installed is returned, or NULL when no handler
+ *   covers @addr.
+ */
+struct io_table_entry *io_table_lookup(struct io_table *io_table,
+				       uint64_t addr, uint64_t size,
+				       int alloc)
+{
+	struct io_table_level *tbl = &io_table->table;
+	struct io_table_entry *entry;
+	int level;
+
+	for (level = 1; level != 5; level++) {
+		int shift = (4 - level) * 9 + PAGE_SHIFT;
+		/* The top level absorbs all address bits above the lower three. */
+		int bits = (level == 1) ? 64 - ((3 * 9) + PAGE_SHIFT) : 9;
+		int mask = (1 << bits) - 1;
+
+		if (tbl->table == NULL) {
+			/* A pure lookup must not grow the table. */
+			if (!alloc)
+				return NULL;
+			tbl->table = mallocz((1 << bits) *
+					     sizeof(struct io_table_level));
+			if (!tbl->table)
+				return NULL;
+		}
+
+		/*
+		 * Descend before testing full_range: it is the child slot,
+		 * not its parent, that spans exactly 2^shift bytes.
+		 */
+		tbl = &tbl->table[(addr >> shift) & mask];
+
+		if (alloc) {
+			if ((1ULL << shift) == size && !(addr & (size - 1)))
+				return &tbl->full_range;
+		} else if (tbl->full_range.handler) {
+			return &tbl->full_range;
+		}
+	}
+
+	/* tbl is now the page-level slot; refine to a single byte. */
+	if (tbl->table == NULL) {
+		if (!alloc)
+			return NULL;
+		tbl->table = mallocz(PAGE_SIZE * sizeof(struct io_table_level));
+		if (!tbl->table)
+			return NULL;
+	}
+
+	entry = &tbl->table[addr & ~PAGE_MASK].full_range;
+
+	/* On dispatch, an entry without a handler means "not registered". */
+	if (!alloc && !entry->handler)
+		return NULL;
+
+	return entry;
+}
+
+/*
+ * Install @handler, together with its @opaque cookie, on the table entry
+ * that io_table_lookup() selects for @addr/@size in allocating mode.
+ *
+ * Returns 0 on success, or -ENOMEM when the lookup yields no entry.
+ */
+int io_table_register_address(struct io_table *io_table,
+			      uint64_t addr, uint64_t size,
+			      io_table_handler_t *handler,
+			      void *opaque)
+{
+	struct io_table_entry *slot = io_table_lookup(io_table, addr, size, 1);
+
+	if (!slot)
+		return -ENOMEM;
+
+	slot->opaque = opaque;
+	slot->handler = handler;
+	return 0;
+}
+
+/*
+ * Register @handler (with @opaque) for every address in
+ * [@start, @start + @size).
+ *
+ * When both @start and @size are page aligned the range is registered one
+ * page at a time; otherwise it is registered byte by byte.
+ *
+ * Returns 0 on success (including an empty range), or the first negative
+ * value returned by io_table_register_address().  Registrations made before
+ * a failure are left in place.
+ */
+int io_table_register_range(struct io_table *io_table,
+			    uint64_t start, uint64_t size,
+			    io_table_handler_t *handler,
+			    void *opaque)
+{
+	uint64_t step = 1;
+	uint64_t i;
+	int ret = 0;
+
+	/* The stride is the page size in bytes, not its log2 (PAGE_SHIFT). */
+	if (!(size & ~PAGE_MASK) && !(start & ~PAGE_MASK))
+		step = PAGE_SIZE;
+
+	for (i = 0; i < size; i += step) {
+		ret = io_table_register_address(io_table,
+						start + i, step,
+						handler, opaque);
+		if (ret < 0)
+			break;
+	}
+
+	return ret;
}
static void apic_send_sipi(int vcpu)
@@ -87,11 +201,9 @@ static void apic_send_ipi(int vcpu)
tkill(v->tid, IPI_SIGNAL);
}
-static int apic_io(unsigned addr, int is_write, uint32_t *value)
+static int apic_io(void *opaque, int size, int is_write,
+ uint64_t addr, uint64_t *value)
{
- if (!apic_range(addr))
- return 0;
-
if (!is_write)
*value = -1u;
@@ -125,61 +237,146 @@ static int apic_io(unsigned addr, int is_write, uint32_t
*value)
apic_send_ipi(*value);
break;
}
- return 1;
+
+ return 0;
+}
+
+/*
+ * Route accesses in [APIC_BASE, APIC_BASE + APIC_SIZE) to apic_io() by
+ * registering that range in pio_table.  Returns the result of
+ * io_table_register_range().
+ */
+static int apic_init(void)
+{
+	int rc;
+
+	rc = io_table_register_range(&pio_table, APIC_BASE, APIC_SIZE,
+				     apic_io, NULL);
+	return rc;
+}
+
+/*
+ * Handler for the two miscellaneous test ports.
+ *
+ * Reads store -1 into *value.  A write to 0xff injects the written value as
+ * an interrupt; a write to 0xf1 prints the written character on stdout,
+ * prefixing each new line with "GUEST: ".  Writes to any other address are
+ * ignored.  Always returns 0.
+ */
+static int misc_io(void *opaque, int size, int is_write,
+		   uint64_t addr, uint64_t *value)
+{
+	/* Non-zero when the next serial character starts a fresh line. */
+	static int newline = 1;
+
+	if (is_write) {
+		if (addr == 0xff) {
+			/* irq injector */
+			printf("injecting interrupt 0x%x\n", (uint8_t)*value);
+			kvm_inject_irq(kvm, 0, *value);
+		} else if (addr == 0xf1) {
+			/* serial */
+			if (newline)
+				fputs("GUEST: ", stdout);
+			putchar(*value);
+			newline = (*value == '\n');
+		}
+		return 0;
+	}
+
+	*value = -1;
+	return 0;
+}
+
+/*
+ * Register misc_io() in pio_table for ports 0xff and 0xf1.
+ * Returns a negative value from the first registration that fails,
+ * otherwise the result of the second registration.
+ */
+static int misc_init(void)
+{
+	int rc = io_table_register_range(&pio_table, 0xff, 1, misc_io, NULL);
+
+	if (rc >= 0)
+		rc = io_table_register_address(&pio_table, 0xf1, 1,
+					       misc_io, NULL);
+	return rc;
+}
static int test_inb(void *opaque, uint16_t addr, uint8_t *value)
{
- printf("inb 0x%x\n", addr);
+ struct io_table_entry *entry;
+
+ entry = io_table_lookup(&pio_table, addr, 1, 0);
+ if (entry) {
+ uint64_t val;
+ entry->handler(entry->opaque, 1, 0, addr, &val);
+ *value = val;
+ } else {
+ *value = -1;
+ printf("inb 0x%x\n", addr);
+ }
+
return 0;
}
static int test_inw(void *opaque, uint16_t addr, uint16_t *value)
{
- printf("inw 0x%x\n", addr);
+ struct io_table_entry *entry;
+
+ entry = io_table_lookup(&pio_table, addr, 2, 0);
+ if (entry) {
+ uint64_t val;
+ entry->handler(entry->opaque, 2, 0, addr, &val);
+ *value = val;
+ } else {
+ *value = -1;
+ printf("inw 0x%x\n", addr);
+ }
+
return 0;
}
static int test_inl(void *opaque, uint16_t addr, uint32_t *value)
{
- if (apic_io(addr, 0, value))
- return 0;
- printf("inl 0x%x\n", addr);
+ struct io_table_entry *entry;
+
+ entry = io_table_lookup(&pio_table, addr, 4, 0);
+ if (entry) {
+ uint64_t val;
+ entry->handler(entry->opaque, 4, 0, addr, &val);
+ *value = val;
+ } else {
+ *value = -1;
+ printf("inl 0x%x\n", addr);
+ }
+
return 0;
}
static int test_outb(void *opaque, uint16_t addr, uint8_t value)
{
- static int newline = 1;
+ struct io_table_entry *entry;
- switch (addr) {
- case 0xff: // irq injector
- printf("injecting interrupt 0x%x\n", value);
- kvm_inject_irq(kvm, 0, value);
- break;
- case 0xf1: // serial
- if (newline)
- fputs("GUEST: ", stdout);
- putchar(value);
- newline = value == '\n';
- break;
- default:
+ entry = io_table_lookup(&pio_table, addr, 1, 0);
+ if (entry) {
+ uint64_t val = value;
+ entry->handler(entry->opaque, 1, 1, addr, &val);
+ } else
printf("outb $0x%x, 0x%x\n", value, addr);
- }
+
return 0;
}
static int test_outw(void *opaque, uint16_t addr, uint16_t value)
{
- printf("outw $0x%x, 0x%x\n", value, addr);
+ struct io_table_entry *entry;
+
+ entry = io_table_lookup(&pio_table, addr, 2, 0);
+ if (entry) {
+ uint64_t val = value;
+ entry->handler(entry->opaque, 2, 1, addr, &val);
+ } else
+ printf("outw $0x%x, 0x%x\n", value, addr);
+
return 0;
}
static int test_outl(void *opaque, uint16_t addr, uint32_t value)
{
- if (apic_io(addr, 1, &value))
- return 0;
- printf("outl $0x%x, 0x%x\n", value, addr);
+ struct io_table_entry *entry;
+
+ entry = io_table_lookup(&pio_table, addr, 4, 0);
+ if (entry) {
+ uint64_t val = value;
+ entry->handler(entry->opaque, 4, 1, addr, &val);
+ } else
+ printf("outl $0x%x, 0x%x\n", value, addr);
+
return 0;
}
@@ -430,6 +627,9 @@ int main(int argc, char **argv)
if (nb_args > 1)
load_file(vm_mem + 0x100000, argv[optind + 1]);
+ apic_init();
+ misc_init();
+
sem_init(&init_sem, 0, 0);
init_vcpu(0);
for (i = 1; i < ncpus; ++i)
-------------------------------------------------------------------------
This SF.net email is sponsored by: Splunk Inc.
Still grepping through log files to find problems? Stop.
Now Search log events and configuration files using AJAX and a browser.
Download your FREE copy of Splunk now >> http://get.splunk.com/
_______________________________________________
kvm-devel mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/kvm-devel