This patch attempts to clean up kvmctl so that it can be more easily made to work for multiple architectures and to support more emulation.
It introduces an IO dispatch mechanism. This mechanism supports subpage granularity but is optimized to efficiently cover regions of pages too. It's a 64-bit address space that's structured as a 5-level table. For x86, we'll have two tables, a pio_table and an mmio_table. For PPC we can just have a single table. The IO functions can support accesses of up to 8 bytes and can handle input/output in the same function. I tried to keep this nice and simple so as to not add too much complexity to kvmctl. Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> diff --git a/user/main.c b/user/main.c index 70de8e6..2bc2182 100644 --- a/user/main.c +++ b/user/main.c @@ -51,6 +51,29 @@ kvm_context_t kvm; #define IPI_SIGNAL (SIGRTMIN + 4) +#define PAGE_SHIFT 12 +#define PAGE_SIZE (1ULL << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +typedef int (io_table_handler_t)(void *, int, int, uint64_t, uint64_t *); + +struct io_table_entry +{ + io_table_handler_t *handler; + void *opaque; +}; + +struct io_table_level +{ + struct io_table_entry full_range; + struct io_table_level *table; +}; + +struct io_table +{ + struct io_table_level table; +}; + static int ncpus = 1; static sem_t init_sem; static __thread int vcpu; @@ -58,6 +81,8 @@ static int apic_ipi_vector = 0xff; static sigset_t kernel_sigmask; static sigset_t ipi_sigmask; +static struct io_table pio_table; + struct vcpu_info { pid_t tid; sem_t sipi_sem; @@ -67,9 +92,98 @@ struct vcpu_info *vcpus; static uint32_t apic_sipi_addr; -static int apic_range(unsigned addr) +static void *mallocz(size_t size) { - return (addr >= APIC_BASE) && (addr < APIC_BASE + APIC_SIZE); + void *ptr; + ptr = malloc(size); + if (!ptr) + return NULL; + memset(ptr, 0, size); + return ptr; +} + +struct io_table_entry *io_table_lookup(struct io_table *io_table, + uint64_t addr, uint64_t size, + int alloc) +{ + struct io_table_level *tbl = &io_table->table; + int level = 1; + + while (level != 5) { + int shift = (4 - level) * 9 + PAGE_SHIFT; + int bits, 
mask; + + if (level == 1) + bits = 64 - ((3 * 9) + PAGE_SHIFT); + else + bits = 9; + + mask = (1 << bits) - 1; + + if (alloc) { + if ((1ULL << shift) == size && !(addr & (size - 1))) + return &tbl->full_range; + } else if (tbl->full_range.handler) + return &tbl->full_range; + + if (tbl->table == NULL) { + tbl->table = mallocz((1 << bits) * + sizeof(struct io_table_level)); + if (!tbl->table) + return NULL; + } + + tbl = &tbl->table[(addr >> shift) & mask]; + level++; + } + + if (tbl->table == NULL) { + tbl->table = mallocz(PAGE_SIZE * sizeof(struct io_table_level)); + if (!tbl->table) + return NULL; + } + + return &tbl->table[addr & ~PAGE_MASK].full_range; +} + +int io_table_register_address(struct io_table *io_table, + uint64_t addr, uint64_t size, + io_table_handler_t *handler, + void *opaque) +{ + struct io_table_entry *entry; + + entry = io_table_lookup(io_table, addr, size, 1); + if (entry == NULL) + return -ENOMEM; + + entry->handler = handler; + entry->opaque = opaque; + + return 0; +} + +int io_table_register_range(struct io_table *io_table, + uint64_t start, uint64_t size, + io_table_handler_t *handler, + void *opaque) +{ + uint64_t step = 1; + uint64_t i; + int ret = 0; + + if (!(size & ~PAGE_MASK) && !(start & ~PAGE_MASK)) + step = PAGE_SHIFT; + + for (i = 0; i < size; i += step) { + ret = io_table_register_address(io_table, + start + i, step, + handler, opaque); + if (ret < 0) + break; + } + + return ret; } static void apic_send_sipi(int vcpu) @@ -87,11 +201,9 @@ static void apic_send_ipi(int vcpu) tkill(v->tid, IPI_SIGNAL); } -static int apic_io(unsigned addr, int is_write, uint32_t *value) +static int apic_io(void *opaque, int size, int is_write, + uint64_t addr, uint64_t *value) { - if (!apic_range(addr)) - return 0; - if (!is_write) *value = -1u; @@ -125,61 +237,146 @@ static int apic_io(unsigned addr, int is_write, uint32_t *value) apic_send_ipi(*value); break; } - return 1; + + return 0; +} + +static int apic_init(void) +{ + return 
io_table_register_range(&pio_table, + APIC_BASE, APIC_SIZE, + apic_io, NULL); +} + +static int misc_io(void *opaque, int size, int is_write, + uint64_t addr, uint64_t *value) +{ + static int newline = 1; + + if (!is_write) { + *value = -1; + return 0; + } + + switch (addr) { + case 0xff: // irq injector + printf("injecting interrupt 0x%x\n", (uint8_t)*value); + kvm_inject_irq(kvm, 0, *value); + break; + case 0xf1: // serial + if (newline) + fputs("GUEST: ", stdout); + putchar(*value); + newline = *value == '\n'; + break; + } + + return 0; +} + +static int misc_init(void) +{ + int err; + + err = io_table_register_range(&pio_table, + 0xff, 1, misc_io, NULL); + if (err < 0) + return err; + + return io_table_register_address(&pio_table, + 0xf1, 1, misc_io, NULL); } static int test_inb(void *opaque, uint16_t addr, uint8_t *value) { - printf("inb 0x%x\n", addr); + struct io_table_entry *entry; + + entry = io_table_lookup(&pio_table, addr, 1, 0); + if (entry) { + uint64_t val; + entry->handler(entry->opaque, 1, 0, addr, &val); + *value = val; + } else { + *value = -1; + printf("inb 0x%x\n", addr); + } + return 0; } static int test_inw(void *opaque, uint16_t addr, uint16_t *value) { - printf("inw 0x%x\n", addr); + struct io_table_entry *entry; + + entry = io_table_lookup(&pio_table, addr, 2, 0); + if (entry) { + uint64_t val; + entry->handler(entry->opaque, 2, 0, addr, &val); + *value = val; + } else { + *value = -1; + printf("inw 0x%x\n", addr); + } + return 0; } static int test_inl(void *opaque, uint16_t addr, uint32_t *value) { - if (apic_io(addr, 0, value)) - return 0; - printf("inl 0x%x\n", addr); + struct io_table_entry *entry; + + entry = io_table_lookup(&pio_table, addr, 4, 0); + if (entry) { + uint64_t val; + entry->handler(entry->opaque, 4, 0, addr, &val); + *value = val; + } else { + *value = -1; + printf("inl 0x%x\n", addr); + } + return 0; } static int test_outb(void *opaque, uint16_t addr, uint8_t value) { - static int newline = 1; + struct io_table_entry 
*entry; - switch (addr) { - case 0xff: // irq injector - printf("injecting interrupt 0x%x\n", value); - kvm_inject_irq(kvm, 0, value); - break; - case 0xf1: // serial - if (newline) - fputs("GUEST: ", stdout); - putchar(value); - newline = value == '\n'; - break; - default: + entry = io_table_lookup(&pio_table, addr, 1, 0); + if (entry) { + uint64_t val = value; + entry->handler(entry->opaque, 1, 1, addr, &val); + } else printf("outb $0x%x, 0x%x\n", value, addr); - } + return 0; } static int test_outw(void *opaque, uint16_t addr, uint16_t value) { - printf("outw $0x%x, 0x%x\n", value, addr); + struct io_table_entry *entry; + + entry = io_table_lookup(&pio_table, addr, 2, 0); + if (entry) { + uint64_t val = value; + entry->handler(entry->opaque, 2, 1, addr, &val); + } else + printf("outw $0x%x, 0x%x\n", value, addr); + return 0; } static int test_outl(void *opaque, uint16_t addr, uint32_t value) { - if (apic_io(addr, 1, &value)) - return 0; - printf("outl $0x%x, 0x%x\n", value, addr); + struct io_table_entry *entry; + + entry = io_table_lookup(&pio_table, addr, 4, 0); + if (entry) { + uint64_t val = value; + entry->handler(entry->opaque, 4, 1, addr, &val); + } else + printf("outl $0x%x, 0x%x\n", value, addr); + return 0; } @@ -430,6 +627,9 @@ int main(int argc, char **argv) if (nb_args > 1) load_file(vm_mem + 0x100000, argv[optind + 1]); + apic_init(); + misc_init(); + sem_init(&init_sem, 0, 0); init_vcpu(0); for (i = 1; i < ncpus; ++i) ------------------------------------------------------------------------- This SF.net email is sponsored by: Splunk Inc. Still grepping through log files to find problems? Stop. Now Search log events and configuration files using AJAX and a browser. Download your FREE copy of Splunk now >> http://get.splunk.com/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel