[COMMIT master] qemu-kvm-x86.c: remove extraneous line continuation
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index f2c81f0..c5d44e0 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -659,7 +659,7 @@ static int get_msr_entry(struct kvm_msr_entry *entry, CPUState *env) #endif default: #ifdef KVM_CAP_MCE -if (entry-index = MSR_MC0_CTL \ +if (entry-index = MSR_MC0_CTL entry-index MSR_MC0_CTL + (env-mcg_cap 0xff) * 4) { env-mce_banks[entry-index - MSR_MC0_CTL] = entry-data; break; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu-kvm-x86.c: reindent
From: Avi Kivity a...@redhat.com Reindent qemu-kvm-x86.c according to CODING_STYLE. The original used a mix of qemu and linux indentation styles. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 4c32771..f2c81f0 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -24,7 +24,7 @@ #include kvm.h #include hw/apic.h -#define MSR_IA32_TSC 0x10 +#define MSR_IA32_TSC0x10 static struct kvm_msr_list *kvm_msr_list; extern unsigned int kvm_shadow_memory; @@ -35,205 +35,203 @@ static int lm_capable_kernel; int kvm_set_tss_addr(kvm_context_t kvm, unsigned long addr) { - int r; -/* - * Tell fw_cfg to notify the BIOS to reserve the range. - */ -if (e820_add_entry(addr, 0x4000, E820_RESERVED) 0) { -perror(e820_add_entry() table is full); -exit(1); -} +int r; +/* + * Tell fw_cfg to notify the BIOS to reserve the range. + */ +if (e820_add_entry(addr, 0x4000, E820_RESERVED) 0) { +perror(e820_add_entry() table is full); +exit(1); +} - r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr); - if (r 0) { - fprintf(stderr, kvm_set_tss_addr: %m\n); - return r; - } - return 0; +r = kvm_vm_ioctl(kvm_state, KVM_SET_TSS_ADDR, addr); +if (r 0) { +fprintf(stderr, kvm_set_tss_addr: %m\n); +return r; +} +return 0; } static int kvm_init_tss(kvm_context_t kvm) { - int r; - - r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); - if (r 0) { - /* -* this address is 3 pages before the bios, and the bios should present -* as unavaible memory -*/ - r = kvm_set_tss_addr(kvm, 0xfeffd000); - if (r 0) { - fprintf(stderr, kvm_init_tss: unable to set tss addr\n); - return r; - } - } else { - fprintf(stderr, kvm does not support KVM_CAP_SET_TSS_ADDR\n); - } - return 0; +int r; + +r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_TSS_ADDR); +if (r 0) { +/* + * this address is 3 pages before the bios, and the bios should present + * as unavaible memory + */ +r = kvm_set_tss_addr(kvm, 0xfeffd000); 
+if (r 0) { +fprintf(stderr, kvm_init_tss: unable to set tss addr\n); +return r; +} +} else { +fprintf(stderr, kvm does not support KVM_CAP_SET_TSS_ADDR\n); +} +return 0; } static int kvm_set_identity_map_addr(kvm_context_t kvm, uint64_t addr) { #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR - int r; - - r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); - if (r 0) { - r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr); - if (r == -1) { - fprintf(stderr, kvm_set_identity_map_addr: %m\n); - return -errno; - } - return 0; - } +int r; + +r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); +if (r 0) { +r = kvm_vm_ioctl(kvm_state, KVM_SET_IDENTITY_MAP_ADDR, addr); +if (r == -1) { +fprintf(stderr, kvm_set_identity_map_addr: %m\n); +return -errno; +} +return 0; +} #endif - return -ENOSYS; +return -ENOSYS; } static int kvm_init_identity_map_page(kvm_context_t kvm) { #ifdef KVM_CAP_SET_IDENTITY_MAP_ADDR - int r; - - r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); - if (r 0) { - /* -* this address is 4 pages before the bios, and the bios should present -* as unavaible memory -*/ - r = kvm_set_identity_map_addr(kvm, 0xfeffc000); - if (r 0) { - fprintf(stderr, kvm_init_identity_map_page: - unable to set identity mapping addr\n); - return r; - } - - } +int r; + +r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_IDENTITY_MAP_ADDR); +if (r 0) { +/* + * this address is 4 pages before the bios, and the bios should present + * as unavaible memory + */ +r = kvm_set_identity_map_addr(kvm, 0xfeffc000); +if (r 0) { +fprintf(stderr, kvm_init_identity_map_page: +unable to set identity mapping addr\n); +return r; +} +} #endif - return 0; +return 0; } static int kvm_create_pit(kvm_context_t kvm) { #ifdef KVM_CAP_PIT - int r; - - kvm_state-pit_in_kernel = 0; - if
[COMMIT master] qemu-kvm-x86.c: add braces where appropriate
From: Avi Kivity a...@redhat.com Adjust to comply with CODING_STYLE, at least where braces are concerned. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index c5d44e0..46257d6 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -123,9 +123,9 @@ static int kvm_create_pit(kvm_context_t kvm) r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_PIT); if (r 0) { r = kvm_vm_ioctl(kvm_state, KVM_CREATE_PIT); -if (r = 0) +if (r = 0) { kvm_state-pit_in_kernel = 1; -else { +} else { fprintf(stderr, Create kernel PIC irqchip failed\n); return r; } @@ -141,20 +141,24 @@ int kvm_arch_create(kvm_context_t kvm, unsigned long phys_mem_bytes, int r = 0; r = kvm_init_tss(kvm); -if (r 0) +if (r 0) { return r; +} r = kvm_init_identity_map_page(kvm); -if (r 0) +if (r 0) { return r; +} r = kvm_create_pit(kvm); -if (r 0) +if (r 0) { return r; +} r = kvm_init_coalesced_mmio(kvm); -if (r 0) +if (r 0) { return r; +} return 0; } @@ -211,12 +215,14 @@ int kvm_get_lapic(CPUState *env, struct kvm_lapic_state *s) { int r = 0; -if (!kvm_irqchip_in_kernel()) +if (!kvm_irqchip_in_kernel()) { return r; +} r = kvm_vcpu_ioctl(env, KVM_GET_LAPIC, s); -if (r 0) +if (r 0) { fprintf(stderr, KVM_GET_LAPIC failed\n); +} return r; } @@ -224,13 +230,15 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s) { int r = 0; -if (!kvm_irqchip_in_kernel()) +if (!kvm_irqchip_in_kernel()) { return 0; +} r = kvm_vcpu_ioctl(env, KVM_SET_LAPIC, s); -if (r 0) +if (r 0) { fprintf(stderr, KVM_SET_LAPIC failed\n); +} return r; } @@ -240,30 +248,34 @@ int kvm_set_lapic(CPUState *env, struct kvm_lapic_state *s) int kvm_get_pit(kvm_context_t kvm, struct kvm_pit_state *s) { -if (!kvm_pit_in_kernel()) +if (!kvm_pit_in_kernel()) { return 0; +} return kvm_vm_ioctl(kvm_state, KVM_GET_PIT, s); } int kvm_set_pit(kvm_context_t kvm, struct kvm_pit_state *s) { -if (!kvm_pit_in_kernel()) +if (!kvm_pit_in_kernel()) { return 0; +} return 
kvm_vm_ioctl(kvm_state, KVM_SET_PIT, s); } #ifdef KVM_CAP_PIT_STATE2 int kvm_get_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) { -if (!kvm_pit_in_kernel()) +if (!kvm_pit_in_kernel()) { return 0; +} return kvm_vm_ioctl(kvm_state, KVM_GET_PIT2, ps2); } int kvm_set_pit2(kvm_context_t kvm, struct kvm_pit_state2 *ps2) { -if (!kvm_pit_in_kernel()) +if (!kvm_pit_in_kernel()) { return 0; +} return kvm_vm_ioctl(kvm_state, KVM_SET_PIT2, ps2); } @@ -303,12 +315,14 @@ void kvm_show_code(CPUState *env) } rip = sregs.cs.base + regs.rip; back_offset = regs.rip; -if (back_offset 20) +if (back_offset 20) { back_offset = 20; +} *code_str = 0; for (n = -back_offset; n SHOW_CODE_LEN-back_offset; ++n) { -if (n == 0) +if (n == 0) { strcat(code_str, --); +} cpu_physical_memory_rw(rip + n, code, 1, 1); sprintf(code_str + strlen(code_str), %02x, code); } @@ -326,8 +340,9 @@ static struct kvm_msr_list *kvm_get_msr_list(void) sizer.nmsrs = 0; r = kvm_ioctl(kvm_state, KVM_GET_MSR_INDEX_LIST, sizer); -if (r 0 r != -E2BIG) +if (r 0 r != -E2BIG) { return NULL; +} /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. 
*/ msrs = qemu_malloc(MAX(1024, sizeof(*msrs) + @@ -536,8 +551,9 @@ static int kvm_enable_tpr_access_reporting(CPUState *env) struct kvm_tpr_access_ctl tac = { .enabled = 1 }; r = kvm_ioctl(env-kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_VAPIC); -if (r = 0) +if (r = 0) { return -ENOSYS; +} return kvm_vcpu_ioctl(env, KVM_TPR_ACCESS_REPORTING, tac); } #endif @@ -581,22 +597,27 @@ int kvm_arch_qemu_create_context(void) uname(utsname); lm_capable_kernel = strcmp(utsname.machine, x86_64) == 0; -if (kvm_shadow_memory) +if (kvm_shadow_memory) { kvm_set_shadow_pages(kvm_context, kvm_shadow_memory); +} kvm_msr_list = kvm_get_msr_list(); -if (!kvm_msr_list) +if (!kvm_msr_list) { return -1; +} for (i = 0; i kvm_msr_list-nmsrs; ++i) { -if (kvm_msr_list-indices[i] == MSR_STAR) +if (kvm_msr_list-indices[i] == MSR_STAR) { kvm_has_msr_star = 1; -if (kvm_msr_list-indices[i] == MSR_VM_HSAVE_PA) +} +if (kvm_msr_list-indices[i] == MSR_VM_HSAVE_PA) {
[COMMIT master] qemu-kvm.c: add braces where appropriate
From: Avi Kivity a...@redhat.com Adjust to comply with CODING_STYLE, at least where braces are concerned. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm.c b/qemu-kvm.c index c9818de..36f3a2e 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -201,8 +201,9 @@ int kvm_init(int smp_cpus) kvm_context-max_gsi = gsi_bits; /* Mark any over-allocated bits as already in use */ -for (i = gsi_count; i gsi_bits; i++) +for (i = gsi_count; i gsi_bits; i++) { set_gsi(kvm_context, i); +} } kvm_cpu_register_phys_memory_client(); @@ -296,8 +297,9 @@ static int kvm_set_boot_vcpu_id(kvm_context_t kvm, uint32_t id) { #ifdef KVM_CAP_SET_BOOT_CPU_ID int r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_SET_BOOT_CPU_ID); -if (r 0) +if (r 0) { return kvm_vm_ioctl(kvm_state, KVM_SET_BOOT_CPU_ID, id); +} return -ENOSYS; #else return -ENOSYS; @@ -352,8 +354,9 @@ void kvm_create_irqchip(kvm_context_t kvm) #if defined(KVM_CAP_IRQ_INJECT_STATUS) defined(KVM_IRQ_LINE_STATUS) r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_IRQ_INJECT_STATUS); -if (r 0) +if (r 0) { kvm-irqchip_inject_ioctl = KVM_IRQ_LINE_STATUS; +} #endif kvm-irqchip_in_kernel = 1; } else @@ -369,17 +372,22 @@ int kvm_create(kvm_context_t kvm, unsigned long phys_mem_bytes, void **vm_mem) int r, i; r = kvm_create_vm(kvm); -if (r 0) +if (r 0) { return r; +} r = kvm_arch_create(kvm, phys_mem_bytes, vm_mem); -if (r 0) +if (r 0) { return r; -for (i = 0; i ARRAY_SIZE(kvm_state-slots); i++) +} +for (i = 0; i ARRAY_SIZE(kvm_state-slots); i++) { kvm_state-slots[i].slot = i; +} r = kvm_create_default_phys_mem(kvm, phys_mem_bytes, vm_mem); -if (r 0) +if (r 0) { return r; +} + kvm_create_irqchip(kvm); return 0; @@ -392,13 +400,15 @@ int kvm_set_irq_level(kvm_context_t kvm, int irq, int level, int *status) struct kvm_irq_level event; int r; -if (!kvm-irqchip_in_kernel) +if (!kvm-irqchip_in_kernel) { return 0; +} event.level = level; event.irq = irq; r = 
kvm_vm_ioctl(kvm_state, kvm-irqchip_inject_ioctl, event); -if (r 0) +if (r 0) { perror(kvm_set_irq_level); +} if (status) { #ifdef KVM_CAP_IRQ_INJECT_STATUS @@ -416,8 +426,9 @@ int kvm_get_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) { int r; -if (!kvm-irqchip_in_kernel) +if (!kvm-irqchip_in_kernel) { return 0; +} r = kvm_vm_ioctl(kvm_state, KVM_GET_IRQCHIP, chip); if (r 0) { perror(kvm_get_irqchip\n); @@ -429,8 +440,9 @@ int kvm_set_irqchip(kvm_context_t kvm, struct kvm_irqchip *chip) { int r; -if (!kvm-irqchip_in_kernel) +if (!kvm-irqchip_in_kernel) { return 0; +} r = kvm_vm_ioctl(kvm_state, KVM_SET_IRQCHIP, chip); if (r 0) { perror(kvm_set_irqchip\n); @@ -487,8 +499,9 @@ int kvm_get_mpstate(CPUState *env, struct kvm_mp_state *mp_state) int r; r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); -if (r 0) +if (r 0) { return kvm_vcpu_ioctl(env, KVM_GET_MP_STATE, mp_state); +} return -ENOSYS; } @@ -497,8 +510,9 @@ int kvm_set_mpstate(CPUState *env, struct kvm_mp_state *mp_state) int r; r = kvm_ioctl(kvm_state, KVM_CHECK_EXTENSION, KVM_CAP_MP_STATE); -if (r 0) +if (r 0) { return kvm_vcpu_ioctl(env, KVM_SET_MP_STATE, mp_state); +} return -ENOSYS; } #endif @@ -534,8 +548,9 @@ static int handle_mmio(CPUState *env) void *data = kvm_run-mmio.data; /* hack: Red Hat 7.1 generates these weird accesses. 
*/ -if ((addr 0xa - 4 addr = 0xa) kvm_run-mmio.len == 3) +if ((addr 0xa - 4 addr = 0xa) kvm_run-mmio.len == 3) { return 0; +} cpu_physical_memory_rw(addr, data, kvm_run-mmio.len, kvm_run-mmio.is_write); return 0; @@ -596,13 +611,15 @@ int kvm_run(CPUState *env) } push_nmi(kvm); #if !defined(__s390__) -if (!kvm-irqchip_in_kernel) +if (!kvm-irqchip_in_kernel) { run-request_interrupt_window = kvm_arch_try_push_interrupts(env); +} #endif r = pre_kvm_run(kvm, env); -if (r) +if (r) { return r; +} if (env-exit_request) { env-exit_request = 0; pthread_kill(env-kvm_cpu_state.thread, SIG_IPI); @@ -684,9 +701,10 @@ int kvm_run(CPUState *env) break; } } - more: -if (!r) +more: +if (!r) { goto again; +} return r; } @@ -822,13 +840,15 @@ int kvm_add_routing_entry(kvm_context_t kvm, if (kvm-irq_routes-nr ==
[COMMIT master] kvm: reset MSR_IA32_CR_PAT correctly
From: Avi Kivity a...@redhat.com The power-on value of MSR_IA32_CR_PAT is not 0 - that disables cacheing and makes everything dog slow. Fix to reset MSR_IA32_CR_PAT to the correct value. Signed-off-by: Avi Kivity a...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 46257d6..016dcf1 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1323,12 +1323,21 @@ static int kvm_reset_msrs(CPUState *env) } msr_data; int n; struct kvm_msr_entry *msrs = msr_data.entries; +uint32_t index; +uint64_t data; if (!kvm_msr_list) { return -1; } for (n = 0; n kvm_msr_list-nmsrs; n++) { +index = kvm_msr_list-indices[n]; +switch (index) { +case MSR_PAT: +data = 0x0007040600070406ULL; +default: +data = 0; +} kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], 0); } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu-kvm: drop posix-aio-compat.c's signalfd usage
From: Marcelo Tosatti mtosa...@redhat.com Block SIGUSR2, which makes the signal be handled through qemu-kvm.c's signalfd. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/posix-aio-compat.c b/posix-aio-compat.c index c05c77b..a67ffe3 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -26,7 +26,6 @@ #include osdep.h #include qemu-common.h #include block_int.h -#include compatfd.h #include block/raw-posix-aio.h @@ -54,7 +53,7 @@ struct qemu_paiocb { }; typedef struct PosixAioState { -int fd; +int rfd, wfd; struct qemu_paiocb *first_aio; } PosixAioState; @@ -473,29 +472,18 @@ static int posix_aio_process_queue(void *opaque) static void posix_aio_read(void *opaque) { PosixAioState *s = opaque; -union { -struct qemu_signalfd_siginfo siginfo; -char buf[128]; -} sig; -size_t offset; +ssize_t len; -/* try to read from signalfd, don't freak out if we can't read anything */ -offset = 0; -while (offset 128) { -ssize_t len; +/* read all bytes from signal pipe */ +for (;;) { +char bytes[16]; -len = read(s-fd, sig.buf + offset, 128 - offset); +len = read(s-rfd, bytes, sizeof(bytes)); if (len == -1 errno == EINTR) -continue; -if (len == -1 errno == EAGAIN) { -/* there is no natural reason for this to happen, - * so we'll spin hard until we get everything just - * to be on the safe side. 
*/ -if (offset 0) -continue; -} - -offset += len; +continue; /* try again */ +if (len == sizeof(bytes)) +continue; /* more to read */ +break; } posix_aio_process_queue(s); @@ -509,6 +497,20 @@ static int posix_aio_flush(void *opaque) static PosixAioState *posix_aio_state; +static void aio_signal_handler(int signum) +{ +if (posix_aio_state) { +char byte = 0; +ssize_t ret; + +ret = write(posix_aio_state-wfd, byte, sizeof(byte)); +if (ret 0 errno != EAGAIN) +die(write()); +} + +qemu_service_io(); +} + static void paio_remove(struct qemu_paiocb *acb) { struct qemu_paiocb **pacb; @@ -610,8 +612,9 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, int paio_init(void) { -sigset_t mask; +struct sigaction act; PosixAioState *s; +int fds[2]; int ret; if (posix_aio_state) @@ -619,21 +622,24 @@ int paio_init(void) s = qemu_malloc(sizeof(PosixAioState)); -/* Make sure to block AIO signal */ -sigemptyset(mask); -sigaddset(mask, SIGUSR2); -sigprocmask(SIG_BLOCK, mask, NULL); +sigfillset(act.sa_mask); +act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ +act.sa_handler = aio_signal_handler; +sigaction(SIGUSR2, act, NULL); s-first_aio = NULL; -s-fd = qemu_signalfd(mask); -if (s-fd == -1) { -fprintf(stderr, failed to create signalfd\n); +if (qemu_pipe(fds) == -1) { +fprintf(stderr, failed to create pipe\n); return -1; } -fcntl(s-fd, F_SETFL, O_NONBLOCK); +s-rfd = fds[0]; +s-wfd = fds[1]; + +fcntl(s-rfd, F_SETFL, O_NONBLOCK); +fcntl(s-wfd, F_SETFL, O_NONBLOCK); -qemu_aio_set_fd_handler(s-fd, posix_aio_read, NULL, posix_aio_flush, +qemu_aio_set_fd_handler(s-rfd, posix_aio_read, NULL, posix_aio_flush, posix_aio_process_queue, s); ret = pthread_attr_init(attr); diff --git a/qemu-kvm.c b/qemu-kvm.c index 060c47d..2fb927c 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1680,6 +1680,7 @@ int kvm_main_loop(void) sigemptyset(mask); sigaddset(mask, SIGIO); sigaddset(mask, SIGALRM); +sigaddset(mask, SIGUSR2); sigaddset(mask, SIGBUS); sigprocmask(SIG_BLOCK, mask, 
NULL); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Fix "kvm: reset MSR_IA32_CR_PAT correctly" thinkos
From: Marcelo Tosatti mtosa...@redhat.com Missing break and wrong parameter. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/qemu-kvm-x86.c b/qemu-kvm-x86.c index 016dcf1..fd974b3 100644 --- a/qemu-kvm-x86.c +++ b/qemu-kvm-x86.c @@ -1335,10 +1335,11 @@ static int kvm_reset_msrs(CPUState *env) switch (index) { case MSR_PAT: data = 0x0007040600070406ULL; +break; default: data = 0; } -kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], 0); +kvm_msr_entry_set(msrs[n], kvm_msr_list-indices[n], data); } msr_data.info.nmsrs = n; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Add missing tcg_prologue_init() for --disable-cpu-emulation build
From: Avi Kivity a...@redhat.com Add missing tcg_prologue_init() and tcg_ctx, remove code_gen_max_block_size. Fixes ./configure --disable-cpu-emulation. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/target-i386/fake-exec.c b/target-i386/fake-exec.c index dfa202d..e6f8363 100644 --- a/target-i386/fake-exec.c +++ b/target-i386/fake-exec.c @@ -12,22 +12,20 @@ */ #include exec.h #include cpu.h +#include tcg.h int code_copy_enabled = 0; CCTable cc_table[CC_OP_NB]; +TCGContext tcg_ctx; + void cpu_dump_statistics (CPUState *env, FILE*f, int (*cpu_fprintf)(FILE *f, const char *fmt, ...), int flags) { } -unsigned long code_gen_max_block_size(void) -{ -return 32; -} - void cpu_gen_init(void) { } @@ -48,3 +46,7 @@ int cpu_x86_gen_code(CPUState *env, TranslationBlock *tb, int *gen_code_size_ptr void optimize_flags_init(void) { } + +void tcg_prologue_init(TCGContext *ctx) +{ +} -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] qemu-kvm: use upstream eventfd code
From: Marcelo Tosatti mtosa...@redhat.com Upstream code is equivalent. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/cpus.c b/cpus.c index 8319d4e..c545a62 100644 --- a/cpus.c +++ b/cpus.c @@ -290,11 +290,6 @@ void qemu_notify_event(void) { CPUState *env = cpu_single_env; -if (kvm_enabled()) { -qemu_kvm_notify_work(); -return; -} - qemu_event_increment (); if (env) { cpu_exit(env); diff --git a/qemu-kvm.c b/qemu-kvm.c index 36f3a2e..060c47d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -71,7 +71,6 @@ static int qemu_system_ready; #define SIG_IPI (SIGRTMIN+4) pthread_t io_thread; -static int io_thread_fd = -1; static int io_thread_sigfd = -1; static CPUState *kvm_debug_cpu_requested; @@ -1634,28 +1633,6 @@ int kvm_init_ap(void) return 0; } -void qemu_kvm_notify_work(void) -{ -/* Write 8 bytes to be compatible with eventfd. */ -static uint64_t val = 1; -ssize_t ret; - -if (io_thread_fd == -1) { -return; -} - -do { -ret = write(io_thread_fd, val, sizeof(val)); -} while (ret 0 errno == EINTR); - -/* EAGAIN is fine in case we have a pipe. */ -if (ret 0 errno != EAGAIN) { - fprintf(stderr, qemu_kvm_notify_work: write() filed: %s\n, - strerror(errno)); - exit (1); -} -} - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. @@ -1692,41 +1669,14 @@ static void sigfd_handler(void *opaque) } } -/* Used to break IO thread out of select */ -static void io_thread_wakeup(void *opaque) -{ -int fd = (unsigned long) opaque; -ssize_t len; -char buffer[512]; - -/* Drain the notify pipe. For eventfd, only 8 bytes will be read. 
*/ -do { -len = read(fd, buffer, sizeof(buffer)); -} while ((len == -1 errno == EINTR) || len == sizeof(buffer)); -} - int kvm_main_loop(void) { -int fds[2]; sigset_t mask; int sigfd; io_thread = pthread_self(); qemu_system_ready = 1; -if (qemu_eventfd(fds) == -1) { -fprintf(stderr, failed to create eventfd\n); -return -errno; -} - -fcntl(fds[0], F_SETFL, O_NONBLOCK); -fcntl(fds[1], F_SETFL, O_NONBLOCK); - -qemu_set_fd_handler2(fds[0], NULL, io_thread_wakeup, NULL, - (void *)(unsigned long) fds[0]); - -io_thread_fd = fds[1]; - sigemptyset(mask); sigaddset(mask, SIGIO); sigaddset(mask, SIGALRM); diff --git a/qemu-kvm.h b/qemu-kvm.h index 42c990d..9809574 100644 --- a/qemu-kvm.h +++ b/qemu-kvm.h @@ -863,8 +863,6 @@ void qemu_kvm_aio_wait_start(void); void qemu_kvm_aio_wait(void); void qemu_kvm_aio_wait_end(void); -void qemu_kvm_notify_work(void); - void kvm_tpr_access_report(CPUState *env, uint64_t rip, int is_write); int kvm_arch_init_irq_routing(void); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Remove kvm/doxygen.conf
From: Avi Kivity a...@redhat.com Unused. Signed-off-by: Avi Kivity a...@redhat.com diff --git a/kvm/doxygen.conf b/kvm/doxygen.conf deleted file mode 100644 index 21a04c0..000 --- a/kvm/doxygen.conf +++ /dev/null @@ -1,1252 +0,0 @@ -# Doxyfile 1.5.1 - -# This file describes the settings to be used by the documentation system -# doxygen (www.doxygen.org) for a project -# -# All text after a hash (#) is considered a comment and will be ignored -# The format is: -# TAG = value [value, ...] -# For lists items can also be appended using: -# TAG += value [value, ...] -# Values that contain spaces should be placed between quotes ( ) - -#--- -# Project related configuration options -#--- - -# The PROJECT_NAME tag is a single word (or a sequence of words surrounded -# by quotes) that should identify the project. - -PROJECT_NAME = KVM - -# The PROJECT_NUMBER tag can be used to enter a project or revision number. -# This could be handy for archiving the generated documentation or -# if some version control system is used. - -PROJECT_NUMBER = Release 7 - -# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) -# base path where the generated documentation will be put. -# If a relative path is entered, it will be relative to the location -# where doxygen was started. If left blank the current directory will be used. - -OUTPUT_DIRECTORY = docs - -# If the CREATE_SUBDIRS tag is set to YES, then doxygen will create -# 4096 sub-directories (in 2 levels) under the output directory of each output -# format and will distribute the generated files over these directories. -# Enabling this option can be useful when feeding doxygen a huge amount of -# source files, where putting all generated files in the same directory would -# otherwise cause performance problems for the file system. - -CREATE_SUBDIRS = NO - -# The OUTPUT_LANGUAGE tag is used to specify the language in which all -# documentation generated by doxygen is written. 
Doxygen will use this -# information to generate all constant output in the proper language. -# The default language is English, other supported languages are: -# Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, -# Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, -# Italian, Japanese, Japanese-en (Japanese with English messages), Korean, -# Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, -# Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. - -OUTPUT_LANGUAGE= English - -# This tag can be used to specify the encoding used in the generated output. -# The encoding is not always determined by the language that is chosen, -# but also whether or not the output is meant for Windows or non-Windows users. -# In case there is a difference, setting the USE_WINDOWS_ENCODING tag to YES -# forces the Windows encoding (this is the default for the Windows binary), -# whereas setting the tag to NO uses a Unix-style encoding (the default for -# all platforms other than Windows). - -USE_WINDOWS_ENCODING = NO - -# If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will -# include brief member descriptions after the members that are listed in -# the file and class documentation (similar to JavaDoc). -# Set to NO to disable this. - -BRIEF_MEMBER_DESC = YES - -# If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend -# the brief description of a member or function before the detailed description. -# Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the -# brief descriptions will be completely suppressed. - -REPEAT_BRIEF = YES - -# This tag implements a quasi-intelligent brief description abbreviator -# that is used to form the text in various listings. Each string -# in this list, if found as the leading text of the brief description, will be -# stripped from the text and the result after processing the whole list, is -# used as the annotated text. 
Otherwise, the brief description is used as-is. -# If left blank, the following values are used ($name is automatically -# replaced with the name of the entity): The $name class The $name widget -# The $name file is provides specifies contains -# represents a an the - -ABBREVIATE_BRIEF = - -# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then -# Doxygen will generate a detailed section even if there is only a brief -# description. - -ALWAYS_DETAILED_SEC= NO - -# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all -# inherited members of a class in the documentation of that class as if those -# members were ordinary class members. Constructors, destructors and assignment -# operators of the base classes will not be shown.
[COMMIT master] Revert "qemu-kvm: drop posix-aio-compat.c's signalfd usage"
From: Marcelo Tosatti mtosa...@redhat.com This reverts commit cb375ad1a62ba9de0207d144d0ad8ca1bee09d33. Breaks FC8 32/64 install. Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/posix-aio-compat.c b/posix-aio-compat.c index a67ffe3..c05c77b 100644 --- a/posix-aio-compat.c +++ b/posix-aio-compat.c @@ -26,6 +26,7 @@ #include osdep.h #include qemu-common.h #include block_int.h +#include compatfd.h #include block/raw-posix-aio.h @@ -53,7 +54,7 @@ struct qemu_paiocb { }; typedef struct PosixAioState { -int rfd, wfd; +int fd; struct qemu_paiocb *first_aio; } PosixAioState; @@ -472,18 +473,29 @@ static int posix_aio_process_queue(void *opaque) static void posix_aio_read(void *opaque) { PosixAioState *s = opaque; -ssize_t len; +union { +struct qemu_signalfd_siginfo siginfo; +char buf[128]; +} sig; +size_t offset; -/* read all bytes from signal pipe */ -for (;;) { -char bytes[16]; +/* try to read from signalfd, don't freak out if we can't read anything */ +offset = 0; +while (offset 128) { +ssize_t len; -len = read(s-rfd, bytes, sizeof(bytes)); +len = read(s-fd, sig.buf + offset, 128 - offset); if (len == -1 errno == EINTR) -continue; /* try again */ -if (len == sizeof(bytes)) -continue; /* more to read */ -break; +continue; +if (len == -1 errno == EAGAIN) { +/* there is no natural reason for this to happen, + * so we'll spin hard until we get everything just + * to be on the safe side. 
*/ +if (offset 0) +continue; +} + +offset += len; } posix_aio_process_queue(s); @@ -497,20 +509,6 @@ static int posix_aio_flush(void *opaque) static PosixAioState *posix_aio_state; -static void aio_signal_handler(int signum) -{ -if (posix_aio_state) { -char byte = 0; -ssize_t ret; - -ret = write(posix_aio_state-wfd, byte, sizeof(byte)); -if (ret 0 errno != EAGAIN) -die(write()); -} - -qemu_service_io(); -} - static void paio_remove(struct qemu_paiocb *acb) { struct qemu_paiocb **pacb; @@ -612,9 +610,8 @@ BlockDriverAIOCB *paio_ioctl(BlockDriverState *bs, int fd, int paio_init(void) { -struct sigaction act; +sigset_t mask; PosixAioState *s; -int fds[2]; int ret; if (posix_aio_state) @@ -622,24 +619,21 @@ int paio_init(void) s = qemu_malloc(sizeof(PosixAioState)); -sigfillset(act.sa_mask); -act.sa_flags = 0; /* do not restart syscalls to interrupt select() */ -act.sa_handler = aio_signal_handler; -sigaction(SIGUSR2, act, NULL); +/* Make sure to block AIO signal */ +sigemptyset(mask); +sigaddset(mask, SIGUSR2); +sigprocmask(SIG_BLOCK, mask, NULL); s-first_aio = NULL; -if (qemu_pipe(fds) == -1) { -fprintf(stderr, failed to create pipe\n); +s-fd = qemu_signalfd(mask); +if (s-fd == -1) { +fprintf(stderr, failed to create signalfd\n); return -1; } -s-rfd = fds[0]; -s-wfd = fds[1]; - -fcntl(s-rfd, F_SETFL, O_NONBLOCK); -fcntl(s-wfd, F_SETFL, O_NONBLOCK); +fcntl(s-fd, F_SETFL, O_NONBLOCK); -qemu_aio_set_fd_handler(s-rfd, posix_aio_read, NULL, posix_aio_flush, +qemu_aio_set_fd_handler(s-fd, posix_aio_read, NULL, posix_aio_flush, posix_aio_process_queue, s); ret = pthread_attr_init(attr); diff --git a/qemu-kvm.c b/qemu-kvm.c index 2fb927c..060c47d 100644 --- a/qemu-kvm.c +++ b/qemu-kvm.c @@ -1680,7 +1680,6 @@ int kvm_main_loop(void) sigemptyset(mask); sigaddset(mask, SIGIO); sigaddset(mask, SIGALRM); -sigaddset(mask, SIGUSR2); sigaddset(mask, SIGBUS); sigprocmask(SIG_BLOCK, mask, NULL); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a 
message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Don't launch guest if -no-kvm when tcg is not configured in
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/vl.c b/vl.c index 2a32cc5..22a3616 100644 --- a/vl.c +++ b/vl.c @@ -2473,6 +2473,10 @@ int main(int argc, char **argv, char **envp) break; case QEMU_OPTION_no_kvm: kvm_allowed = 0; +#ifdef CONFIG_NO_CPU_EMULATION +fprintf(stderr, cpu emulation not configured\n); +exit(1); +#endif break; #ifdef CONFIG_KVM case QEMU_OPTION_no_kvm_irqchip: { -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Fix 32 bit legacy paging with NPT
From: Joerg Roedel joerg.roe...@amd.com This patch fixes 32 bit legacy paging with NPT enabled. The mmu_check_root call on the top-level of the loop causes root_gfn to take values (in the tdp_enabled path) which are outside of guest memory. So the mmu_check_root call fails at some point in the loop iteration causing the guest to triple-fault. This patch changes the mmu_check_root calls to the places where they are really necessary. As a side-effect it introduces a check for the root of a pae page table too. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d2dad65..b2136f9 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2387,6 +2387,10 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) return 0; } direct = !is_paging(vcpu); + + if (mmu_check_root(vcpu, root_gfn)) + return 1; + for (i = 0; i 4; ++i) { hpa_t root = vcpu-arch.mmu.pae_root[i]; @@ -2398,10 +2402,10 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) continue; } root_gfn = pdptr PAGE_SHIFT; + if (mmu_check_root(vcpu, root_gfn)) + return 1; } else if (vcpu-arch.mmu.root_level == 0) root_gfn = 0; - if (mmu_check_root(vcpu, root_gfn)) - return 1; if (tdp_enabled) { direct = 1; root_gfn = i 30; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: x86: Emulate MSR_EBC_FREQUENCY_ID
From: Jes Sorensen jes.soren...@redhat.com Some operating systems store data about the host processor at the time of installation and, when booted on a more up-to-date CPU, try to read MSR_EBC_FREQUENCY_ID. This has been found with XP. Signed-off-by: Jes Sorensen jes.soren...@redhat.com Reviewed-by: Juan Quintela quint...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f47db25..9d43477 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1651,6 +1651,20 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata) case 0xcd: /* fsb frequency */ data = 3; break; + /* +* MSR_EBC_FREQUENCY_ID +* Conservative value valid for even the basic CPU models. +* Models 0,1: 000 in bits 23:21 indicating a bus speed of +* 100MHz, model 2 000 in bits 18:16 indicating 100MHz, +* and 266MHz for model 3, or 4. Set Core Clock +* Frequency to System Bus Frequency Ratio to 1 (bits +* 31:24) even though these are only valid for CPU +* models 2, however guests may end up dividing or +* multiplying by zero otherwise. +*/ + case MSR_EBC_FREQUENCY_ID: + data = 1 24; + break; case MSR_IA32_APICBASE: data = kvm_get_apic_base(vcpu); break; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Introduce inject_page_fault function pointer
From: Joerg Roedel joerg.roe...@amd.com This patch introduces an inject_page_fault function pointer into struct kvm_mmu which will be used to inject a page fault. This will be used later when Nested Nested Paging is implemented. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ab708ee..3fefcd8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -239,6 +239,9 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); + void (*inject_page_fault)(struct kvm_vcpu *vcpu, + unsigned long addr, + u32 error_code); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, u32 *error); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e4a7de4..a751dfc 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2571,7 +2571,7 @@ static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) { - kvm_inject_page_fault(vcpu, addr, err_code); + vcpu-arch.mmu.inject_page_fault(vcpu, addr, err_code); } static void paging_free(struct kvm_vcpu *vcpu) @@ -2721,6 +2721,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-direct_map = true; context-set_cr3 = kvm_x86_ops-set_tdp_cr3; context-get_cr3 = get_cr3; + context-inject_page_fault = kvm_inject_page_fault; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2762,6 +2763,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu-arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; vcpu-arch.mmu.get_cr3 = get_cr3; + vcpu-arch.mmu.inject_page_fault = kvm_inject_page_fault; return r; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to 
majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Make set_cr3 a function pointer in kvm_mmu
From: Joerg Roedel joerg.roe...@amd.com This is necessary to implement Nested Nested Paging. As a side effect this allows some cleanups in the SVM nested paging code. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 53cdf39..43c8db0 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -236,6 +236,7 @@ struct kvm_pio_request { */ struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); + void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 5c28e97..c8acb96 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2714,6 +2714,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-shadow_root_level = kvm_x86_ops-get_tdp_level(); context-root_hpa = INVALID_PAGE; context-direct_map = true; + context-set_cr3 = kvm_x86_ops-set_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2752,7 +2753,8 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) r = paging32_init_context(vcpu); vcpu-arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); - vcpu-arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); + vcpu-arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); + vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; return r; } @@ -2796,7 +2798,7 @@ int kvm_mmu_load(struct kvm_vcpu *vcpu) if (r) goto out; /* set_cr3() should ensure TLB has been flushed */ - kvm_x86_ops-set_cr3(vcpu, vcpu-arch.mmu.root_hpa); + vcpu-arch.mmu.set_cr3(vcpu, vcpu-arch.mmu.root_hpa); out: return r; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: X86: Introduce a tdp_set_cr3 function
From: Joerg Roedel joerg.roe...@amd.com This patch introduces a special set_tdp_cr3 function pointer in kvm_x86_ops which is only used for tdp-enabled mmu contexts. This allows removing some hacks from svm code. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 43c8db0..aeeea9c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -526,6 +526,8 @@ struct kvm_x86_ops { bool (*rdtscp_supported)(void); void (*adjust_tsc_offset)(struct kvm_vcpu *vcpu, s64 adjustment); + void (*set_tdp_cr3)(struct kvm_vcpu *vcpu, unsigned long cr3); + void (*set_supported_cpuid)(u32 func, struct kvm_cpuid_entry2 *entry); bool (*has_wbinvd_exit)(void); diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index c8acb96..a55f8d5 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2714,7 +2714,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-shadow_root_level = kvm_x86_ops-get_tdp_level(); context-root_hpa = INVALID_PAGE; context-direct_map = true; - context-set_cr3 = kvm_x86_ops-set_cr3; + context-set_cr3 = kvm_x86_ops-set_tdp_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 6808f64..094df31 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3216,9 +3216,6 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu) gs_selector = kvm_read_gs(); ldt_selector = kvm_read_ldt(); svm-vmcb-save.cr2 = vcpu-arch.cr2; - /* required for live migration with NPT */ - if (npt_enabled) - svm-vmcb-save.cr3 = vcpu-arch.cr3; clgi(); @@ -3335,16 +3332,22 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, unsigned long root) { struct vcpu_svm *svm = to_svm(vcpu); - if (npt_enabled) { - svm-vmcb-control.nested_cr3 = root; - force_new_asid(vcpu); - return; - } - svm-vmcb-save.cr3 = root; force_new_asid(vcpu); } +static void set_tdp_cr3(struct kvm_vcpu 
*vcpu, unsigned long root) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.nested_cr3 = root; + + /* Also sync guest cr3 here in case we live migrate */ + svm-vmcb-save.cr3 = vcpu-arch.cr3; + + force_new_asid(vcpu); +} + static int is_disabled(void) { u64 vm_cr; @@ -3571,6 +3574,8 @@ static struct kvm_x86_ops svm_x86_ops = { .write_tsc_offset = svm_write_tsc_offset, .adjust_tsc_offset = svm_adjust_tsc_offset, + + .set_tdp_cr3 = set_tdp_cr3, }; static int __init svm_init(void) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 676555c..0e62d8a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4347,6 +4347,8 @@ static struct kvm_x86_ops vmx_x86_ops = { .write_tsc_offset = vmx_write_tsc_offset, .adjust_tsc_offset = vmx_adjust_tsc_offset, + + .set_tdp_cr3 = vmx_set_cr3, }; static int __init vmx_init(void) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: Restore correct registers after sel_cr0 intercept emulation
From: Joerg Roedel joerg.roe...@amd.com This patch implements restoring of the correct rip, rsp, and rax after the svm emulation in KVM injected a selective_cr0 write intercept into the guest hypervisor. The problem was that the vmexit is emulated in the instruction emulation which later commits the registers right after the write-cr0 instruction. So the l1 guest will continue to run with the l2 rip, rsp and rax resulting in unpredictable behavior. This patch is not the final word, it is just an easy patch to fix the issue. The real fix will be done when the instruction emulator is made aware of nested virtualization. Until this is done this patch fixes the issue and provides an easy way to fix this in -stable too. Cc: sta...@kernel.org Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 55743ab..ecd4e58 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -88,6 +88,14 @@ struct nested_state { /* A VMEXIT is required but not yet emulated */ bool exit_required; + /* +* If we vmexit during an instruction emulation we need this to restore +* the l1 guest rip after the emulation +*/ + unsigned long vmexit_rip; + unsigned long vmexit_rsp; + unsigned long vmexit_rax; + /* cache for intercepts of the guest */ u16 intercept_cr_read; u16 intercept_cr_write; @@ -1213,8 +1221,12 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (old == new) { /* cr0 write with ts and mp unchanged */ svm-vmcb-control.exit_code = SVM_EXIT_CR0_SEL_WRITE; - if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) + if (nested_svm_exit_handled(svm) == NESTED_EXIT_DONE) { + svm-nested.vmexit_rip = kvm_rip_read(vcpu); + svm-nested.vmexit_rsp = kvm_register_read(vcpu, VCPU_REGS_RSP); + svm-nested.vmexit_rax = kvm_register_read(vcpu, VCPU_REGS_RAX); return; + } } } @@ -2430,6 +2442,23 @@ static int emulate_on_interception(struct vcpu_svm *svm) return 
emulate_instruction(svm-vcpu, 0, 0, 0) == EMULATE_DONE; } +static int cr0_write_interception(struct vcpu_svm *svm) +{ + struct kvm_vcpu *vcpu = svm-vcpu; + int r; + + r = emulate_instruction(svm-vcpu, 0, 0, 0); + + if (svm-nested.vmexit_rip) { + kvm_register_write(vcpu, VCPU_REGS_RIP, svm-nested.vmexit_rip); + kvm_register_write(vcpu, VCPU_REGS_RSP, svm-nested.vmexit_rsp); + kvm_register_write(vcpu, VCPU_REGS_RAX, svm-nested.vmexit_rax); + svm-nested.vmexit_rip = 0; + } + + return r == EMULATE_DONE; +} + static int cr8_write_interception(struct vcpu_svm *svm) { struct kvm_run *kvm_run = svm-vcpu.run; @@ -2692,7 +2721,7 @@ static int (*svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_READ_CR4] = emulate_on_interception, [SVM_EXIT_READ_CR8] = emulate_on_interception, [SVM_EXIT_CR0_SEL_WRITE]= emulate_on_interception, - [SVM_EXIT_WRITE_CR0]= emulate_on_interception, + [SVM_EXIT_WRITE_CR0]= cr0_write_interception, [SVM_EXIT_WRITE_CR3]= emulate_on_interception, [SVM_EXIT_WRITE_CR4]= emulate_on_interception, [SVM_EXIT_WRITE_CR8]= cr8_write_interception, -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Make walk_addr_generic capable for two-level walking
From: Joerg Roedel joerg.roe...@amd.com This patch uses kvm_read_guest_page_tdp to make the walk_addr_generic functions suitable for two-level page table walking. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index eefe363..f4e09d3 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -124,6 +124,8 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker, unsigned index, pt_access, uninitialized_var(pte_access); gpa_t pte_gpa; bool eperm, present, rsvd_fault; + int offset; + u32 access = 0; trace_kvm_mmu_pagetable_walk(addr, write_fault, user_fault, fetch_fault); @@ -153,12 +155,14 @@ walk: index = PT_INDEX(addr, walker-level); table_gfn = gpte_to_gfn(pte); - pte_gpa = gfn_to_gpa(table_gfn); - pte_gpa += index * sizeof(pt_element_t); + offset= index * sizeof(pt_element_t); + pte_gpa = gfn_to_gpa(table_gfn) + offset; walker-table_gfn[walker-level - 1] = table_gfn; walker-pte_gpa[walker-level - 1] = pte_gpa; - if (kvm_read_guest(vcpu-kvm, pte_gpa, pte, sizeof(pte))) { + if (kvm_read_guest_page_mmu(vcpu, mmu, table_gfn, pte, + offset, sizeof(pte), + PFERR_USER_MASK|PFERR_WRITE_MASK)) { present = false; break; } @@ -209,15 +213,27 @@ walk: is_large_pte(pte) mmu-root_level == PT64_ROOT_LEVEL)) { int lvl = walker-level; + gpa_t real_gpa; + gfn_t gfn; - walker-gfn = gpte_to_gfn_lvl(pte, lvl); - walker-gfn += (addr PT_LVL_OFFSET_MASK(lvl)) -PAGE_SHIFT; + gfn = gpte_to_gfn_lvl(pte, lvl); + gfn += (addr PT_LVL_OFFSET_MASK(lvl)) PAGE_SHIFT; if (PTTYPE == 32 walker-level == PT_DIRECTORY_LEVEL is_cpuid_PSE36()) - walker-gfn += pse36_gfn_delta(pte); + gfn += pse36_gfn_delta(pte); + + access |= write_fault ? PFERR_WRITE_MASK : 0; + access |= fetch_fault ? PFERR_FETCH_MASK : 0; + access |= user_fault ? 
PFERR_USER_MASK : 0; + + real_gpa = mmu-translate_gpa(vcpu, gfn_to_gpa(gfn), + access); + if (real_gpa == UNMAPPED_GVA) + return 0; + + walker-gfn = real_gpa PAGE_SHIFT; break; } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: X86: Introduce pointer to mmu context used for gva_to_gpa
From: Joerg Roedel joerg.roe...@amd.com This patch introduces the walk_mmu pointer which points to the mmu-context currently used for gva_to_gpa translations. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7f95260..91c7d35 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -286,9 +286,22 @@ struct kvm_vcpu_arch { u64 ia32_misc_enable_msr; bool tpr_access_reporting; + /* +* Paging state of the vcpu +* +* If the vcpu runs in guest mode with two level paging this still saves +* the paging mode of the l1 guest. This context is always used to +* handle faults. +*/ struct kvm_mmu mmu; /* +* Pointer to the mmu context currently used for +* gva_to_gpa translations. +*/ + struct kvm_mmu *walk_mmu; + + /* * This struct is filled with the necessary information to propagate a * page fault into the guest */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9936727..cb06ada 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2708,7 +2708,7 @@ static int paging32E_init_context(struct kvm_vcpu *vcpu, static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) { - struct kvm_mmu *context = vcpu-arch.mmu; + struct kvm_mmu *context = vcpu-arch.walk_mmu; context-new_cr3 = nonpaging_new_cr3; context-page_fault = tdp_page_fault; @@ -2767,11 +2767,11 @@ EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu); static int init_kvm_softmmu(struct kvm_vcpu *vcpu) { - int r = kvm_init_shadow_mmu(vcpu, vcpu-arch.mmu); + int r = kvm_init_shadow_mmu(vcpu, vcpu-arch.walk_mmu); - vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; - vcpu-arch.mmu.get_cr3 = get_cr3; - vcpu-arch.mmu.inject_page_fault = kvm_inject_page_fault; + vcpu-arch.walk_mmu-set_cr3 = kvm_x86_ops-set_cr3; + vcpu-arch.walk_mmu-get_cr3 = get_cr3; + vcpu-arch.walk_mmu-inject_page_fault = kvm_inject_page_fault; return r; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 
2364c2c..4196fc7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3456,27 +3456,27 @@ static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; - return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error); } gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_FETCH_MASK; - return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error); } gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0; access |= PFERR_WRITE_MASK; - return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, access, error); + return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, access, error); } /* uses this to access any guest's mapped memory without checking CPL */ gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { - return vcpu-arch.mmu.gva_to_gpa(vcpu, gva, 0, error); + return vcpu-arch.walk_mmu-gva_to_gpa(vcpu, gva, 0, error); } static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, @@ -3487,7 +3487,8 @@ static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes, int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, addr, access, error); + gpa_t gpa = vcpu-arch.walk_mmu-gva_to_gpa(vcpu, addr, access, + error); unsigned offset = addr (PAGE_SIZE-1); unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset); int ret; @@ -3542,8 +3543,9 @@ static int kvm_write_guest_virt_system(gva_t addr, void *val, int r = X86EMUL_CONTINUE; while (bytes) { - gpa_t gpa = vcpu-arch.mmu.gva_to_gpa(vcpu, addr, - PFERR_WRITE_MASK, 
error); + gpa_t gpa = vcpu-arch.walk_mmu-gva_to_gpa(vcpu, addr, +PFERR_WRITE_MASK, +error); unsigned offset = addr (PAGE_SIZE-1); unsigned towrite = min(bytes, (unsigned)PAGE_SIZE -
[COMMIT master] KVM: MMU: Let is_rsvd_bits_set take mmu context instead of vcpu
From: Joerg Roedel joerg.roe...@amd.com This patch changes is_rsvd_bits_set() function prototype to take only a kvm_mmu context instead of a full vcpu. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9e48a77..86f7557 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2578,12 +2578,12 @@ static void paging_free(struct kvm_vcpu *vcpu) nonpaging_free(vcpu); } -static bool is_rsvd_bits_set(struct kvm_vcpu *vcpu, u64 gpte, int level) +static bool is_rsvd_bits_set(struct kvm_mmu *mmu, u64 gpte, int level) { int bit7; bit7 = (gpte 7) 1; - return (gpte vcpu-arch.mmu.rsvd_bits_mask[bit7][level-1]) != 0; + return (gpte mmu-rsvd_bits_mask[bit7][level-1]) != 0; } #define PTTYPE 64 @@ -2859,7 +2859,7 @@ static void mmu_pte_write_new_pte(struct kvm_vcpu *vcpu, return; } - if (is_rsvd_bits_set(vcpu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) + if (is_rsvd_bits_set(vcpu-arch.mmu, *(u64 *)new, PT_PAGE_TABLE_LEVEL)) return; ++vcpu-kvm-stat.mmu_pte_updated; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index 13d0c06..68ee1b7 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -168,7 +168,7 @@ walk: break; } - if (is_rsvd_bits_set(vcpu, pte, walker-level)) { + if (is_rsvd_bits_set(vcpu-arch.mmu, pte, walker-level)) { rsvd_fault = true; break; } @@ -327,6 +327,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, u64 *sptep) { struct kvm_mmu_page *sp; + struct kvm_mmu *mmu = vcpu-arch.mmu; pt_element_t *gptep = gw-prefetch_ptes; u64 *spte; int i; @@ -358,7 +359,7 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw, gpte = gptep[i]; if (!is_present_gpte(gpte) || - is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL)) { + is_rsvd_bits_set(mmu, gpte, PT_PAGE_TABLE_LEVEL)) { if (!sp-unsync) __set_spte(spte, shadow_notrap_nonpresent_pte); continue; @@ -713,7 +714,7 @@ static int 
FNAME(sync_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return -EINVAL; gfn = gpte_to_gfn(gpte); - if (is_rsvd_bits_set(vcpu, gpte, PT_PAGE_TABLE_LEVEL) + if (is_rsvd_bits_set(vcpu-arch.mmu, gpte, PT_PAGE_TABLE_LEVEL) || gfn != sp-gfns[i] || !is_present_gpte(gpte) || !(gpte PT_ACCESSED_MASK)) { u64 nonpresent; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: Expect two more candidates for exit_int_info
From: Joerg Roedel joerg.roe...@amd.com This patch adds INTR and NMI intercepts to the list of expected intercepts with an exit_int_info set. While this can't happen on bare metal it is architecturally legal and may happen with KVM's SVM emulation. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 9df60c3..ede95e0 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -2991,7 +2991,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) if (is_external_interrupt(svm-vmcb-control.exit_int_info) exit_code != SVM_EXIT_EXCP_BASE + PF_VECTOR - exit_code != SVM_EXIT_NPF exit_code != SVM_EXIT_TASK_SWITCH) + exit_code != SVM_EXIT_NPF exit_code != SVM_EXIT_TASK_SWITCH + exit_code != SVM_EXIT_INTR exit_code != SVM_EXIT_NMI) printk(KERN_ERR %s: unexpected exit_ini_info 0x%x exit_code 0x%x\n, __func__, svm-vmcb-control.exit_int_info, -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Introduce generic walk_addr function
From: Joerg Roedel joerg.roe...@amd.com This is the first patch in the series towards a generic walk_addr implementation which could walk two-dimensional page tables in the end. In this first step the walk_addr function is renamed into walk_addr_generic which takes a mmu context as an additional parameter. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index d07f48a..a704a81 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -114,9 +114,10 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) /* * Fetch a guest pte for a guest virtual address */ -static int FNAME(walk_addr)(struct guest_walker *walker, - struct kvm_vcpu *vcpu, gva_t addr, - int write_fault, int user_fault, int fetch_fault) +static int FNAME(walk_addr_generic)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gva_t addr, int write_fault, + int user_fault, int fetch_fault) { pt_element_t pte; gfn_t table_gfn; @@ -129,10 +130,11 @@ static int FNAME(walk_addr)(struct guest_walker *walker, walk: present = true; eperm = rsvd_fault = false; - walker-level = vcpu-arch.mmu.root_level; - pte = vcpu-arch.mmu.get_cr3(vcpu); + walker-level = mmu-root_level; + pte = mmu-get_cr3(vcpu); + #if PTTYPE == 64 - if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { + if (walker-level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr 30) 3); trace_kvm_mmu_paging_element(pte, walker-level); if (!is_present_gpte(pte)) { @@ -143,7 +145,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) is_pae(vcpu)) || - (vcpu-arch.mmu.get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); + (mmu-get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; @@ -205,7 +207,7 @@ walk: (PTTYPE == 64 || is_pse(vcpu))) || ((walker-level == PT_PDPE_LEVEL) is_large_pte(pte) - vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL)) { + mmu-root_level == PT64_ROOT_LEVEL)) 
{ int lvl = walker-level; walker-gfn = gpte_to_gfn_lvl(pte, lvl); @@ -266,6 +268,14 @@ error: return 0; } +static int FNAME(walk_addr)(struct guest_walker *walker, + struct kvm_vcpu *vcpu, gva_t addr, + int write_fault, int user_fault, int fetch_fault) +{ + return FNAME(walk_addr_generic)(walker, vcpu, vcpu-arch.mmu, addr, + write_fault, user_fault, fetch_fault); +} + static void FNAME(update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *spte, const void *pte) { -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Track page fault data in struct vcpu
From: Joerg Roedel joerg.roe...@amd.com This patch introduces a struct with two new fields in vcpu_arch for x86: * fault.address * fault.error_code This will be used to correctly propagate page faults back into the guest when we could have either an ordinary page fault or a nested page fault. In the case of a nested page fault the fault-address is different from the original address that should be walked. So we need to keep track about the real fault-address. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index 1bf1140..5187dd8 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -229,7 +229,6 @@ struct x86_emulate_ctxt { int exception; /* exception that happens during emulation or -1 */ u32 error_code; /* error code for exception */ bool error_code_valid; - unsigned long cr2; /* faulted address in case of #PF */ /* decode cache */ struct decode_cache decode; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3fefcd8..235023e 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -239,9 +239,7 @@ struct kvm_mmu { void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); - void (*inject_page_fault)(struct kvm_vcpu *vcpu, - unsigned long addr, - u32 error_code); + void (*inject_page_fault)(struct kvm_vcpu *vcpu); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, u32 *error); @@ -288,6 +286,16 @@ struct kvm_vcpu_arch { bool tpr_access_reporting; struct kvm_mmu mmu; + + /* +* This struct is filled with the necessary information to propagate a +* page fault into the guest +*/ + struct { + u64 address; + unsigned error_code; + } fault; + /* only needed in kvm_pv_mmu_op() path, 
but it's hot so * put it here to avoid allocation */ struct kvm_pv_mmu_op_buffer mmu_op_buffer; @@ -624,8 +632,7 @@ void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); -void kvm_inject_page_fault(struct kvm_vcpu *vcpu, unsigned long cr2, - u32 error_code); +void kvm_inject_page_fault(struct kvm_vcpu *vcpu); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); int kvm_pic_set_irq(void *opaque, int irq, int level); diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 27d2c22..2b08b78 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -487,11 +487,9 @@ static void emulate_gp(struct x86_emulate_ctxt *ctxt, int err) emulate_exception(ctxt, GP_VECTOR, err, true); } -static void emulate_pf(struct x86_emulate_ctxt *ctxt, unsigned long addr, - int err) +static void emulate_pf(struct x86_emulate_ctxt *ctxt) { - ctxt-cr2 = addr; - emulate_exception(ctxt, PF_VECTOR, err, true); + emulate_exception(ctxt, PF_VECTOR, 0, true); } static void emulate_ud(struct x86_emulate_ctxt *ctxt) @@ -834,7 +832,7 @@ static int read_emulated(struct x86_emulate_ctxt *ctxt, rc = ops-read_emulated(addr, mc-data + mc-end, n, err, ctxt-vcpu); if (rc == X86EMUL_PROPAGATE_FAULT) - emulate_pf(ctxt, addr, err); + emulate_pf(ctxt); if (rc != X86EMUL_CONTINUE) return rc; mc-end += n; @@ -921,7 +919,7 @@ static int read_segment_descriptor(struct x86_emulate_ctxt *ctxt, addr = dt.address + index * 8; ret = ops-read_std(addr, desc, sizeof *desc, ctxt-vcpu, err); if (ret == X86EMUL_PROPAGATE_FAULT) - emulate_pf(ctxt, addr, err); + emulate_pf(ctxt); return ret; } @@ -947,7 +945,7 @@ static int write_segment_descriptor(struct x86_emulate_ctxt *ctxt, addr = dt.address + index * 8; ret = ops-write_std(addr, desc, sizeof *desc, ctxt-vcpu, 
err); if (ret == X86EMUL_PROPAGATE_FAULT) - emulate_pf(ctxt, addr, err); + emulate_pf(ctxt); return ret; } @@ -1117,7 +1115,7 @@ static inline int writeback(struct x86_emulate_ctxt *ctxt, err,
[COMMIT master] x86: Define MSR_EBC_FREQUENCY_ID
From: Jes Sorensen jes.soren...@redhat.com Signed-off-by: Jes Sorensen jes.soren...@redhat.com Signed-off-by: Marcelo Tosatti mtosa...@redhat.com diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 986f779..83c4bb1 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -198,6 +198,7 @@ #define MSR_IA32_TSC 0x0010 #define MSR_IA32_PLATFORM_ID 0x0017 #define MSR_IA32_EBL_CR_POWERON0x002a +#define MSR_EBC_FREQUENCY_ID 0x002c #define MSR_IA32_FEATURE_CONTROL0x003a #define FEATURE_CONTROL_LOCKED (10) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Introduce kvm_read_nested_guest_page()
From: Joerg Roedel joerg.roe...@amd.com This patch introduces the kvm_read_nested_guest_page function which reads from the physical memory of the guest. If the guest is running in guest-mode itself with nested paging enabled it will read from the guest's guest physical memory instead. The patch also changes the code to use this function where it is necessary. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a2efb70..46843ed 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -392,6 +392,13 @@ int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, } EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); +int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, + void *data, int offset, int len, u32 access) +{ + return kvm_read_guest_page_mmu(vcpu, vcpu-arch.walk_mmu, gfn, + data, offset, len, access); +} + /* * Load the pae pdptrs. Return true is they are all valid. 
*/ @@ -403,8 +410,9 @@ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) int ret; u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)]; - ret = kvm_read_guest_page(vcpu-kvm, pdpt_gfn, pdpte, - offset * sizeof(u64), sizeof(pdpte)); + ret = kvm_read_nested_guest_page(vcpu, pdpt_gfn, pdpte, +offset * sizeof(u64), sizeof(pdpte), +PFERR_USER_MASK|PFERR_WRITE_MASK); if (ret 0) { ret = 0; goto out; @@ -433,6 +441,8 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) { u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)]; bool changed = true; + int offset; + gfn_t gfn; int r; if (is_long_mode(vcpu) || !is_pae(vcpu)) @@ -442,7 +452,10 @@ static bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)vcpu-arch.regs_avail)) return true; - r = kvm_read_guest(vcpu-kvm, vcpu-arch.cr3 ~31u, pdpte, sizeof(pdpte)); + gfn = (vcpu-arch.cr3 ~31u) PAGE_SHIFT; + offset = (vcpu-arch.cr3 ~31u) (PAGE_SIZE - 1); + r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), + PFERR_USER_MASK | PFERR_WRITE_MASK); if (r 0) goto out; changed = memcmp(pdpte, vcpu-arch.pdptrs, sizeof(pdpte)) != 0; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: Initialize Nested Nested MMU context on VMRUN
From: Joerg Roedel joerg.roe...@amd.com This patch adds code to initialize the Nested Nested Paging MMU context when the L1 guest executes a VMRUN instruction and has nested paging enabled in its VMCB. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 95cbeed..6e248d8 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2962,6 +2962,7 @@ void kvm_mmu_unload(struct kvm_vcpu *vcpu) { mmu_free_roots(vcpu); } +EXPORT_SYMBOL_GPL(kvm_mmu_unload); static void mmu_pte_write_zap_pte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a483aa9..9df60c3 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -294,6 +294,15 @@ static inline void flush_guest_tlb(struct kvm_vcpu *vcpu) force_new_asid(vcpu); } +static int get_npt_level(void) +{ +#ifdef CONFIG_X86_64 + return PT64_ROOT_LEVEL; +#else + return PT32E_ROOT_LEVEL; +#endif +} + static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) { vcpu-arch.efer = efer; @@ -1630,6 +1639,26 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu) nested_svm_vmexit(svm); } +static int nested_svm_init_mmu_context(struct kvm_vcpu *vcpu) +{ + int r; + + r = kvm_init_shadow_mmu(vcpu, vcpu-arch.mmu); + + vcpu-arch.mmu.set_cr3 = nested_svm_set_tdp_cr3; + vcpu-arch.mmu.get_cr3 = nested_svm_get_tdp_cr3; + vcpu-arch.mmu.inject_page_fault = nested_svm_inject_npf_exit; + vcpu-arch.mmu.shadow_root_level = get_npt_level(); + vcpu-arch.walk_mmu = vcpu-arch.nested_mmu; + + return r; +} + +static void nested_svm_uninit_mmu_context(struct kvm_vcpu *vcpu) +{ + vcpu-arch.walk_mmu = vcpu-arch.mmu; +} + static int nested_svm_check_permissions(struct vcpu_svm *svm) { if (!(svm-vcpu.arch.efer EFER_SVME) @@ -1998,6 +2027,8 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) kvm_clear_exception_queue(svm-vcpu); kvm_clear_interrupt_queue(svm-vcpu); + svm-nested.nested_cr3 = 0; + /* 
Restore selected save entries */ svm-vmcb-save.es = hsave-save.es; svm-vmcb-save.cs = hsave-save.cs; @@ -2024,6 +2055,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm) nested_svm_unmap(page); + nested_svm_uninit_mmu_context(svm-vcpu); kvm_mmu_reset_context(svm-vcpu); kvm_mmu_load(svm-vcpu); @@ -2071,6 +2103,9 @@ static bool nested_vmcb_checks(struct vmcb *vmcb) if (vmcb-control.asid == 0) return false; + if (vmcb-control.nested_ctl !npt_enabled) + return false; + return true; } @@ -2143,6 +2178,12 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm) else svm-vcpu.arch.hflags = ~HF_HIF_MASK; + if (nested_vmcb-control.nested_ctl) { + kvm_mmu_unload(svm-vcpu); + svm-nested.nested_cr3 = nested_vmcb-control.nested_cr3; + nested_svm_init_mmu_context(svm-vcpu); + } + /* Load the nested guest state */ svm-vmcb-save.es = nested_vmcb-save.es; svm-vmcb-save.cs = nested_vmcb-save.cs; @@ -3410,15 +3451,6 @@ static bool svm_cpu_has_accelerated_tpr(void) return false; } -static int get_npt_level(void) -{ -#ifdef CONFIG_X86_64 - return PT64_ROOT_LEVEL; -#else - return PT32E_ROOT_LEVEL; -#endif -} - static u64 svm_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio) { return 0; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Add kvm_mmu parameter to load_pdptrs function
From: Joerg Roedel joerg.roe...@amd.com This function needs to be able to load the pdptrs from any mmu context currently in use. So change this function to take a kvm_mmu parameter to fit these needs. As a side effect this patch also moves the cached pdptrs from vcpu_arch into the kvm_mmu struct. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 61d94cd..ac95c6f 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -257,6 +257,8 @@ struct kvm_mmu { u64 *pae_root; u64 rsvd_bits_mask[2][4]; + + u64 pdptrs[4]; /* pae */ }; struct kvm_vcpu_arch { @@ -276,7 +278,6 @@ struct kvm_vcpu_arch { unsigned long cr4_guest_owned_bits; unsigned long cr8; u32 hflags; - u64 pdptrs[4]; /* pae */ u64 efer; u64 apic_base; struct kvm_lapic *apic;/* kernel irqchip context */ @@ -592,7 +593,7 @@ void kvm_mmu_zap_all(struct kvm *kvm); unsigned int kvm_mmu_calculate_mmu_pages(struct kvm *kvm); void kvm_mmu_change_mmu_pages(struct kvm *kvm, unsigned int kvm_nr_mmu_pages); -int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); +int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3); int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, const void *val, int bytes); diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index 6491ac8..a37abe2 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -42,7 +42,7 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) (unsigned long *)vcpu-arch.regs_avail)) kvm_x86_ops-cache_reg(vcpu, VCPU_EXREG_PDPTR); - return vcpu-arch.pdptrs[index]; + return vcpu-arch.walk_mmu-pdptrs[index]; } static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 094df31..a98ac52 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1010,7 +1010,7 
@@ static void svm_cache_reg(struct kvm_vcpu *vcpu, enum kvm_reg reg) switch (reg) { case VCPU_EXREG_PDPTR: BUG_ON(!npt_enabled); - load_pdptrs(vcpu, vcpu-arch.cr3); + load_pdptrs(vcpu, vcpu-arch.walk_mmu, vcpu-arch.cr3); break; default: BUG(); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0e62d8a..0a70194 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1848,20 +1848,20 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu) return; if (is_paging(vcpu) is_pae(vcpu) !is_long_mode(vcpu)) { - vmcs_write64(GUEST_PDPTR0, vcpu-arch.pdptrs[0]); - vmcs_write64(GUEST_PDPTR1, vcpu-arch.pdptrs[1]); - vmcs_write64(GUEST_PDPTR2, vcpu-arch.pdptrs[2]); - vmcs_write64(GUEST_PDPTR3, vcpu-arch.pdptrs[3]); + vmcs_write64(GUEST_PDPTR0, vcpu-arch.mmu.pdptrs[0]); + vmcs_write64(GUEST_PDPTR1, vcpu-arch.mmu.pdptrs[1]); + vmcs_write64(GUEST_PDPTR2, vcpu-arch.mmu.pdptrs[2]); + vmcs_write64(GUEST_PDPTR3, vcpu-arch.mmu.pdptrs[3]); } } static void ept_save_pdptrs(struct kvm_vcpu *vcpu) { if (is_paging(vcpu) is_pae(vcpu) !is_long_mode(vcpu)) { - vcpu-arch.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); - vcpu-arch.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); - vcpu-arch.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); - vcpu-arch.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); + vcpu-arch.mmu.pdptrs[0] = vmcs_read64(GUEST_PDPTR0); + vcpu-arch.mmu.pdptrs[1] = vmcs_read64(GUEST_PDPTR1); + vcpu-arch.mmu.pdptrs[2] = vmcs_read64(GUEST_PDPTR2); + vcpu-arch.mmu.pdptrs[3] = vmcs_read64(GUEST_PDPTR3); } __set_bit(VCPU_EXREG_PDPTR, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 3101060..bbd9f4a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -418,17 +418,17 @@ int kvm_read_nested_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn, /* * Load the pae pdptrs. Return true is they are all valid. 
*/ -int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) +int load_pdptrs(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, unsigned long cr3) { gfn_t pdpt_gfn = cr3 PAGE_SHIFT; unsigned offset = ((cr3 (PAGE_SIZE-1)) 5) 2; int i; int ret; - u64 pdpte[ARRAY_SIZE(vcpu-arch.pdptrs)]; + u64 pdpte[ARRAY_SIZE(mmu-pdptrs)]; - ret = kvm_read_nested_guest_page(vcpu, pdpt_gfn, pdpte, -offset * sizeof(u64), sizeof(pdpte), -PFERR_USER_MASK|PFERR_WRITE_MASK); + ret = kvm_read_guest_page_mmu(vcpu,
[COMMIT master] KVM: MMU: Track NX state in struct kvm_mmu
From: Joerg Roedel joerg.roe...@amd.com With Nested Paging emulation the NX state between the two MMU contexts may differ. To make sure that always the right fault error code is recorded this patch moves the NX state into struct kvm_mmu so that the code can distinguish between L1 and L2 NX state. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 88d6c84..3a00741 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -259,6 +259,8 @@ struct kvm_mmu { u64 *lm_root; u64 rsvd_bits_mask[2][4]; + bool nx; + u64 pdptrs[4]; /* pae */ }; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index dd76765..95cbeed 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2634,6 +2634,7 @@ static int nonpaging_init_context(struct kvm_vcpu *vcpu, context-shadow_root_level = PT32E_ROOT_LEVEL; context-root_hpa = INVALID_PAGE; context-direct_map = true; + context-nx = false; return 0; } @@ -2687,7 +2688,7 @@ static void reset_rsvds_bits_mask(struct kvm_vcpu *vcpu, int maxphyaddr = cpuid_maxphyaddr(vcpu); u64 exb_bit_rsvd = 0; - if (!is_nx(vcpu)) + if (!context-nx) exb_bit_rsvd = rsvd_bits(63, 63); switch (level) { case PT32_ROOT_LEVEL: @@ -2746,6 +2747,8 @@ static int paging64_init_context_common(struct kvm_vcpu *vcpu, struct kvm_mmu *context, int level) { + context-nx = is_nx(vcpu); + reset_rsvds_bits_mask(vcpu, context, level); ASSERT(is_pae(vcpu)); @@ -2772,6 +2775,8 @@ static int paging64_init_context(struct kvm_vcpu *vcpu, static int paging32_init_context(struct kvm_vcpu *vcpu, struct kvm_mmu *context) { + context-nx = false; + reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); context-new_cr3 = paging_new_cr3; @@ -2810,19 +2815,24 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-set_cr3 = kvm_x86_ops-set_tdp_cr3; context-get_cr3 = get_cr3; context-inject_page_fault = kvm_inject_page_fault; + 
context-nx = is_nx(vcpu); if (!is_paging(vcpu)) { + context-nx = false; context-gva_to_gpa = nonpaging_gva_to_gpa; context-root_level = 0; } else if (is_long_mode(vcpu)) { + context-nx = is_nx(vcpu); reset_rsvds_bits_mask(vcpu, context, PT64_ROOT_LEVEL); context-gva_to_gpa = paging64_gva_to_gpa; context-root_level = PT64_ROOT_LEVEL; } else if (is_pae(vcpu)) { + context-nx = is_nx(vcpu); reset_rsvds_bits_mask(vcpu, context, PT32E_ROOT_LEVEL); context-gva_to_gpa = paging64_gva_to_gpa; context-root_level = PT32E_ROOT_LEVEL; } else { + context-nx = false; reset_rsvds_bits_mask(vcpu, context, PT32_ROOT_LEVEL); context-gva_to_gpa = paging32_gva_to_gpa; context-root_level = PT32_ROOT_LEVEL; @@ -2878,17 +2888,21 @@ static int init_kvm_nested_mmu(struct kvm_vcpu *vcpu) * functions between mmu and nested_mmu are swapped. */ if (!is_paging(vcpu)) { + g_context-nx = false; g_context-root_level = 0; g_context-gva_to_gpa = nonpaging_gva_to_gpa_nested; } else if (is_long_mode(vcpu)) { + g_context-nx = is_nx(vcpu); reset_rsvds_bits_mask(vcpu, g_context, PT64_ROOT_LEVEL); g_context-root_level = PT64_ROOT_LEVEL; g_context-gva_to_gpa = paging64_gva_to_gpa_nested; } else if (is_pae(vcpu)) { + g_context-nx = is_nx(vcpu); reset_rsvds_bits_mask(vcpu, g_context, PT32E_ROOT_LEVEL); g_context-root_level = PT32E_ROOT_LEVEL; g_context-gva_to_gpa = paging64_gva_to_gpa_nested; } else { + g_context-nx = false; reset_rsvds_bits_mask(vcpu, g_context, PT32_ROOT_LEVEL); g_context-root_level = PT32_ROOT_LEVEL; g_context-gva_to_gpa = paging32_gva_to_gpa_nested; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index a28f09b..2bdd843 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -105,7 +105,7 @@ static unsigned FNAME(gpte_access)(struct kvm_vcpu *vcpu, pt_element_t gpte) access = (gpte (PT_WRITABLE_MASK | PT_USER_MASK)) | ACC_EXEC_MASK; #if PTTYPE == 64 - if (is_nx(vcpu)) + if (vcpu-arch.mmu.nx) access = ~(gpte PT64_NX_SHIFT); #endif return access; @@ 
-272,7 +272,7 @@ error: walker-error_code |=
[COMMIT master] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables
From: Avi Kivity a...@redhat.com Commit "KVM: MMU: Make tdp_enabled a mmu-context parameter" made real-mode set ->direct_map, and changed the code that merges in the memory type to depend on direct_map instead of tdp_enabled. However, in this case what really matters is tdp, not direct_map, since tdp changes the pte format regardless of whether the mapping is direct or not. As a result, real-mode shadow mappings got corrupted with ept memory types. The result was a huge slowdown, likely due to the cache being disabled. Change it back as the simplest fix for the regression (real fix is to move all that to vmx code, and not use tdp_enabled as a synonym for ept). Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e248d8..3ce56bf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= shadow_user_mask; if (level PT_PAGE_TABLE_LEVEL) spte |= PT_PAGE_SIZE_MASK; - if (vcpu-arch.mmu.direct_map) + if (tdp_enabled) spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: X86: Propagate fetch faults
From: Joerg Roedel joerg.roe...@amd.com KVM currently ignores fetch faults in the instruction emulator. With nested-npt we could have such faults. This patch adds the code to handle these. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 2b08b78..aead72e 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -1198,6 +1198,9 @@ static int emulate_popf(struct x86_emulate_ctxt *ctxt, *(unsigned long *)dest = (ctxt-eflags ~change_mask) | (val change_mask); + if (rc == X86EMUL_PROPAGATE_FAULT) + emulate_pf(ctxt); + return rc; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0281d92..3101060 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4247,6 +4247,9 @@ int emulate_instruction(struct kvm_vcpu *vcpu, vcpu-arch.emulate_ctxt.perm_ok = false; r = x86_decode_insn(vcpu-arch.emulate_ctxt); + if (r == X86EMUL_PROPAGATE_FAULT) + goto done; + trace_kvm_emulate_insn_start(vcpu); /* Only allow emulation of specific instructions on #UD @@ -4305,6 +4308,7 @@ restart: return handle_emulation_failure(vcpu); } +done: if (vcpu-arch.emulate_ctxt.exception = 0) { inject_emulated_exception(vcpu); r = EMULATE_DONE; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Add infrastructure for two-level page walker
From: Joerg Roedel joerg.roe...@amd.com This patch introduces a mmu-callback to translate gpa addresses in the walk_addr code. This is later used to translate l2_gpa addresses into l1_gpa addresses. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 235023e..7f95260 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -243,6 +243,7 @@ struct kvm_mmu { void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, u32 *error); + gpa_t (*translate_gpa)(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access); void (*prefetch_page)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *page); int (*sync_page)(struct kvm_vcpu *vcpu, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 48b74d2..2364c2c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3448,6 +3448,11 @@ void kvm_get_segment(struct kvm_vcpu *vcpu, kvm_x86_ops-get_segment(vcpu, var, seg); } +static gpa_t translate_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access) +{ + return gpa; +} + gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error) { u32 access = (kvm_x86_ops-get_cpl(vcpu) == 3) ? 
PFERR_USER_MASK : 0; @@ -5659,6 +5664,7 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu) vcpu-arch.emulate_ctxt.ops = emulate_ops; vcpu-arch.mmu.root_hpa = INVALID_PAGE; + vcpu-arch.mmu.translate_gpa = translate_gpa; if (!irqchip_in_kernel(kvm) || kvm_vcpu_is_bsp(vcpu)) vcpu-arch.mp_state = KVM_MP_STATE_RUNNABLE; else diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f2ecdd5..917e68f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -534,6 +534,11 @@ static inline gpa_t gfn_to_gpa(gfn_t gfn) return (gpa_t)gfn PAGE_SHIFT; } +static inline gfn_t gpa_to_gfn(gpa_t gpa) +{ + return (gfn_t)(gpa PAGE_SHIFT); +} + static inline hpa_t pfn_to_hpa(pfn_t pfn) { return (hpa_t)pfn PAGE_SHIFT; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: Document that KVM_GET_SUPPORTED_CPUID may return emulated values
From: Avi Kivity a...@redhat.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index 24d6341..b336266 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1042,8 +1042,9 @@ number is just right, the 'nent' field is adjusted to the number of valid entries in the 'entries' array, which is then filled. The entries returned are the host cpuid as returned by the cpuid instruction, -with unknown or unsupported features masked out. The fields in each entry -are defined as follows: +with unknown or unsupported features masked out. Some features (for example, +x2apic), may not be present in the host cpu, but are exposed by kvm if it can +emulate them efficiently. The fields in each entry are defined as follows: function: the eax value used to obtain the entry index: the ecx value used to obtain the entry (for entries that are -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: Report Nested Paging support to userspace
From: Joerg Roedel joerg.roe...@amd.com This patch implements the reporting of the nested paging feature support to userspace. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index ede95e0..678602e 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3476,6 +3476,10 @@ static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) if (svm_has(SVM_FEATURE_NRIP)) entry-edx |= SVM_FEATURE_NRIP; + /* Support NPT for the guest if enabled */ + if (npt_enabled) + entry-edx |= SVM_FEATURE_NPT; + break; } } -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] Merge branch 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next
From: Avi Kivity a...@redhat.com * 'master' of ssh://master.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (407 commits) PM / Hibernate: Avoid hitting OOM during preallocation of memory x86, tsc: Fix a preemption leak in restore_sched_clock_state() x86, tsc: Fix a preemption leak in restore_sched_clock_state() PM QoS: Correct pr_debug() misuse and improve parameter checks xfs: log IO completion workqueue is a high priority queue execve: make responsive to SIGKILL with large arguments execve: improve interactivity with large arguments setup_arg_pages: diagnose excessive argument size KEYS: Fix bug in keyctl_session_to_parent() if parent has no session keyring KEYS: Fix RCU no-lock warning in keyctl_session_to_parent() drm/i915: don't enable self-refresh on Ironlake xfs: prevent reading uninitialized stack memory AT91: at91sam9261ek: remove C99 comments but keep information AT91: at91sam9261ek board: remove warnings related to use of SPI or SD/MMC AT91: dm9000 initialization update block: Range check cpu in blk_cpu_to_group ipheth: remove incorrect devtype to WWAN MAINTAINERS: Add CAIF tracing: t_start: reset FTRACE_ITER_HASH in case of seek/pread libata-sff: Reenable Port Multiplier after libata-sff remodeling. ... Conflicts: arch/x86/include/asm/kvm_emulate.h virt/kvm/kvm_main.c Signed-off-by: Avi Kivity a...@redhat.com -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: X86: Report SVM bit to userspace only when supported
From: Joerg Roedel joerg.roe...@amd.com This patch fixes a bug in KVM where it _always_ reports the support of the SVM feature to userspace. But KVM only supports SVM on AMD hardware and only when it is enabled in the kernel module. This patch fixes the wrong reporting. Cc: sta...@kernel.org Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 678602e..eeb08d6 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -3464,6 +3464,10 @@ static void svm_cpuid_update(struct kvm_vcpu *vcpu) static void svm_set_supported_cpuid(u32 func, struct kvm_cpuid_entry2 *entry) { switch (func) { + case 0x8001: + if (nested) + entry-ecx |= (1 2); /* Set SVM bit */ + break; case 0x800A: entry-eax = 1; /* SVM revision 1 */ entry-ebx = 8; /* Lets support 8 ASIDs in case we add proper diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index bbd9f4a..3ff0a8f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2209,7 +2209,7 @@ static void do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, 0 /* Reserved, AES */ | F(XSAVE) | 0 /* OSXSAVE */ | F(AVX); /* cpuid 0x8001.ecx */ const u32 kvm_supported_word6_x86_features = - F(LAHF_LM) | F(CMP_LEGACY) | F(SVM) | 0 /* ExtApicSpace */ | + F(LAHF_LM) | F(CMP_LEGACY) | 0 /*SVM*/ | 0 /* ExtApicSpace */ | F(CR8_LEGACY) | F(ABM) | F(SSE4A) | F(MISALIGNSSE) | F(3DNOWPREFETCH) | 0 /* OSVW */ | 0 /* IBS */ | F(SSE5) | 0 /* SKINIT */ | 0 /* WDT */; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Propagate the right fault back to the guest after gva_to_gpa
From: Joerg Roedel joerg.roe...@amd.com This patch implements logic to make sure that either a page-fault/page-fault-vmexit or a nested-page-fault-vmexit is propagated back to the guest. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 5d9e0bb..61d94cd 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -660,6 +660,7 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu); int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, gfn_t gfn, void *data, int offset, int len, u32 access); +void kvm_propagate_fault(struct kvm_vcpu *vcpu); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); int kvm_pic_set_irq(void *opaque, int irq, int level); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index e4c76bf..0281d92 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -338,6 +338,22 @@ void kvm_inject_page_fault(struct kvm_vcpu *vcpu) kvm_queue_exception_e(vcpu, PF_VECTOR, error_code); } +void kvm_propagate_fault(struct kvm_vcpu *vcpu) +{ + u32 nested, error; + + error = vcpu-arch.fault.error_code; + nested = error PFERR_NESTED_MASK; + error = error ~PFERR_NESTED_MASK; + + vcpu-arch.fault.error_code = error; + + if (mmu_is_nested(vcpu) !nested) + vcpu-arch.nested_mmu.inject_page_fault(vcpu); + else + vcpu-arch.mmu.inject_page_fault(vcpu); +} + void kvm_inject_nmi(struct kvm_vcpu *vcpu) { vcpu-arch.nmi_pending = 1; @@ -4140,7 +4156,7 @@ static void inject_emulated_exception(struct kvm_vcpu *vcpu) { struct x86_emulate_ctxt *ctxt = vcpu-arch.emulate_ctxt; if (ctxt-exception == PF_VECTOR) - kvm_inject_page_fault(vcpu); + kvm_propagate_fault(vcpu); else if (ctxt-error_code_valid) kvm_queue_exception_e(vcpu, ctxt-exception, ctxt-error_code); else -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo 
info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Allow long mode shadows for legacy page tables
From: Joerg Roedel joerg.roe...@amd.com Currently the KVM softmmu implementation can not shadow a 32 bit legacy or PAE page table with a long mode page table. This is a required feature for nested paging emulation because the nested page table must always be in host format. So this patch implements the missing pieces to allow long mode page tables for page table types. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ac95c6f..88d6c84 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -256,6 +256,7 @@ struct kvm_mmu { bool direct_map; u64 *pae_root; + u64 *lm_root; u64 rsvd_bits_mask[2][4]; u64 pdptrs[4]; /* pae */ diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 9cd5a71..dd76765 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1504,6 +1504,12 @@ static void shadow_walk_init(struct kvm_shadow_walk_iterator *iterator, iterator-addr = addr; iterator-shadow_addr = vcpu-arch.mmu.root_hpa; iterator-level = vcpu-arch.mmu.shadow_root_level; + + if (iterator-level == PT64_ROOT_LEVEL + vcpu-arch.mmu.root_level PT64_ROOT_LEVEL + !vcpu-arch.mmu.direct_map) + --iterator-level; + if (iterator-level == PT32E_ROOT_LEVEL) { iterator-shadow_addr = vcpu-arch.mmu.pae_root[(addr 30) 3]; @@ -2314,7 +2320,9 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu) if (!VALID_PAGE(vcpu-arch.mmu.root_hpa)) return; spin_lock(vcpu-kvm-mmu_lock); - if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL + (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL || +vcpu-arch.mmu.direct_map)) { hpa_t root = vcpu-arch.mmu.root_hpa; sp = page_header(root); @@ -2394,10 +2402,10 @@ static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { - int i; - gfn_t root_gfn; struct kvm_mmu_page *sp; - u64 pdptr; + u64 pdptr, 
pm_mask; + gfn_t root_gfn; + int i; root_gfn = vcpu-arch.mmu.get_cr3(vcpu) PAGE_SHIFT; @@ -2426,8 +2434,13 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) /* * We shadow a 32 bit page table. This may be a legacy 2-level -* or a PAE 3-level page table. +* or a PAE 3-level page table. In either case we need to be aware that +* the shadow page table may be a PAE or a long mode page table. */ + pm_mask = PT_PRESENT_MASK; + if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) + pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK; + for (i = 0; i 4; ++i) { hpa_t root = vcpu-arch.mmu.pae_root[i]; @@ -2451,9 +2464,35 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) ++sp-root_count; spin_unlock(vcpu-kvm-mmu_lock); - vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; + vcpu-arch.mmu.pae_root[i] = root | pm_mask; + vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); } - vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); + + /* +* If we shadow a 32 bit page table with a long mode page +* table we enter this path. +*/ + if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu-arch.mmu.lm_root == NULL) { + /* +* The additional page necessary for this is only +* allocated on demand. 
+*/ + + u64 *lm_root; + + lm_root = (void*)get_zeroed_page(GFP_KERNEL); + if (lm_root == NULL) + return 1; + + lm_root[0] = __pa(vcpu-arch.mmu.pae_root) | pm_mask; + + vcpu-arch.mmu.lm_root = lm_root; + } + + vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.lm_root); + } + return 0; } @@ -2470,9 +2509,12 @@ static void mmu_sync_roots(struct kvm_vcpu *vcpu) int i; struct kvm_mmu_page *sp; + if (vcpu-arch.mmu.direct_map) + return; + if (!VALID_PAGE(vcpu-arch.mmu.root_hpa)) return; - if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; sp = page_header(root); mmu_sync_children(vcpu, sp); @@ -3253,6 +3295,8 @@ EXPORT_SYMBOL_GPL(kvm_disable_tdp); static void free_mmu_pages(struct kvm_vcpu
[COMMIT master] KVM: X86: Add kvm_read_guest_page_mmu function
From: Joerg Roedel joerg.roe...@amd.com This patch adds a function which can read from the guest's physical memory or from the guest's guest physical memory. This will be used in the two-dimensional page table walker. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 10a5ddd..5d9e0bb 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -657,6 +657,9 @@ void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_requeue_exception(struct kvm_vcpu *vcpu, unsigned nr); void kvm_requeue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code); void kvm_inject_page_fault(struct kvm_vcpu *vcpu); +int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gfn_t gfn, void *data, int offset, int len, + u32 access); bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl); int kvm_pic_set_irq(void *opaque, int irq, int level); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4196fc7..a2efb70 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -370,6 +370,29 @@ bool kvm_require_cpl(struct kvm_vcpu *vcpu, int required_cpl) EXPORT_SYMBOL_GPL(kvm_require_cpl); /* + * This function will be used to read from the physical memory of the currently + * running guest. The difference to kvm_read_guest_page is that this function + * can read from guest physical or from the guest's guest physical memory. 
+ */ +int kvm_read_guest_page_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, + gfn_t ngfn, void *data, int offset, int len, + u32 access) +{ + gfn_t real_gfn; + gpa_t ngpa; + + ngpa = gfn_to_gpa(ngfn); + real_gfn = mmu-translate_gpa(vcpu, ngpa, access); + if (real_gfn == UNMAPPED_GVA) + return -EFAULT; + + real_gfn = gpa_to_gfn(real_gfn); + + return kvm_read_guest_page(vcpu-kvm, real_gfn, data, offset, len); +} +EXPORT_SYMBOL_GPL(kvm_read_guest_page_mmu); + +/* * Load the pae pdptrs. Return true is they are all valid. */ int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: SVM: Implement MMU helper functions for Nested Nested Paging
From: Joerg Roedel joerg.roe...@amd.com This patch adds the helper functions which will be used in the mmu context for handling nested nested page faults. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index a98ac52..a483aa9 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -104,6 +104,8 @@ struct nested_state { u32 intercept_exceptions; u64 intercept; + /* Nested Paging related state */ + u64 nested_cr3; }; #define MSRPM_OFFSETS 16 @@ -1600,6 +1602,34 @@ static int vmmcall_interception(struct vcpu_svm *svm) return 1; } +static unsigned long nested_svm_get_tdp_cr3(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + return svm-nested.nested_cr3; +} + +static void nested_svm_set_tdp_cr3(struct kvm_vcpu *vcpu, + unsigned long root) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.nested_cr3 = root; + force_new_asid(vcpu); +} + +static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu) +{ + struct vcpu_svm *svm = to_svm(vcpu); + + svm-vmcb-control.exit_code = SVM_EXIT_NPF; + svm-vmcb-control.exit_code_hi = 0; + svm-vmcb-control.exit_info_1 = vcpu-arch.fault.error_code; + svm-vmcb-control.exit_info_2 = vcpu-arch.fault.address; + + nested_svm_vmexit(svm); +} + static int nested_svm_check_permissions(struct vcpu_svm *svm) { if (!(svm-vcpu.arch.efer EFER_SVME) -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Refactor mmu_alloc_roots function
From: Joerg Roedel joerg.roe...@amd.com This patch factors out the direct-mapping paths of the mmu_alloc_roots function into a seperate function. This makes it a lot easier to avoid all the unnecessary checks done in the shadow path which may break when running direct. In fact, this patch already fixes a problem when running PAE guests on a PAE shadow page table. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a25173a..9cd5a71 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2357,42 +2357,77 @@ static int mmu_check_root(struct kvm_vcpu *vcpu, gfn_t root_gfn) return ret; } -static int mmu_alloc_roots(struct kvm_vcpu *vcpu) +static int mmu_alloc_direct_roots(struct kvm_vcpu *vcpu) +{ + struct kvm_mmu_page *sp; + int i; + + if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + spin_lock(vcpu-kvm-mmu_lock); + kvm_mmu_free_some_pages(vcpu); + sp = kvm_mmu_get_page(vcpu, 0, 0, PT64_ROOT_LEVEL, + 1, ACC_ALL, NULL); + ++sp-root_count; + spin_unlock(vcpu-kvm-mmu_lock); + vcpu-arch.mmu.root_hpa = __pa(sp-spt); + } else if (vcpu-arch.mmu.shadow_root_level == PT32E_ROOT_LEVEL) { + for (i = 0; i 4; ++i) { + hpa_t root = vcpu-arch.mmu.pae_root[i]; + + ASSERT(!VALID_PAGE(root)); + spin_lock(vcpu-kvm-mmu_lock); + kvm_mmu_free_some_pages(vcpu); + sp = kvm_mmu_get_page(vcpu, i 30, i 30, + PT32_ROOT_LEVEL, 1, ACC_ALL, + NULL); + root = __pa(sp-spt); + ++sp-root_count; + spin_unlock(vcpu-kvm-mmu_lock); + vcpu-arch.mmu.pae_root[i] = root | PT_PRESENT_MASK; + vcpu-arch.mmu.root_hpa = __pa(vcpu-arch.mmu.pae_root); + } + } else + BUG(); + + return 0; +} + +static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu) { int i; gfn_t root_gfn; struct kvm_mmu_page *sp; - int direct = 0; u64 pdptr; root_gfn = vcpu-arch.mmu.get_cr3(vcpu) PAGE_SHIFT; - if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { + if (mmu_check_root(vcpu, root_gfn)) + return 1; + + /* +* Do we shadow 
a long mode page table? If so we need to +* write-protect the guests page table root. +*/ + if (vcpu-arch.mmu.root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; ASSERT(!VALID_PAGE(root)); - if (mmu_check_root(vcpu, root_gfn)) - return 1; - if (vcpu-arch.mmu.direct_map) { - direct = 1; - root_gfn = 0; - } + spin_lock(vcpu-kvm-mmu_lock); kvm_mmu_free_some_pages(vcpu); - sp = kvm_mmu_get_page(vcpu, root_gfn, 0, - PT64_ROOT_LEVEL, direct, - ACC_ALL, NULL); + sp = kvm_mmu_get_page(vcpu, root_gfn, 0, PT64_ROOT_LEVEL, + 0, ACC_ALL, NULL); root = __pa(sp-spt); ++sp-root_count; spin_unlock(vcpu-kvm-mmu_lock); vcpu-arch.mmu.root_hpa = root; return 0; } - direct = !is_paging(vcpu); - - if (mmu_check_root(vcpu, root_gfn)) - return 1; + /* +* We shadow a 32 bit page table. This may be a legacy 2-level +* or a PAE 3-level page table. +*/ for (i = 0; i 4; ++i) { hpa_t root = vcpu-arch.mmu.pae_root[i]; @@ -2406,16 +2441,11 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) root_gfn = pdptr PAGE_SHIFT; if (mmu_check_root(vcpu, root_gfn)) return 1; - } else if (vcpu-arch.mmu.root_level == 0) - root_gfn = 0; - if (vcpu-arch.mmu.direct_map) { - direct = 1; - root_gfn = i 30; } spin_lock(vcpu-kvm-mmu_lock); kvm_mmu_free_some_pages(vcpu); sp = kvm_mmu_get_page(vcpu, root_gfn, i 30, - PT32_ROOT_LEVEL, direct, + PT32_ROOT_LEVEL, 0, ACC_ALL, NULL); root = __pa(sp-spt); ++sp-root_count; @@ -2427,6 +2457,14 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) return 0;
[COMMIT master] KVM: MMU: Introduce kvm_pdptr_read_mmu
From: Joerg Roedel joerg.roe...@amd.com This function is implemented to load the pdptr pointers of the currently running guest (l1 or l2 guest). Therefore it takes care about the current paging mode and can read pdptrs out of l2 guest physical memory. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/kvm/kvm_cache_regs.h b/arch/x86/kvm/kvm_cache_regs.h index a37abe2..975bb45 100644 --- a/arch/x86/kvm/kvm_cache_regs.h +++ b/arch/x86/kvm/kvm_cache_regs.h @@ -45,6 +45,13 @@ static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) return vcpu-arch.walk_mmu-pdptrs[index]; } +static inline u64 kvm_pdptr_read_mmu(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu, int index) +{ + load_pdptrs(vcpu, mmu, mmu-get_cr3(vcpu)); + + return mmu-pdptrs[index]; +} + static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) { ulong tmask = mask KVM_POSSIBLE_CR0_GUEST_BITS; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a26f13b..a25173a 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2398,7 +2398,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) ASSERT(!VALID_PAGE(root)); if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { - pdptr = kvm_pdptr_read(vcpu, i); + pdptr = kvm_pdptr_read_mmu(vcpu, vcpu-arch.mmu, i); if (!is_present_gpte(pdptr)) { vcpu-arch.mmu.pae_root[i] = 0; continue; diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index f4e09d3..a28f09b 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -137,7 +137,7 @@ walk: #if PTTYPE == 64 if (walker-level == PT32E_ROOT_LEVEL) { - pte = kvm_pdptr_read(vcpu, (addr 30) 3); + pte = kvm_pdptr_read_mmu(vcpu, mmu, (addr 30) 3); trace_kvm_mmu_paging_element(pte, walker-level); if (!is_present_gpte(pte)) { present = false; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at 
http://vger.kernel.org/majordomo-info.html
[COMMIT master] KVM: MMU: Introduce get_cr3 function pointer
From: Joerg Roedel joerg.roe...@amd.com This function pointer in the MMU context is required to implement Nested Nested Paging. Signed-off-by: Joerg Roedel joerg.roe...@amd.com Signed-off-by: Avi Kivity a...@redhat.com diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index aeeea9c..ab708ee 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -237,6 +237,7 @@ struct kvm_pio_request { struct kvm_mmu { void (*new_cr3)(struct kvm_vcpu *vcpu); void (*set_cr3)(struct kvm_vcpu *vcpu, unsigned long root); + unsigned long (*get_cr3)(struct kvm_vcpu *vcpu); int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err); void (*free)(struct kvm_vcpu *vcpu); gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access, diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index a55f8d5..e4a7de4 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2365,7 +2365,7 @@ static int mmu_alloc_roots(struct kvm_vcpu *vcpu) int direct = 0; u64 pdptr; - root_gfn = vcpu-arch.cr3 PAGE_SHIFT; + root_gfn = vcpu-arch.mmu.get_cr3(vcpu) PAGE_SHIFT; if (vcpu-arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) { hpa_t root = vcpu-arch.mmu.root_hpa; @@ -2562,6 +2562,11 @@ static void paging_new_cr3(struct kvm_vcpu *vcpu) mmu_free_roots(vcpu); } +static unsigned long get_cr3(struct kvm_vcpu *vcpu) +{ + return vcpu-arch.cr3; +} + static void inject_page_fault(struct kvm_vcpu *vcpu, u64 addr, u32 err_code) @@ -2715,6 +2720,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu) context-root_hpa = INVALID_PAGE; context-direct_map = true; context-set_cr3 = kvm_x86_ops-set_tdp_cr3; + context-get_cr3 = get_cr3; if (!is_paging(vcpu)) { context-gva_to_gpa = nonpaging_gva_to_gpa; @@ -2755,6 +2761,7 @@ static int init_kvm_softmmu(struct kvm_vcpu *vcpu) vcpu-arch.mmu.base_role.cr4_pae = !!is_pae(vcpu); vcpu-arch.mmu.base_role.cr0_wp = is_write_protection(vcpu); vcpu-arch.mmu.set_cr3 = kvm_x86_ops-set_cr3; + vcpu-arch.mmu.get_cr3 = get_cr3; 
return r; } diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h index e4ad3dc..13d0c06 100644 --- a/arch/x86/kvm/paging_tmpl.h +++ b/arch/x86/kvm/paging_tmpl.h @@ -130,7 +130,7 @@ walk: present = true; eperm = rsvd_fault = false; walker-level = vcpu-arch.mmu.root_level; - pte = vcpu-arch.cr3; + pte = vcpu-arch.mmu.get_cr3(vcpu); #if PTTYPE == 64 if (vcpu-arch.mmu.root_level == PT32E_ROOT_LEVEL) { pte = kvm_pdptr_read(vcpu, (addr 30) 3); @@ -143,7 +143,7 @@ walk: } #endif ASSERT((!is_long_mode(vcpu) is_pae(vcpu)) || - (vcpu-arch.cr3 CR3_NONPAE_RESERVED_BITS) == 0); + (vcpu-arch.mmu.get_cr3(vcpu) CR3_NONPAE_RESERVED_BITS) == 0); pt_access = ACC_ALL; -- To unsubscribe from this list: send the line unsubscribe kvm-commits in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] S390: Add virtio hotplug add support
On Mon, 13 Sep 2010 13:05:57 +0930 Rusty Russell ru...@rustcorp.com.au wrote: On Sun, 12 Sep 2010 06:30:43 pm Avi Kivity wrote: On 09/12/2010 02:42 AM, Alexander Graf wrote: On 24.08.2010, at 15:48, Alexander Graf wrote: The one big missing feature in s390-virtio was hotplugging. This is no more. This patch implements hotplug add support, so you can on the fly add new devices in the guest. Keep in mind that this needs a patch for qemu to actually leverage the functionality. Signed-off-by: Alexander Grafag...@suse.de ping (on the patch set)? Actually Marcelo applied it. But the natural place for it is Rusty's virtio tree. Rusty, if you want to take it, let me know and I'll drop it from kvm.git. I thought it would be in the s390 tree, which is why I didn't take it... But I'm *always* happy to let do the work! I didn't pick them up after I saw that Marcelo took them. If others want to do the work, be my guest.. -- blue skies, Martin. Reality continues to ruin my life. - Calvin. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/24] Exiting from L2 to L1
On 09/12/2010 09:51 PM, Nadav Har'El wrote: There are two cases where VM_ENTRY_INTR_INFO_FIELD can potentially not be cleared by hardware: ... If neither of these are valid, the code can be removed. If only the second, we might make it conditional. Again, unless I'm misunderstanding what you mean, the hardware only modified vmcs02 (the hardware vmcs), not vmcs12. We need to modify vmcs12 as well, to remove the valid bit. If we don't, when L1 enters into the same L2 again, the same old value will be copied again from vmcs12 to vmcs02, and cause an injection of the same interrupt again. Yes, vmcs12 still needs to be updated. So the code cannot be removed, just the vm -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/24] Exiting from L2 to L1
On 09/13/2010 07:53 AM, Sheng Yang wrote: What can happen is that the contents of the field are transferred to the IDT_VECTORING_INFO field or VM_EXIT_INTR_INFO field. (question: on a failed vmentry, is this field cleared?) I don't know the answer :-) Sheng? According to SDM 23.7 VM-ENTRY FAILURES DURING OR AFTER LOADING GUEST STATE: Although this process resembles that of a VM exit, many steps taken during a VM exit do not occur for these VM-entry failures: • Most VM-exit information fields are not updated (see step 1 above). • The valid bit in the VM-entry interruption-information field is *not* cleared. • The guest-state area is not modified. • No MSRs are saved into the VM-exit MSR-store area. So VM entry failure would result in _keep_ valid bit of VM_ENTRY_INTR_INFO_FIELD. Ok. So if the exit was actually due to a failed vmentry, then we do need the vmread... (or alternatively, we can avoid clearing the field in the first place). So the following options should work: 1. vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); 2. if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field &= ~VALID; 3. if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 18/24] Exiting from L2 to L1
On Mon, Sep 13, 2010, Avi Kivity wrote about "Re: [PATCH 18/24] Exiting from L2 to L1": So the following options should work: 1. vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); Right, this was the original code in the patch. 2. if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field &= ~VALID; I now prefer this code. It doesn't do vmread (but replaces it with a bunch of extra instructions - which might be even slower overall...). But the more interesting thing is that it doesn't copy irrelevant bits from vmcs02 to vmcs12, bits that might not have been set by L1 but rather by L0 which previously injected an interrupt into the same L2. These bits shouldn't matter (when !valid), but a nosy L1 might notice them... 3. if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); I think you meant the opposite condition? if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); -- Nadav Har'El| Monday, Sep 13 2010, 5 Tishri 5771 n...@math.technion.ac.il |- Phone +972-523-790466, ICQ 13349191 |Always borrow money from pessimists. They http://nadav.harel.org.il |don't expect to be paid back. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/4] Add a new API to virtio-pci
On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote: Michael S. Tsirkin m...@redhat.com wrote on 09/12/2010 05:16:37 PM: Michael S. Tsirkin m...@redhat.com 09/12/2010 05:16 PM On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote: Unfortunately I need a constant in vhost for now. Maybe not even that: you create multiple vhost-net devices so vhost-net in kernel does not care about these either, right? So this can be just part of vhost_net.h in qemu. Sorry, I didn't understand what you meant. I can remove all socks[] arrays/constants by pre-allocating sockets in vhost_setup_vqs. Then I can remove all socks parameters in vhost_net_stop, vhost_net_release and vhost_net_reset_owner. Does this make sense? Thanks, - KK Here's what I mean: each vhost device includes 1 TX and 1 RX VQ. Instead of teaching vhost about multiqueue, we could simply open /dev/vhost-net multiple times. How many times would be up to qemu. -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[Autotest] [KVM-AUTOTEST] Patch to install cygwin and run autotest in windows
Hello Lucas, I would like to submit a patch to do an unattended install of cygwin and run autotest test cases on Windows guests using cygwin. Cygwin enables running some tests, like stress, lmbench, bonnie, compilebench and netperf, directly under windows, but most of the tests need to be patched before they can run on Windows. So this patch introduces a new parameter 'test_patch_file' in tests_config to indicate the patch file. Two new files are added related to patching autotest, 'autotest.patch' which is like a base patch for autotest, needed to run autotest in windows and 'iozone.patch' which is used to run iozone on windows. Similar patches could be developed for test cases like 'dacapo' and sysbench which would enable running tests related to java and mysql/postgresql, I will be happy to write patches for them too. This patch also adds two new files 'scripts/cyg_install.py' and 'tests/cyg_install'. 'scripts/cyg_install.py' is used to do an unattended install of cygwin. The parameter 'cyg_path' indicates the path of the setup file. Installation can be done either locally, using 'cyg_install.local', or remotely, using 'cyg_install.remote'. Local installation requires both the cygwin setup and packages files to be present locally at the path indicated by 'cyg_path'. The parameter 'cyg_param' is used to pass the command line options for cygwin setup. So it can contain options for packages to be installed, proxy server to be used to do remote installation and so on. An iso image cyg.iso containing the setup and packages in case of local install and just the setup in case of remote install, is created on the fly. Installation is started using telnet. This patch acts as a complement to the existing support for running tests using AutoIt. I will be happy to re-implement or modify the patch based on your comments and also write patches for other test cases like sysbench, dacapo. 
From 745d87681a33cc14431dc1b6b35cd977112b0fee Mon Sep 17 00:00:00 2001 From: Yogananth Subramanian anant...@linux.vnet.ibm.com Date: Fri, 10 Sep 2010 20:40:16 +0530 Subject: [PATCH] This patch enables installing cygwin and running autotest in windows guests The patch creates two new files 'scripts/cyg_install.py' and 'tests/cyg_install'. 'scripts/cyg_install.py' is used to install cygwin in unattended mode. This patch also introduces a new parameter 'patch_file'to run_autotest() in kvm_test_utils.py file, to install patch for autotest to run in windows. The file 'autotest_control/autotest.patch' is base patch to run any autotest test cases in windows and file 'autotest_control/iozone.patch' is a patch to run iozone on windows. Signed-off-by: Yogananth Subramanian anant...@linux.vnet.ibm.com --- client/tests/kvm/autotest_control/autotest.patch | 15 client/tests/kvm/autotest_control/iozone.patch | 12 +++ client/tests/kvm/kvm_test_utils.py | 10 ++- client/tests/kvm/scripts/cyg_install.py | 98 ++ client/tests/kvm/tests/autotest.py | 21 - client/tests/kvm/tests/cyg_install.py| 36 client/tests/kvm/tests_base.cfg.sample | 43 +- 7 files changed, 231 insertions(+), 4 deletions(-) create mode 100644 client/tests/kvm/autotest_control/autotest.patch create mode 100644 client/tests/kvm/autotest_control/iozone.patch create mode 100755 client/tests/kvm/scripts/cyg_install.py create mode 100644 client/tests/kvm/tests/cyg_install.py diff --git a/client/tests/kvm/autotest_control/autotest.patch b/client/tests/kvm/autotest_control/autotest.patch new file mode 100644 index 000..f562f95 --- /dev/null +++ b/client/tests/kvm/autotest_control/autotest.patch @@ -0,0 +1,15 @@ +diff -aurpN client/bin/job.py client-new/bin/job.py +--- client/bin/job.py 2010-08-25 01:42:27.0 -0400 client-new/bin/job.py 2010-09-07 09:54:30.0 -0400 +@@ -296,7 +296,10 @@ class base_client_job(base_job.base_job) + + # extract console= and other args from cmdline and add them into the + # base args that we use for 
all kernels we install +-cmdline = utils.read_one_line('/proc/cmdline') ++if os.path.exists('/proc/cmdline'): ++cmdline = utils.read_one_line('/proc/cmdline') ++else: ++return + kernel_args = [] + for karg in cmdline.split(): + for param in copy_cmdline: diff --git a/client/tests/kvm/autotest_control/iozone.patch b/client/tests/kvm/autotest_control/iozone.patch new file mode 100644 index 000..6229205 --- /dev/null +++ b/client/tests/kvm/autotest_control/iozone.patch @@ -0,0 +1,12 @@ +diff -aurpN client/tests/iozone/iozone.py client-new/tests/iozone/iozone.py +--- client/tests/iozone/iozone.py 2010-08-25 01:42:27.0 -0400 client-new/tests/iozone/iozone.py 2010-09-02 11:38:42.0 -0400 +@@ -43,7 +43,7 @@ class iozone(test.test): + elif (arch == 'x86_64'): + utils.system('make linux-AMD64') +
Re: [PATCH 18/24] Exiting from L2 to L1
On 09/13/2010 11:01 AM, Nadav Har'El wrote: 3. if (exit_reason & FAILED_ENTRY) vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); I think you meant the opposite condition? if (!(exit_reason & FAILED_ENTRY)) vmcs12->vm_entry_intr_info_field = vmcs_read32(VM_ENTRY_INTR_INFO_FIELD); Dunno, I think both are subtly broken. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] S390: Add virtio hotplug add support
On 09/13/2010 09:41 AM, Martin Schwidefsky wrote: Actually Marcelo applied it. But the natural place for it is Rusty's virtio tree. Rusty, if you want to take it, let me know and I'll drop it from kvm.git. I thought it would be in the s390 tree, which is why I didn't take it... But I'm *always* happy to let do the work! I didn't pick them up after I saw that Marcelo took them. If others want to do the work, be my guest.. I just hope that all this generosity doesn't lead to merge conflicts later, or people basing their stuff on stale code. But it isn't like this is a high churn area. -- error compiling committee.c: too many arguments to function -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Autotest] [KVM-AUTOTEST] Patch to install cygwin and run autotest in windows
On Mon, 2010-09-13 at 15:01 +0530, yogi wrote: Hello Lucas, I like to submit patch to do unattended install of cygwin and run autotest test case on Windows guests using cygwin. Cygwin enable running some of test like stress, lmbench, bonnie, compilebench and netperf directly under windows, but most of the tests need to be patched before they could run on Windows. So this patch introduces a new parameter 'test_patch_file' in tests_config to indicate the patch file. Two new files are added related to patching autotest, 'autotest.patch' which is like base patch for autotest, needed to run autotest in windows and 'iozone.patch' which to used run iozone on windows. Similar patches could be developed for test cases like 'dacapo' and sysbench which would enable running tests related to java and mysql/postgresql, i will be happy to write patches for them too. Hi Yogi, The idea is very interesting! However, your mail client chewed the patch, you apparently pasted the diffs generated by git format-patch into your mail client window, not an optimal way to do things. Please refer to http://autotest.kernel.org/wiki/GitWorkflow for a quick guide on how to configure git to use git send-email, so your patches will be perfectly mailed to the mailing list. Your patches touch some core infrastructure of autotest, I'll have to review the strategy used very carefully, so please be patient. Thanks for your contribution, Lucas -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [ANNOUNCE] qemu-kvm-0.13.0-rc1
On Sun, Sep 12, 2010 at 05:59:45PM +0200, Avi Kivity wrote: On 09/12/2010 05:31 PM, Anthony Liguori wrote: On 09/12/2010 01:11 AM, Avi Kivity wrote: On 09/10/2010 10:48 PM, Anthony Liguori wrote: I agree, is there any reason not to enable compiling less into the binary? There are folks interested in eliminating as much as possible to reduce the attack surface and auditing requirements, for example. It's not a bad idea, it's just that what --disable-cpu-emulation does is evil. Being that I wrote the implementation, I'm quite confident in declaring it as such :-) Oh, I thought you were against the idea in itself for some reason. I'll patch it for 0.13, but any ideas on how it should be reworked for master? Glauber's old Accel interface was close to the right approach. We need to abstract the exec.c interfaces to use a function pointer table and have a TCG and KVM implementation. The function pointer tables can then be registered by a module_init() and we can simply not include the kvm or TCG files at build time to disable the functionality. Yes, I remember it now. Glauber, can you bring those patches back from the land of the dead? I could, but I myself was not entirely sure about the correct approach in terms of granularity. The first version was too fine grained, since I was hooking into every possible kqemu operation, (the goal at the time was to take _that_ out, not tcg), and then the second version got too coarse, with us having to rewrite whole parts of memory. Now that kqemu is gone, it surely gets easier... -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: vhost, something changed between 2.6.35 and 2.6.36 ?
On Mon, Sep 13, 2010 at 04:25:13PM +0200, Dhaval Giani wrote: (BTW, this is a regression from 2.6.35 at least. I will try to figure out the last working version if you would like a bisect!) Sure, that's helpful. BTW, does latest upstream qemu-kvm have this issue as well for you? On Sun, Sep 12, 2010 at 4:40 PM, Michael S. Tsirkin m...@redhat.com wrote: On Sun, Sep 12, 2010 at 04:39:29PM +0200, Dhaval Giani wrote: On Sun, Sep 12, 2010 at 2:05 PM, Michael S. Tsirkin m...@redhat.com wrote: On Fri, Sep 10, 2010 at 03:37:36PM +0200, Dhaval Giani wrote: Hi, I have been trying to get vhost+macvtap to work for me. I run it as /root/qemu-kvm-vhost-net/bin/qemu-system-x86_64 -hda $IMAGE Â -serial stdio -monitor telnet::,server,nowait -vnc :4: -m 3G -net nic,model=virtio,macaddr=$MACADDR,netdev=macvtap0 -netdev tap,id=macvtap0,vhost=on,fd=3 3 /dev/tap5 in 2.6.35, which worked just fine. On the other hand, with 2.6.36, i don't have working networking. I am using the same image and same macaddress. The qemu is the version from git://git.kernel.org/pub/scm/linux/kernel/git/mst/qemu-kvm.git vhost . BTW, by now, all these patches are merged so upstream qemu-kvm should work just fine for you as well. Any suggestions will be welcome! Thanks, Dhaval You are running this as non-root user, correct? nope as root. This could be the permission issue that got fixed by 87d6a412bd1ed82c14cabd4b408003b23bbd2880. Could you please check the latest master from Linus, and let me and the list know? Thanks! this is with git of friday evening CEST. Another thing to try if this does *not* help: enable CONFIG_DYNAMIC_DEBUG in kernel, rebuild the kernel, mount debugfs: Â Â Â Â mount -t debugfs none /sys/kernel/debug and then enable debug for vhost_net as described in Documentation/dynamic-debug-howto.txt: I will give this a run on monday morning when i am at the lab again. So nothing comes out with this. 
Â Â Â Â echo 'module vhost_net +p' /sys/kernel/debug/dynamic_debug/control Then start qemu, and after running a test, run dmesg and see if there are any messages from vhost_net. If yes please send them to me and to the list. Thanks! thanks! Dhaval Another thing to try check is generic net core issues. For this, try running tcpdump on both tap in host and on virtio net device in guest. Then send packets to host from guest and back, and check whether they appears on virtio and on tap. tcpdump -i macvtap0 on the host leads to nothing. tcpdump -i eth0 on the guest leads to ARP requests, with no responses. Anything more I can try? Thanks! Dhaval So nothing is passed to tap, but no errors either. It might be helpful to enable function tracer and trace functions in our module. Alternatively, let's see if we get anything from the ring (don't forget to unload and reload vhost-net). diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 29e850a..6400cd5 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -152,6 +152,8 @@ static void handle_tx(struct vhost_net *net) ARRAY_SIZE(vq-iov), out, in, NULL, NULL); + printk(KERN_ERR vhost_get_vq_desc returned %d\n, head); + /* On error, stop handling until the next kick. */ if (unlikely(head 0)) break; -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables
Commit "KVM: MMU: Make tdp_enabled a mmu-context parameter" made real-mode set ->direct_map, and changed the code that merges in the memory type to depend on direct_map instead of tdp_enabled. However, in this case what really matters is tdp, not direct_map, since tdp changes the pte format regardless of whether the mapping is direct or not. As a result, real-mode shadow mappings got corrupted with ept memory types. The result was a huge slowdown, likely due to the cache being disabled. Change it back as the simplest fix for the regression (real fix is to move all that to vmx code, and not use tdp_enabled as a synonym for ept). Signed-off-by: Avi Kivity a...@redhat.com --- arch/x86/kvm/mmu.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e248d8..3ce56bf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= shadow_user_mask; if (level > PT_PAGE_TABLE_LEVEL) spte |= PT_PAGE_SIZE_MASK; - if (vcpu->arch.mmu.direct_map) + if (tdp_enabled) spte |= kvm_x86_ops->get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
KVM call agenda for Sept 14
Please send in any agenda items you are interested in covering. thanks, -chris -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/4] Add a new API to virtio-pci
On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote: On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote: Michael S. Tsirkinm...@redhat.com wrote on 09/12/2010 05:16:37 PM: Michael S. Tsirkinm...@redhat.com 09/12/2010 05:16 PM On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote: Unfortunately I need a constant in vhost for now. Maybe not even that: you create multiple vhost-net devices so vhost-net in kernel does not care about these either, right? So this can be just part of vhost_net.h in qemu. Sorry, I didn't understand what you meant. I can remove all socks[] arrays/constants by pre-allocating sockets in vhost_setup_vqs. Then I can remove all socks parameters in vhost_net_stop, vhost_net_release and vhost_net_reset_owner. Does this make sense? Thanks, - KK Here's what I mean: each vhost device includes 1 TX and 1 RX VQ. Instead of teaching vhost about multiqueue, we could simply open /dev/vhost-net multiple times. How many times would be up to qemu. Trouble is, each vhost-net device is associated with 1 tun/tap device which means that each vhost-net device is associated with a transmit and receive queue. I don't know if you'll always have an equal number of transmit and receive queues but there's certainly challenge in terms of flexibility with this model. Regards, Anthony Liguori -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: KVM call agenda for Sept 14
On 09/13/2010 10:59 AM, Chris Wright wrote: Please send in any agenda items you are interested in covering. 1) 0.13.0 I'll be collecting patches for the next 24 hours so if there are fixes you care about, please ping me between now and then. http://wiki.qemu.org/Releases/0.13.0 Regards, Anthony Liguori thanks, -chris -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Patch to install cygwin and run autotest in windows
Hello Lucas, Submitting the patch again using git send-email; there was some issue with git config, it's resolved now. Thanks for the link. I tried not to make any drastic change to the kvm-autotest infrastructure: run_autotest() accepts an additional parameter, other than that the rest of the patch does not make any major change to existing code. Pasting below the contents of my previous mail, for reference. Autotest part of the patch: This patch introduces a new parameter 'test_patch_file' in tests_config to indicate the patch file. Two new files are added related to patching autotest, 'autotest.patch' which is like a base patch for autotest, needed to run autotest in windows, and 'iozone.patch' which is used to run iozone on windows. Similar patches could be developed for test cases like 'dacapo' and sysbench which would enable running tests related to java and mysql/postgresql, I will be happy to write patches for them too. Cygwin installation part of the patch: This patch also adds two new files 'scripts/cyg_install.py' and 'tests/cyg_install'. 'scripts/cyg_install.py' is used to do an unattended install of cygwin. The parameter 'cyg_path' indicates the path of the setup file. Installation can be done either locally, using 'cyg_install.local', or remotely, using 'cyg_install.remote'. Local installation requires both the cygwin setup and packages files to be present locally at the path indicated by 'cyg_path'. The parameter 'cyg_param' is used to pass the command line options for cygwin setup. So it can contain options for packages to be installed, proxy server to be used to do remote installation and so on. An iso image cyg.iso containing the setup and packages in case of local install and just the setup in case of remote install, is created on the fly. Installation is started using telnet. 
This patch acts as a complement to the existing support for running tests using AutoIt. I will be happy to re-implement or modify the patch based on your comments and also write patches for other test cases like sysbench, dacapo. Thanks yogi -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] This patch enables installing cygwin and running autotest in windows guests
From: Yogananth subramanian anant...@linux.vnet.ibm.com The patch creates two new files 'scripts/cyg_install.py' and 'tests/cyg_install'. 'scripts/cyg_install.py' is used to install cygwin in unattended mode. This patch also introduces a new parameter 'patch_file'to run_autotest() in kvm_test_utils.py file, to install patch for autotest to run in windows. The file 'autotest_control/autotest.patch' is base patch to run any autotest test cases in windows and file 'autotest_control/iozone.patch' is a patch to run iozone on windows. Signed-off-by: Yogananth subramanian anant...@linux.vnet.ibm.com --- client/tests/kvm/autotest_control/autotest.patch | 15 client/tests/kvm/autotest_control/iozone.patch | 12 +++ client/tests/kvm/kvm_test_utils.py | 10 ++- client/tests/kvm/scripts/cyg_install.py | 98 ++ client/tests/kvm/tests/autotest.py | 21 - client/tests/kvm/tests/cyg_install.py| 36 client/tests/kvm/tests_base.cfg.sample | 43 +- 7 files changed, 231 insertions(+), 4 deletions(-) create mode 100644 client/tests/kvm/autotest_control/autotest.patch create mode 100644 client/tests/kvm/autotest_control/iozone.patch create mode 100644 client/tests/kvm/scripts/cyg_install.py create mode 100644 client/tests/kvm/tests/cyg_install.py diff --git a/client/tests/kvm/autotest_control/autotest.patch b/client/tests/kvm/autotest_control/autotest.patch new file mode 100644 index 000..f562f95 --- /dev/null +++ b/client/tests/kvm/autotest_control/autotest.patch @@ -0,0 +1,15 @@ +diff -aurpN client/bin/job.py client-new/bin/job.py +--- client/bin/job.py 2010-08-25 01:42:27.0 -0400 client-new/bin/job.py 2010-09-07 09:54:30.0 -0400 +@@ -296,7 +296,10 @@ class base_client_job(base_job.base_job) + + # extract console= and other args from cmdline and add them into the + # base args that we use for all kernels we install +-cmdline = utils.read_one_line('/proc/cmdline') ++if os.path.exists('/proc/cmdline'): ++cmdline = utils.read_one_line('/proc/cmdline') ++else: ++return + kernel_args = [] + for 
karg in cmdline.split(): + for param in copy_cmdline: diff --git a/client/tests/kvm/autotest_control/iozone.patch b/client/tests/kvm/autotest_control/iozone.patch new file mode 100644 index 000..6229205 --- /dev/null +++ b/client/tests/kvm/autotest_control/iozone.patch @@ -0,0 +1,12 @@ +diff -aurpN client/tests/iozone/iozone.py client-new/tests/iozone/iozone.py +--- client/tests/iozone/iozone.py 2010-08-25 01:42:27.0 -0400 client-new/tests/iozone/iozone.py 2010-09-02 11:38:42.0 -0400 +@@ -43,7 +43,7 @@ class iozone(test.test): + elif (arch == 'x86_64'): + utils.system('make linux-AMD64') + else: +-utils.system('make linux') ++utils.system('make Windows') + + + def run_once(self, dir=None, args=None): diff --git a/client/tests/kvm/kvm_test_utils.py b/client/tests/kvm/kvm_test_utils.py index 5412aac..34bccb8 100644 --- a/client/tests/kvm/kvm_test_utils.py +++ b/client/tests/kvm/kvm_test_utils.py @@ -336,7 +336,7 @@ def get_memory_info(lvms): return meminfo -def run_autotest(vm, session, control_path, timeout, outputdir): +def run_autotest(vm, session, control_path, timeout, outputdir, patch_file): Run an autotest control file inside a guest (linux only utility). @@ -346,6 +346,7 @@ def run_autotest(vm, session, control_path, timeout, outputdir): @param timeout: Timeout under which the autotest control file must complete. @param outputdir: Path on host where we should copy the guest autotest results to. +@param patch_file: A path to an autotest patch file. 
def copy_if_hash_differs(vm, local_path, remote_path): @@ -460,12 +461,19 @@ def run_autotest(vm, session, control_path, timeout, outputdir): os.path.join(autotest_path, 'control')): raise error.TestFail(Could not copy the test control file to guest) +if not patch_file == : +if not vm.copy_files_to(patch_file, +os.path.join(autotest_path, '../test.patch')): +raise error.TestFail(Could not copy the test patch file to guest) + # Run the test logging.info(Running autotest control file %s on guest, timeout %ss, os.path.basename(control_path), timeout) session.get_command_output(cd %s % autotest_path) session.get_command_output(rm -f control.state) session.get_command_output(rm -rf results/*) +if not patch_file == : +session.get_command_output(patch -p1 ../test.patch) logging.info( Test output ) status = session.get_command_status(bin/autotest control,
Re: [RFC PATCH 0/4] Implement multiqueue virtio-net
Michael S. Tsirkin m...@redhat.com wrote on 09/13/2010 05:20:55 PM: Results with the original kernel: _ # BW SD RSD __ 1 20903 1 6 2 21963 6 25 4 22042 23 102 8 21674 97 419 16 22281 379 1663 24 22521 857 3748 32 22976 15286594 40 23197 239010239 48 22973 354215074 64 23809 648627244 80 23564 10169 43118 96 22977 14954 62948 128 23649 27067 113892 With higher number of threads running in parallel, SD increased. In this case most threads run in parallel only till __dev_xmit_skb (#numtxqs=1). With mq TX patch, higher number of threads run in parallel through ndo_start_xmit. I *think* the increase in SD is to do with higher # of threads running for larger code path From the numbers I posted with the patch (cut-n-paste only the % parts), BW increased much more than the SD, sometimes more than twice the increase in SD. Service demand is BW/CPU, right? So if BW goes up by 50% and SD by 40%, this means that CPU more than doubled. I think the SD calculation might be more complicated, I think it does it based on adding up averages sampled and stored during the run. But, I still don't see how CPU can double?? e.g. BW: 1000 - 1500 (50%) SD: 100 - 140 (40%) CPU: 10 - 10.71 (7.1%) N# BW% SD% RSD% 4 54.30 40.00-1.16 8 71.79 46.59-2.68 16 71.89 50.40-2.50 32 72.24 34.26-14.52 48 70.10 31.51-14.35 64 69.01 38.81-9.66 96 70.68 71.2610.74 I also think SD calculation gets skewed for guest-local host testing. If it's broken, let's fix it? For this test, I ran a guest with numtxqs=16. The first result below is with my patch, which creates 16 vhosts. The second result is with a modified patch which creates only 2 vhosts (testing with #netperfs = 64): My guess is it's not a good idea to have more TX VQs than guest CPUs. Definitely, I will try to run tomorrow with more reasonable values, also will test with my second version of the patch that creates restricted number of vhosts and post results. 
I realize for management it's easier to pass in a single vhost fd, but just for testing it's probably easier to add code in userspace to open /dev/vhost multiple times. #vhosts BW% SD%RSD% 16 20.79 186.01 149.74 230.89 34.55 18.44 The remote SD increases with the number of vhost threads, but that number seems to correlate with guest SD. So though BW% increased slightly from 20% to 30%, SD fell drastically from 186% to 34%. I think it could be a calculation skew with host SD, which also fell from 150% to 18%. I think by default netperf looks in /proc/stat for CPU utilization data: so host CPU utilization will include the guest CPU, I think? It appears that way to me too, but the data above seems to suggest the opposite... I would go further and claim that for host/guest TCP CPU utilization and SD should always be identical. Makes sense? It makes sense to me, but once again I am not sure how SD is really done, or whether it is linear to CPU. Cc'ing Rick in case he can comment I am planning to submit 2nd patch rev with restricted number of vhosts. Likely cause for the 1 stream degradation with multiple vhost patch: 1. Two vhosts run handling the RX and TX respectively. I think the issue is related to cache ping-pong esp since these run on different cpus/sockets. Right. With TCP I think we are better off handling TX and RX for a socket by the same vhost, so that packet and its ack are handled by the same thread. Is this what happens with RX multiqueue patch? How do we select an RX queue to put the packet on? My (unsubmitted) RX patch doesn't do this yet, that is something I will check. Thanks, - KK You'll want to work on top of net-next, I think there's RX flow filtering work going on there. Thanks Michael, I will follow up on that for the RX patch, plus your suggestion on tying RX with TX. 
Thanks, - KK -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/4] Add a new API to virtio-pci
On Mon, Sep 13, 2010 at 10:59:34AM -0500, Anthony Liguori wrote: On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote: On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote: Michael S. Tsirkinm...@redhat.com wrote on 09/12/2010 05:16:37 PM: Michael S. Tsirkinm...@redhat.com 09/12/2010 05:16 PM On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote: Unfortunately I need a constant in vhost for now. Maybe not even that: you create multiple vhost-net devices so vhost-net in kernel does not care about these either, right? So this can be just part of vhost_net.h in qemu. Sorry, I didn't understand what you meant. I can remove all socks[] arrays/constants by pre-allocating sockets in vhost_setup_vqs. Then I can remove all socks parameters in vhost_net_stop, vhost_net_release and vhost_net_reset_owner. Does this make sense? Thanks, - KK Here's what I mean: each vhost device includes 1 TX and 1 RX VQ. Instead of teaching vhost about multiqueue, we could simply open /dev/vhost-net multiple times. How many times would be up to qemu. Trouble is, each vhost-net device is associated with 1 tun/tap device which means that each vhost-net device is associated with a transmit and receive queue. I don't know if you'll always have an equal number of transmit and receive queues but there's certainly challenge in terms of flexibility with this model. Regards, Anthony Liguori Not really, TX and RX can be mapped to different devices, or you can only map one of these. What is the trouble? What other features would you desire in terms of flexibility? -- MST -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Exceed 1GB/s with virtio-net ?
On Mon, Sep 13, 2010 at 4:32 AM, Thibault VINCENT thibault.vinc...@smartjog.com wrote: Hello I'm trying to achieve higher than gigabit transfers over a virtio NIC with no success, and I can't find a recent bug or discussion about such an issue. The simplest test consists of two VMs running on a high-end blade server with 4 cores and 4GB RAM each, and a virtio NIC dedicated to the inter-VM communication. On the host, the two vnet interfaces are enslaved into a bridge. I use a combination of 2.6.35 on the host and 2.6.32 in the VMs. Running iperf or netperf on these VMs, with TCP or UDP, results in ~900Mbits/s transfers. This is what could be expected of a 1G interface, and indeed the e1000 emulation performs similarly. Changing the txqueuelen, MTU, and offloading settings on every interface (bridge/tap/virtio_net) didn't improve the speed, nor did the installation of irqbalance and the increase in CPU and RAM. Is this normal ? Is the multiple queue patch intended to address this ? It's quite possible I missed something :) I'm able to achieve quite a bit more than 1Gbps using virtio-net between 2 guests on the same host connected via an internal bridge. With the virtio-net TX bottom half handler I can easily hit 7Gbps TCP and 10+Gbps UDP using netperf (TCP_STREAM/UDP_STREAM tests). Even without the bottom half patches (not yet in qemu-kvm.git), I can get ~5Gbps. Maybe you could describe your setup further, host details, bridge setup, guests, specific tests, etc... Thanks, Alex -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 1/4] Add a new API to virtio-pci
On 09/13/2010 11:30 AM, Michael S. Tsirkin wrote: On Mon, Sep 13, 2010 at 10:59:34AM -0500, Anthony Liguori wrote: On 09/13/2010 04:04 AM, Michael S. Tsirkin wrote: On Mon, Sep 13, 2010 at 09:50:42AM +0530, Krishna Kumar2 wrote: Michael S. Tsirkinm...@redhat.com wrote on 09/12/2010 05:16:37 PM: Michael S. Tsirkinm...@redhat.com 09/12/2010 05:16 PM On Thu, Sep 09, 2010 at 07:19:33PM +0530, Krishna Kumar2 wrote: Unfortunately I need a constant in vhost for now. Maybe not even that: you create multiple vhost-net devices so vhost-net in kernel does not care about these either, right? So this can be just part of vhost_net.h in qemu. Sorry, I didn't understand what you meant. I can remove all socks[] arrays/constants by pre-allocating sockets in vhost_setup_vqs. Then I can remove all socks parameters in vhost_net_stop, vhost_net_release and vhost_net_reset_owner. Does this make sense? Thanks, - KK Here's what I mean: each vhost device includes 1 TX and 1 RX VQ. Instead of teaching vhost about multiqueue, we could simply open /dev/vhost-net multiple times. How many times would be up to qemu. Trouble is, each vhost-net device is associated with 1 tun/tap device which means that each vhost-net device is associated with a transmit and receive queue. I don't know if you'll always have an equal number of transmit and receive queues but there's certainly challenge in terms of flexibility with this model. Regards, Anthony Liguori Not really, TX and RX can be mapped to different devices, It's just a little odd. Would you bond multiple tun tap devices to achieve multi-queue TX? For RX, do you somehow limit RX to only one of those devices? If we were doing this in QEMU (and btw, there need to be userspace patches before we implement this in the kernel side), I think it would make more sense to just rely on doing a multithreaded write to a single tun/tap device and then to hope that it can be made smarter at the macvtap layer. 
Regards, Anthony Liguori Regards, Anthony Liguori or you can only map one of these. What is the trouble? What other features would you desire in terms of flexibility? -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu
netdev option in new qemu is mutually exclusive with vlan. Only pass vlan if netdev option is missing. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index bdc9aab..7e76ed5 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -235,9 +235,10 @@ class VM: return cmd def add_nic(help, vlan, model=None, mac=None, netdev_id=None): -cmd = -net nic,vlan=%d % vlan if has_option(help, netdev): -cmd +=,netdev=%s % netdev_id +cmd = -net nic,netdev=%s % netdev_id +else: +cmd = -net nic,vlan=%d % vlan if model: cmd += ,model=%s % model if mac: cmd += ,macaddr='%s' % mac return cmd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL
The symbol KVM_UPSTREAM is used to mark sections of code that are part of the upstream kvm implemetation that is not used in qemu-kvm. However the name becomes ambiguous if qemu-kvm is merged upstream. Rename the symbol to avoid confusion. Signed-off-by: Avi Kivity a...@redhat.com --- cpus.c|2 +- kvm-all.c | 16 kvm.h |6 +++--- target-i386/kvm.c | 10 +- vl.c |4 ++-- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpus.c b/cpus.c index c545a62..99c04d1 100644 --- a/cpus.c +++ b/cpus.c @@ -299,7 +299,7 @@ void qemu_notify_event(void) } } -#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM) +#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM) void qemu_mutex_lock_iothread(void) {} void qemu_mutex_unlock_iothread(void) {} #endif diff --git a/kvm-all.c b/kvm-all.c index 4ff75c4..d4b0861 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -41,7 +41,7 @@ do { } while (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL typedef struct KVMSlot { @@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void kvm_reset_vcpu(void *opaque) { CPUState *env = opaque; @@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void) } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_init_vcpu(CPUState *env) { KVMState *s = kvm_state; @@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void) cpu_register_phys_memory_client(kvm_cpu_phys_memory_client); } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_init(int smp_cpus) { @@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void) #endif } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void do_kvm_cpu_synchronize_state(void *_env) { @@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void) return kvm_state-debugregs; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_has_xsave(void) { return kvm_state-xsave; @@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, 
size_t size) } #ifdef KVM_CAP_SET_GUEST_DEBUG -#ifndef KVM_UPSTREAM +#ifndef OBSOLETE_KVM_IMPL #define run_on_cpu on_vcpu static void on_vcpu(CPUState *env, void (*func)(void *data), void *data); -#endif /* !KVM_UPSTREAM */ +#endif /* !OBSOLETE_KVM_IMPL */ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc) diff --git a/kvm.h b/kvm.h index d321fce..56236ae 100644 --- a/kvm.h +++ b/kvm.h @@ -31,13 +31,13 @@ extern int kvm_allowed; #define kvm_enabled() (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL struct kvm_run; /* external API */ int kvm_init(int smp_cpus); -#endif /* KVM_UPSTREAM */ +#endif /* OBSOLETE_KVM_IMPL */ int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); @@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); int kvm_arch_pre_run(CPUState *env, struct kvm_run *run); -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_irqchip_events(CPUState *env); #endif diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b00e80d..f4fc063 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env) return r; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL env-mp_state = KVM_MP_STATE_RUNNABLE; @@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env) env-mp_state = KVM_MP_STATE_RUNNABLE; } } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_has_msr_star(CPUState *env) { @@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, entry-data = value; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_put_msrs(CPUState *env, int level) { struct { @@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState *env) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_put_registers(CPUState *env, int level) { int ret; @@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int 
kvm_arch_process_irqchip_events(CPUState *env) { diff --git a/vl.c b/vl.c index 22a3616..378a176 100644 --- a/vl.c +++ b/vl.c @@ -2466,7 +2466,7 @@ int main(int argc, char **argv, char **envp) case QEMU_OPTION_smbios: do_smbios_option(optarg); break; -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL case QEMU_OPTION_enable_kvm: kvm_allowed = 1; #endif @@ -2803,7 +2803,7 @@ int main(int argc, char **argv, char **envp) if (kvm_allowed) { int ret = kvm_init(smp_cpus); if (ret 0) { -#if defined(KVM_UPSTREAM) || defined(CONFIG_NO_CPU_EMULATION) +#if
Re: [PATCH] KVM: MMU: Fix regression with ept memory types merged into non-ept page tables
On Mon, Sep 13, 2010 at 04:56:19PM +0200, Avi Kivity wrote: Commit KVM: MMU: Make tdp_enabled a mmu-context parameter made real-mode set -direct_map, and changed the code that merges in the memory type depend on direct_map instead of tdp_enabled. However, in this case what really matters is tdp, not direct_map, since tdp changes the pte format regardless of whether the mapping is direct or not. As a result, real-mode shadow mappings got corrupted with ept memory types. The result was a huge slowdown, likely due to the cache being disabled. Change it back as the simplest fix for the regression (real fix is to move all that to vmx code, and not use tdp_enabled as a synonym for ept). Signed-off-by: Avi Kivity a...@redhat.com --- arch/x86/kvm/mmu.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 6e248d8..3ce56bf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1980,7 +1980,7 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= shadow_user_mask; if (level PT_PAGE_TABLE_LEVEL) spte |= PT_PAGE_SIZE_MASK; - if (vcpu-arch.mmu.direct_map) + if (tdp_enabled) spte |= kvm_x86_ops-get_mt_mask(vcpu, gfn, kvm_is_mmio_pfn(pfn)); Oh, indeed. Thanks for fixing this. Acked-by: Joerg Roedel joerg.roe...@amd.com -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL
Am 13.09.2010 19:54, Avi Kivity wrote: The symbol KVM_UPSTREAM is used to mark sections of code that are part of the upstream kvm implemetation that is not used in qemu-kvm. However the name becomes ambiguous if qemu-kvm is merged upstream. I doubt this is describing all cases correctly as well. Some changes should rather happen the other way around (e.g. you surely don't want to obsolete x86 kvm_arch_put/get_registers in favor of kvm_arch_load/save_regs, do you?). Jan Rename the symbol to avoid confusion. Signed-off-by: Avi Kivity a...@redhat.com --- cpus.c|2 +- kvm-all.c | 16 kvm.h |6 +++--- target-i386/kvm.c | 10 +- vl.c |4 ++-- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpus.c b/cpus.c index c545a62..99c04d1 100644 --- a/cpus.c +++ b/cpus.c @@ -299,7 +299,7 @@ void qemu_notify_event(void) } } -#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM) +#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM) void qemu_mutex_lock_iothread(void) {} void qemu_mutex_unlock_iothread(void) {} #endif diff --git a/kvm-all.c b/kvm-all.c index 4ff75c4..d4b0861 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -41,7 +41,7 @@ do { } while (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL typedef struct KVMSlot { @@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, mem); } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void kvm_reset_vcpu(void *opaque) { CPUState *env = opaque; @@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void) } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_init_vcpu(CPUState *env) { KVMState *s = kvm_state; @@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void) cpu_register_phys_memory_client(kvm_cpu_phys_memory_client); } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_init(int smp_cpus) { @@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void) #endif } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void 
do_kvm_cpu_synchronize_state(void *_env) { @@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void) return kvm_state-debugregs; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_has_xsave(void) { return kvm_state-xsave; @@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, size_t size) } #ifdef KVM_CAP_SET_GUEST_DEBUG -#ifndef KVM_UPSTREAM +#ifndef OBSOLETE_KVM_IMPL #define run_on_cpu on_vcpu static void on_vcpu(CPUState *env, void (*func)(void *data), void *data); -#endif /* !KVM_UPSTREAM */ +#endif /* !OBSOLETE_KVM_IMPL */ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc) diff --git a/kvm.h b/kvm.h index d321fce..56236ae 100644 --- a/kvm.h +++ b/kvm.h @@ -31,13 +31,13 @@ extern int kvm_allowed; #define kvm_enabled() (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL struct kvm_run; /* external API */ int kvm_init(int smp_cpus); -#endif /* KVM_UPSTREAM */ +#endif /* OBSOLETE_KVM_IMPL */ int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); @@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); int kvm_arch_pre_run(CPUState *env, struct kvm_run *run); -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_irqchip_events(CPUState *env); #endif diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b00e80d..f4fc063 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env) return r; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL env-mp_state = KVM_MP_STATE_RUNNABLE; @@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env) env-mp_state = KVM_MP_STATE_RUNNABLE; } } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_has_msr_star(CPUState *env) { @@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, entry-data = value; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_put_msrs(CPUState *env, int level) { struct { @@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState 
*env) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_put_registers(CPUState *env, int level) { int ret; @@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_irqchip_events(CPUState *env) { diff --git a/vl.c b/vl.c index 22a3616..378a176 100644 --- a/vl.c +++ b/vl.c @@ -2466,7 +2466,7 @@ int main(int argc, char **argv, char **envp)
Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL
On 09/13/2010 01:52 PM, Jan Kiszka wrote: Am 13.09.2010 19:54, Avi Kivity wrote: The symbol KVM_UPSTREAM is used to mark sections of code that are part of the upstream kvm implemetation that is not used in qemu-kvm. However the name becomes ambiguous if qemu-kvm is merged upstream. I doubt this is describing all cases correctly as well. Some changes should rather happen the other way around (e.g. you surely don't want to obsolete x86 kvm_arch_put/get_registers in favor of kvm_arch_load/save_regs, do you?). There's really no perfect name to describe what we're actually doing here. It's probably not a detail worth worrying that much about. Regards, Anthony Liguori Jan Rename the symbol to avoid confusion. Signed-off-by: Avi Kivitya...@redhat.com --- cpus.c|2 +- kvm-all.c | 16 kvm.h |6 +++--- target-i386/kvm.c | 10 +- vl.c |4 ++-- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/cpus.c b/cpus.c index c545a62..99c04d1 100644 --- a/cpus.c +++ b/cpus.c @@ -299,7 +299,7 @@ void qemu_notify_event(void) } } -#if defined(KVM_UPSTREAM) || !defined(CONFIG_KVM) +#if defined(OBSOLETE_KVM_IMPL) || !defined(CONFIG_KVM) void qemu_mutex_lock_iothread(void) {} void qemu_mutex_unlock_iothread(void) {} #endif diff --git a/kvm-all.c b/kvm-all.c index 4ff75c4..d4b0861 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -41,7 +41,7 @@ do { } while (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL typedef struct KVMSlot { @@ -156,7 +156,7 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION,mem); } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void kvm_reset_vcpu(void *opaque) { CPUState *env = opaque; @@ -176,7 +176,7 @@ int kvm_pit_in_kernel(void) } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_init_vcpu(CPUState *env) { KVMState *s = kvm_state; @@ -594,7 +594,7 @@ void kvm_cpu_register_phys_memory_client(void) cpu_register_phys_memory_client(kvm_cpu_phys_memory_client); } -#ifdef KVM_UPSTREAM 
+#ifdef OBSOLETE_KVM_IMPL int kvm_init(int smp_cpus) { @@ -816,7 +816,7 @@ void kvm_flush_coalesced_mmio_buffer(void) #endif } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static void do_kvm_cpu_synchronize_state(void *_env) { @@ -1038,7 +1038,7 @@ int kvm_has_debugregs(void) return kvm_state-debugregs; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_has_xsave(void) { return kvm_state-xsave; @@ -1069,10 +1069,10 @@ void kvm_setup_guest_memory(void *start, size_t size) } #ifdef KVM_CAP_SET_GUEST_DEBUG -#ifndef KVM_UPSTREAM +#ifndef OBSOLETE_KVM_IMPL #define run_on_cpu on_vcpu static void on_vcpu(CPUState *env, void (*func)(void *data), void *data); -#endif /* !KVM_UPSTREAM */ +#endif /* !OBSOLETE_KVM_IMPL */ struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *env, target_ulong pc) diff --git a/kvm.h b/kvm.h index d321fce..56236ae 100644 --- a/kvm.h +++ b/kvm.h @@ -31,13 +31,13 @@ extern int kvm_allowed; #define kvm_enabled() (0) #endif -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL struct kvm_run; /* external API */ int kvm_init(int smp_cpus); -#endif /* KVM_UPSTREAM */ +#endif /* OBSOLETE_KVM_IMPL */ int kvm_has_sync_mmu(void); int kvm_has_vcpu_events(void); @@ -96,7 +96,7 @@ int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); int kvm_arch_pre_run(CPUState *env, struct kvm_run *run); -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_irqchip_events(CPUState *env); #endif diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b00e80d..f4fc063 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -188,7 +188,7 @@ int kvm_arch_init_vcpu(CPUState *env) return r; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL env-mp_state = KVM_MP_STATE_RUNNABLE; @@ -304,7 +304,7 @@ void kvm_arch_reset_vcpu(CPUState *env) env-mp_state = KVM_MP_STATE_RUNNABLE; } } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_has_msr_star(CPUState *env) { @@ -644,7 +644,7 @@ static void kvm_msr_entry_set(struct kvm_msr_entry *entry, 
entry-data = value; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL static int kvm_put_msrs(CPUState *env, int level) { struct { @@ -1104,7 +1104,7 @@ static int kvm_get_debugregs(CPUState *env) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_put_registers(CPUState *env, int level) { int ret; @@ -1242,7 +1242,7 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) return 0; } -#ifdef KVM_UPSTREAM +#ifdef OBSOLETE_KVM_IMPL int kvm_arch_process_irqchip_events(CPUState *env) { diff --git a/vl.c b/vl.c index 22a3616..378a176 100644 ---
Re: [PATCH] Rename KVM_UPSTREAM to OBSOLETE_KVM_IMPL
Am 13.09.2010 20:56, Anthony Liguori wrote: On 09/13/2010 01:52 PM, Jan Kiszka wrote: Am 13.09.2010 19:54, Avi Kivity wrote: The symbol KVM_UPSTREAM is used to mark sections of code that are part of the upstream kvm implementation that is not used in qemu-kvm. However the name becomes ambiguous if qemu-kvm is merged upstream. I doubt this is describing all cases correctly as well. Some changes should rather happen the other way around (e.g. you surely don't want to obsolete x86 kvm_arch_put/get_registers in favor of kvm_arch_load/save_regs, do you?). There's really no perfect name to describe what we're actually doing here. It's probably not a detail worth worrying that much about. I don't mind the name as long as it doesn't reflect the strategy (but why this change at all then?). Jan (who would prefer to have the time for doing the cleanups) signature.asc Description: OpenPGP digital signature
[PATCH RFC] dma_rw.h (was Re: [PATCH 0/7] AMD IOMMU emulation patchset v4)
So I think the following will give the idea of what an API might look like that will let us avoid the scary hacks in e.g. the ide layer and other generic layers that need to do DMA, without either binding us to pci, adding more complexity with callbacks, or losing type safety with casts and void*. Basically we have DMADevice that we can use container_of on to get a PCIDevice from, and DMAMmu that will get instanciated in a specific MMU. This is not complete code - just a header - I might complete this later if/when there's interest or hopefully someone interested in iommu emulation will. Notes: the IOMMU_PERM_RW code seem unused, so I replaced this with plain is_write. Is it ever useful? It seems that invalidate callback should be able to get away with just a device, so I switched to that from a void pointer for type safety. Seems enough for the users I saw. I saw devices do stl_le_phys and such, these might need to be wrapped as well. Signed-off-by: Michael S. Tsirkin m...@redhat.com --- diff --git a/hw/dma_rw.h b/hw/dma_rw.h new file mode 100644 index 000..d63fd17 --- /dev/null +++ b/hw/dma_rw.h @@ -0,0 +1,122 @@ +#ifndef DMA_RW_H +#define DMA_RW_H + +#include qemu-common.h + +/* We currently only have pci mmus, but using + a generic type makes it possible to use this + e.g. from the generic ide code without callbacks. */ +typedef uint64_t dma_addr_t; + +typedef struct DMAMmu DMAMmu; +typedef struct DMADevice DMADevice; + +typedef int DMATranslateFunc(DMAMmu *mmu, + DMADevice *dev, + dma_addr_t addr, + dma_addr_t *paddr, + dma_addr_t *len, + int is_write); + +typedef int DMAInvalidateMapFunc(DMADevice *); +struct DMAMmu { + /* invalidate, etc. 
*/ + DmaTranslateFunc *translate; +}; + +struct DMADevice { + DMAMmu *mmu; + DMAInvalidateMapFunc *invalidate; +}; + +void dma_device_init(DMADevice *, DMAMmu *, DMAInvalidateMapFunc *); + +static inline void dma_memory_rw(DMADevice *dev, +dma_addr_t addr, +void *buf, +uint32_t len, +int is_write) +{ +uint32_t plen; +/* Fast-path non-iommu. + * More importantly, makes it obvious what this function does. */ +if (!dev-mmu) { + cpu_physical_memory_rw(paddr, buf, plen, is_write); + return; +} +while (len) { +err = dev-mmu-translate(iommu, dev, addr, paddr, plen, is_write); +if (err) { +return; +} + +/* The translation might be valid for larger regions. */ +if (plen len) { +plen = len; +} + +cpu_physical_memory_rw(paddr, buf, plen, is_write); + +len -= plen; +addr += plen; +buf += plen; +} +} + +void *dma_memory_map(DMADevice *dev, +dma_addr_t addr, +uint32_t *len, +int is_write); +void dma_memory_unmap(DMADevice *dev, + void *buffer, + uint32_t len, + int is_write, + uint32_t access_len); + + ++#define DEFINE_DMA_LD(suffix, size) \ ++uint##size##_t dma_ld##suffix(DMADevice *dev, dma_addr_t addr)\ ++{ \ ++int err; \ ++target_phys_addr_t paddr, plen; \ ++if (!dev-mmu) { \ ++return ld##suffix##_phys(addr, val); \ ++} \ ++ \ ++err = dev-mmu-translate(dev-bus-iommu, dev, \ ++ addr, paddr, plen, IOMMU_PERM_READ); \ ++if (err || (plen size / 8)) \ ++return 0; \ ++ \ ++return ld##suffix##_phys(paddr); \ ++} ++ ++#define DEFINE_DMA_ST(suffix, size) \ ++void dma_st##suffix(DMADevice *dev, dma_addr_t addr, uint##size##_t val) \ ++{ \ ++int err; \ ++target_phys_addr_t paddr, plen; \ ++ \ ++if (!dev-mmu) {
[RFC PATCH 0/1] macvtap TX zero copy between guest and host kernel
This patch introduces a new sock flag, ZEROCOPY, to avoid copying between userspace and kernel. macvtap is the first user of zero copy between guest and host kernel. It is only used when the lower device supports high memory DMA. The first set of patches only addresses the transmission (TX) side. Testing has shown a big improvement in either CPU utilization reduction or BW increase on a 10GbE Intel NIC. Performance data will be submitted in a coming email. thanks Shirley -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Qemu-devel] [PATCH RFC] dma_rw.h (was Re: [PATCH 0/7] AMD IOMMU emulation patchset v4)
On 09/13/2010 03:01 PM, Michael S. Tsirkin wrote: So I think the following will give the idea of what an API might look like that will let us avoid the scary hacks in e.g. the ide layer and other generic layers that need to do DMA, without either binding us to pci, adding more complexity with callbacks, or losing type safety with casts and void*. Basically we have DMADevice that we can use container_of on to get a PCIDevice from, and DMAMmu that will get instanciated in a specific MMU. This is not complete code - just a header - I might complete this later if/when there's interest or hopefully someone interested in iommu emulation will. Notes: the IOMMU_PERM_RW code seem unused, so I replaced this with plain is_write. Is it ever useful? It seems that invalidate callback should be able to get away with just a device, so I switched to that from a void pointer for type safety. Seems enough for the users I saw. I saw devices do stl_le_phys and such, these might need to be wrapped as well. Signed-off-by: Michael S. Tsirkinm...@redhat.com One of the troubles with an interface like this is that I'm not sure a generic model universally works. For instance, I know some PCI busses do transparent byte swapping. For this to work, there has to be a notion of generic memory reads/writes vs. reads of a 32-bit, 16-bit, and 8-bit value. With a generic API, we lose the flexibility to do this type of bus interface. Regards, Anthony Liguori --- diff --git a/hw/dma_rw.h b/hw/dma_rw.h new file mode 100644 index 000..d63fd17 --- /dev/null +++ b/hw/dma_rw.h @@ -0,0 +1,122 @@ +#ifndef DMA_RW_H +#define DMA_RW_H + +#include qemu-common.h + +/* We currently only have pci mmus, but using + a generic type makes it possible to use this + e.g. from the generic ide code without callbacks. 
*/ +typedef uint64_t dma_addr_t; + +typedef struct DMAMmu DMAMmu; +typedef struct DMADevice DMADevice; + +typedef int DMATranslateFunc(DMAMmu *mmu, + DMADevice *dev, + dma_addr_t addr, + dma_addr_t *paddr, + dma_addr_t *len, + int is_write); + +typedef int DMAInvalidateMapFunc(DMADevice *); +struct DMAMmu { + /* invalidate, etc. */ + DmaTranslateFunc *translate; +}; + +struct DMADevice { + DMAMmu *mmu; + DMAInvalidateMapFunc *invalidate; +}; + +void dma_device_init(DMADevice *, DMAMmu *, DMAInvalidateMapFunc *); + +static inline void dma_memory_rw(DMADevice *dev, +dma_addr_t addr, +void *buf, +uint32_t len, +int is_write) +{ +uint32_t plen; +/* Fast-path non-iommu. + * More importantly, makes it obvious what this function does. */ +if (!dev-mmu) { + cpu_physical_memory_rw(paddr, buf, plen, is_write); + return; +} +while (len) { +err = dev-mmu-translate(iommu, dev, addr,paddr,plen, is_write); +if (err) { +return; +} + +/* The translation might be valid for larger regions. */ +if (plen len) { +plen = len; +} + +cpu_physical_memory_rw(paddr, buf, plen, is_write); + +len -= plen; +addr += plen; +buf += plen; +} +} + +void *dma_memory_map(DMADevice *dev, +dma_addr_t addr, +uint32_t *len, +int is_write); +void dma_memory_unmap(DMADevice *dev, + void *buffer, + uint32_t len, + int is_write, + uint32_t access_len); + + ++#define DEFINE_DMA_LD(suffix, size) \ ++uint##size##_t dma_ld##suffix(DMADevice *dev, dma_addr_t addr)\ ++{ \ ++int err; \ ++target_phys_addr_t paddr, plen; \ ++if (!dev-mmu) { \ ++return ld##suffix##_phys(addr, val); \ ++} \ ++ \ ++err = dev-mmu-translate(dev-bus-iommu, dev, \ ++ addr,paddr,plen, IOMMU_PERM_READ); \ ++if (err || (plen size / 8)) \ ++return 0; \ ++ \ ++return ld##suffix##_phys(paddr); \ ++} ++ ++#define DEFINE_DMA_ST(suffix, size)
[RFC PATCH 1/2] macvtap: A new sock zero copy flag
/* Add a new flag to support sock zero copy from user space to kernel */ Signed-off-by: Shirley Ma x...@us.ibm.com --- include/net/sock.h |1 + 1 files changed, 1 insertions(+), 0 deletions(-) diff --git a/include/net/sock.h b/include/net/sock.h index adab9dc..80172de 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -543,6 +543,7 @@ enum sock_flags { SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */ SOCK_FASYNC, /* fasync() active */ SOCK_RXQ_OVFL, + SOCK_ZEROCOPY, /* zerocopy from user space to kernel */ }; static inline void sock_copy_flags(struct sock *nsk, struct sock *osk) -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
Add zero copy feature between userspace and kernel in macvtap when lower device supports high memory DMA. Signed-off-by: Shirley Ma x...@us.ibm.com --- drivers/net/macvtap.c | 136 + 1 files changed, 126 insertions(+), 10 deletions(-) diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 3b1c54a..186cde1 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -274,6 +274,7 @@ static int macvtap_open(struct inode *inode, struct file *file) struct net *net = current-nsproxy-net_ns; struct net_device *dev = dev_get_by_index(net, iminor(inode)); struct macvtap_queue *q; + struct macvlan_dev *vlan = netdev_priv(dev); int err; err = -ENODEV; @@ -302,6 +303,17 @@ static int macvtap_open(struct inode *inode, struct file *file) q-flags = IFF_VNET_HDR | IFF_NO_PI | IFF_TAP; q-vnet_hdr_sz = sizeof(struct virtio_net_hdr); + /* +* so far only VM uses macvtap, enable zero copy between guest +* kernel and host kernel when lower device supports high memory +* DMA +*/ + if (vlan) { + if ((vlan-lowerdev-features NETIF_F_HIGHDMA) + (vlan-lowerdev-features NETIF_F_SG)) + sock_set_flag(q-sk, SOCK_ZEROCOPY); + } + err = macvtap_set_queue(dev, file, q); if (err) sock_put(q-sk); @@ -343,6 +355,24 @@ out: return mask; } +#define GOODCOPY_LEN (L1_CACHE_BYTES 64 ? 
64 : L1_CACHE_BYTES) + +static inline struct sk_buff *macvtap_alloc_skb_goodcopy(struct sock *sk, +size_t prepad, size_t copy, +int noblock, int *err) +{ + struct sk_buff *skb; + + skb = sock_alloc_send_pskb(sk, prepad + copy, 0, noblock, err); + if (!skb) + return NULL; + skb_reserve(skb, prepad); + skb_put(skb, copy); + + return skb; + +} + static inline struct sk_buff *macvtap_alloc_skb(struct sock *sk, size_t prepad, size_t len, size_t linear, int noblock, int *err) @@ -447,15 +477,91 @@ static int macvtap_skb_to_vnet_hdr(const struct sk_buff *skb, return 0; } +/* set skb frags from iovec, this can move to core network code for reuse */ +static int set_sg_from_iovec_zerocopy(struct sk_buff *skb, + const struct iovec *from, int offset, + size_t count) +{ + int len = iov_length(from, count) - offset; + int copy = skb_headlen(skb); + int size, offset1 = 0; + int i = 0; + skb_frag_t *f; + + /* Skip over from offset */ + while (offset = from-iov_len) { + offset -= from-iov_len; + ++from; + --count; + } + + /* copy up to skb headlen */ + while (copy 0) { + size = min_t(unsigned int, copy, from-iov_len - offset); + if (copy_from_user(skb-data + offset1, from-iov_base + offset, + size)) + return -EFAULT; + if (copy size) { + ++from; + --count; + } + copy -= size; + offset1 += size; + offset = 0; + } + + if (len == offset1) + return 0; + + while (count--) { + struct page *page[MAX_SKB_FRAGS]; + int num_pages; + unsigned long base; + + len = from-iov_len - offset1; + if (!len) { + offset1 = 0; + ++from; + continue; + } + base = (unsigned long)from-iov_base + offset1; + size = ((base ~PAGE_MASK) + len + ~PAGE_MASK) PAGE_SHIFT; + num_pages = get_user_pages_fast(base, size, 0, page[i]); + if ((num_pages != size) || + (num_pages MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags)) + /* put_page is in skb free */ + return -EFAULT; + while (len) { + f = skb_shinfo(skb)-frags[i]; + f-page = page[i]; + f-page_offset = base ~PAGE_MASK; + f-size = min_t(int, len, PAGE_SIZE - f-page_offset); 
+ skb-data_len += f-size; + skb-len += f-size; + skb-truesize += f-size; + skb_shinfo(skb)-nr_frags++; + /* increase sk_wmem_alloc */ + if (skb-sk skb-destructor == sock_wfree) + atomic_add(f-size, skb-sk-sk_wmem_alloc); +
Re: [PATCH] [RFC] Add support for a USB audio device model
On Fri, Sep 10, 2010 at 02:47:56PM -0700, H. Peter Anvin wrote: I discovered that none of the audio device models supported by current Qemu/KVM appear to be supported out of the box on Win7 64 bit (AC97 works fine on 32 bit). The most logical ways to fix that would be to add a long-term supportable audio device model. Intel HD Audio and USB Audio seemed like the most reasonable options, but I opted for USB Audio for a few reasons: ... diff --git a/configure b/configure index 8228c1c..4fcb829 100755 --- a/configure +++ b/configure @@ -71,8 +71,8 @@ sparc_cpu= cross_prefix= cc=gcc audio_drv_list= -audio_card_list=ac97 es1370 sb16 -audio_possible_cards=ac97 es1370 sb16 cs4231a adlib gus +audio_card_list=ac97 es1370 sb16 usb-audio +audio_possible_cards=ac97 es1370 sb16 cs4231a adlib gus usb-audio block_drv_whitelist= host_cc=gcc ar=ar @@ -2414,7 +2414,7 @@ if test $vde = yes ; then fi for card in $audio_card_list; do def=CONFIG_`echo $card | tr '[:lower:]' '[:upper:]'` -echo $def=y $config_host_mak +echo ${def//-/_}=y $config_host_mak done echo CONFIG_AUDIO_DRIVERS=$audio_drv_list $config_host_mak for drv in $audio_drv_list; do # patch -p1 /tmp/usb-audio.patch # ./configure ... ... preadv supportyes fdatasync yes uuid support no vhost-net support no Trace backend nop Trace output file trace-pid ./configure: 2276: Bad substitution diff --git a/create_config b/create_config index 0098e68..1caa25b 100755 --- a/create_config +++ b/create_config @@ -25,7 +25,7 @@ case $line in CONFIG_AUDIO_DRIVERS=*) drivers=${line#*=} echo #define CONFIG_AUDIO_DRIVERS \\ -for drv in $drivers; do +for drv in ${drivers//-/_}; do echo ${drv}_audio_driver,\\ done echo @@ -39,10 +39,12 @@ case $line in ;; CONFIG_*=y) # configuration name=${line%=*} +name=${name//-/_} echo #define $name 1 ;; CONFIG_*=*) # configuration name=${line%=*} +name=${name//-/_} value=${line#*=} echo #define $name $value ;; ... 
-- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [RFC] Add support for a USB audio device model
On 09/13/2010 01:53 PM, Amos Kong wrote: # patch -p1 /tmp/usb-audio.patch # ./configure ... ... preadv supportyes fdatasync yes uuid support no vhost-net support no Trace backend nop Trace output file trace-pid ./configure: 2276: Bad substitution What shell is your /bin/sh? -hpa -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [RFC] Add support for a USB audio device model
On 09/13/2010 01:53 PM, Amos Kong wrote: # patch -p1 /tmp/usb-audio.patch # ./configure ... ... preadv supportyes fdatasync yes uuid support no vhost-net support no Trace backend nop Trace output file trace-pid ./configure: 2276: Bad substitution diff --git a/create_config b/create_config index 0098e68..1caa25b 100755 --- a/create_config +++ b/create_config @@ -25,7 +25,7 @@ case $line in CONFIG_AUDIO_DRIVERS=*) drivers=${line#*=} echo #define CONFIG_AUDIO_DRIVERS \\ -for drv in $drivers; do +for drv in ${drivers//-/_}; do echo ${drv}_audio_driver,\\ done echo @@ -39,10 +39,12 @@ case $line in ;; CONFIG_*=y) # configuration name=${line%=*} +name=${name//-/_} echo #define $name 1 ;; CONFIG_*=*) # configuration name=${line%=*} +name=${name//-/_} value=${line#*=} echo #define $name $value ;; Looks like ${.../...} is a bashism. One can replace it with: name=`echo $name | tr '-' '_'` and for drv in `echo $drivers | tr '-' '_'`; do -hpa -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH v9 00/16] Provide a zero-copy method on KVM virtio-net.
On Fri, 2010-09-03 at 13:52 +0300, Michael S. Tsirkin wrote: When binding vhost thread to cpu3, qemu I/O thread to cpu2, macvtap zero copy patch can get 9.4Gb/s. TCP STREAM TEST from 0.0.0.0 (0.0.0.0) port 0 AF_INET to 192.168.10.74 (192.168.10.74) port 0 AF_INET : cpu bind Recv SendSend Utilization Service Demand Socket Socket Message Elapsed Send Recv Send Recv Size SizeSize Time Throughput localremote local remote bytes bytes bytessecs.10^6bits/s % S % S us/KB us/KB 87380 16384 6553660.00 9408.19 55.698.45 0.970 0.589 Shirley OTOH CPU utilization is up too. w/i macvtap zero copy patch, the BW can reach link w/i more cpu usage, w/o macvtap zero copy patch, the BW can't be up to link speed. To achieve same BW, CPU utilization is lower when using zero copy. Shirley -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu
On Mon, Sep 13, 2010 at 07:43:22PM +0200, Michael S. Tsirkin wrote: netdev option in new qemu is mutually exclusive with vlan. Only pass vlan if netdev option is missing. Signed-off-by: Michael S. Tsirkin m...@redhat.com This fix looks good for me. Reviewed-by: Amos Kong ak...@redhat.com BTW, we try to produce three kinds of cmdline, possible combinations: 1. Old way: -net nic,model=e1000,vlan=1 -net tap,vlan=1 2. Semi-new: -device e1000,vlan=1-net tap,vlan=1 3. Best way: -netdev type=tap,id=netdev1 -device e1000,id=netdev1 If you think this is good, I'll sent a patch to upstream. --- diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index bdc9aab..7e76ed5 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -235,9 +235,10 @@ class VM: return cmd def add_nic(help, vlan, model=None, mac=None, netdev_id=None): -cmd = -net nic,vlan=%d % vlan if has_option(help, netdev): -cmd +=,netdev=%s % netdev_id +cmd = -net nic,netdev=%s % netdev_id +else: +cmd = -net nic,vlan=%d % vlan if model: cmd += ,model=%s % model if mac: cmd += ,macaddr='%s' % mac return cmd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [RFC] Add support for a USB audio device model
On Mon, Sep 13, 2010 at 02:04:57PM -0700, H. Peter Anvin wrote: On 09/13/2010 01:53 PM, Amos Kong wrote: # patch -p1 /tmp/usb-audio.patch # ./configure ... ... preadv supportyes fdatasync yes uuid support no vhost-net support no Trace backend nop Trace output file trace-pid ./configure: 2276: Bad substitution Hello Peter, What shell is your /bin/sh? dash, It's fine when using bash, I suggest to use a common way. I've heard wonderful music (guest:win7), but mixed with a litte noise, not so fluent. The following debug msg is normal? # ./x86_64-softmmu/qemu-system-x86_64 ~/win7-32.qcow2 -m 1024 -vnc :0 -usbdevice tablet -cpu qemu64 -enable-kvm -bios pc-bios/bios.bin -net nic,netdev=idkQlbc8,macaddr=02:BB:3A:D3:b8:29 -netdev tap,id=idkQlbc8,ifname=virtio_0_8000,script=/etc/qemu-ifup-vbr0,downscript=no,vhost=on -snapshot -usb -soundhw usb usb_create: no bus specified, using usb.0 for usb-audio usb-audio: reset usb-audio: control transaction: request 0x0005 value 0x0001 index 0x length 0x usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 0x0008 usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 0x0009 usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 0x0071 usb-audio: reset usb-audio: reset usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 0x0040 usb-audio: reset usb-audio: control transaction: request 0x0005 value 0x0001 index 0x length 0x usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 0x0012 usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 0x00ff usb-audio: control transaction: request 0x8006 value 0x03ee index 0x length 0x0012 usb-audio: control transaction: request 0x8006 value 0x0303 index 0x0409 length 0x00ff usb-audio: control transaction: request 0x8006 value 0x0300 index 0x length 0x00ff usb-audio: control transaction: request 0x8006 value 0x0302 index 0x0409 length 0x00ff usb-audio: control transaction: 
request 0x8006 value 0x0600 index 0x length 0x000a usb-audio: failed control transaction: request 0x8006 value 0x0600 index 0x length 0x000a usb-audio: control transaction: request 0x8006 value 0x0100 index 0x length 0x0012 usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 0x0009 usb-audio: control transaction: request 0x8006 value 0x0200 index 0x length 0x0071 usb-audio: control transaction: request 0x0009 value 0x0001 index 0x length 0x usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 0x usb-audio: control transaction: request 0x8006 value 0x0305 index 0x0409 length 0x0004 usb-audio: control transaction: request 0x8006 value 0x0305 index 0x0409 length 0x002c usb-audio: control transaction: request 0x8006 value 0x0307 index 0x0409 length 0x004a usb-audio: control transaction: request 0x8006 value 0x0308 index 0x0409 length 0x0004 usb-audio: control transaction: request 0x8006 value 0x0308 index 0x0409 length 0x003e usb-audio: control transaction: request 0xa181 value 0x0100 index 0x0200 length 0x0001 usb-audio: control transaction: request 0xa181 value 0x0201 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa182 value 0x0201 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa183 value 0x0201 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa184 value 0x0201 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa181 value 0x0202 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa182 value 0x0202 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa183 value 0x0202 index 0x0200 length 0x0002 usb-audio: control transaction: request 0xa184 value 0x0202 index 0x0200 length 0x0002 usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 0x usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 0x usb-audio: set interface 1 usb-audio: control transaction: request 
0x010b value 0x index 0x0001 length 0x usb-audio: set interface 0 usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 0x usb-audio: set interface 1 usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 0x usb-audio: set interface 0 usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 0x usb-audio: set interface 1 usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 0x usb-audio: set interface 0 usb-audio: control transaction: request 0x010b value 0x0001 index 0x0001 length 0x usb-audio: set interface 1 usb-audio: control transaction: request 0x010b value 0x index 0x0001 length 0x usb-audio: set interface 0 usb-audio: control transaction: request
Re: [KVM-AUTOTEST PATCH] tests/kvm: fix -net syntax for new qemu
On Mon, 2010-09-13 at 19:43 +0200, Michael S. Tsirkin wrote: netdev option in new qemu is mutually exclusive with vlan. Only pass vlan if netdev option is missing. Looks good to me, applied, thanks! http://autotest.kernel.org/changeset/4783 Signed-off-by: Michael S. Tsirkin m...@redhat.com --- diff --git a/client/tests/kvm/kvm_vm.py b/client/tests/kvm/kvm_vm.py index bdc9aab..7e76ed5 100755 --- a/client/tests/kvm/kvm_vm.py +++ b/client/tests/kvm/kvm_vm.py @@ -235,9 +235,10 @@ class VM: return cmd def add_nic(help, vlan, model=None, mac=None, netdev_id=None): -cmd = -net nic,vlan=%d % vlan if has_option(help, netdev): -cmd +=,netdev=%s % netdev_id +cmd = -net nic,netdev=%s % netdev_id +else: +cmd = -net nic,vlan=%d % vlan if model: cmd += ,model=%s % model if mac: cmd += ,macaddr='%s' % mac return cmd -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [RFC PATCH 2/2] macvtap: TX zero copy between guest and host kernel
From: Shirley Ma mashi...@us.ibm.com Date: Mon, 13 Sep 2010 13:48:03 -0700 + base = (unsigned long)from-iov_base + offset1; + size = ((base ~PAGE_MASK) + len + ~PAGE_MASK) PAGE_SHIFT; + num_pages = get_user_pages_fast(base, size, 0, page[i]); + if ((num_pages != size) || + (num_pages MAX_SKB_FRAGS - skb_shinfo(skb)-nr_frags)) + /* put_page is in skb free */ + return -EFAULT; What keeps the user from writing to these pages in it's address space after the write call returns? A write() return of success means: I wrote what you gave to me not I wrote what you gave to me, oh and BTW don't touch these pages for a while. In fact a while isn't even defined in any way, as there is no way for the write() invoker to know when the networking card is done with those pages. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH next 00/16] treewide: Use static const char * const where possible
Convert the uses that should be const of char *foo[] = {bar, baz}; to const char * const foo[] = {bar, baz}; or static const char * const foo[] = {bar, baz}; Joe Perches (16): arch/alpha: Use static const char * const where possible arch/ia64: Use static const char * const where possible arch/microblaze: Use static const char * const where possible arch/x86: Use static const char * const where possible drivers/gpu: Use static const char * const where possible drivers/isdn: Use static const char * const where possible drivers/net: Use static const char * const where possible drivers/net/pcmcia: Use static const char * const where possible drivers/net/wireless: Use static const char * const where possible drivers/scsi: Use static const char * const where possible drivers/staging: Use static const char * const where possible drivers/usb: Use static const char * const where possible drivers/watchdog: Use static const char * const where possible fs: Use static const char * const where possible net/irda: Use static const char * const where possible sound: Use static const char * const where possible arch/alpha/kernel/err_ev6.c | 12 +--- arch/alpha/kernel/err_marvel.c | 33 arch/alpha/kernel/err_titan.c| 35 ++--- arch/alpha/kernel/osf_sys.c |4 +- arch/ia64/kernel/palinfo.c |2 +- arch/microblaze/kernel/heartbeat.c | 10 +++--- arch/microblaze/kernel/timer.c | 12 arch/x86/kernel/smpboot.c|2 +- arch/x86/kvm/mmu.c |2 +- drivers/gpu/drm/ttm/ttm_page_alloc.c |4 ++- drivers/isdn/pcbit/edss1.c |2 +- drivers/isdn/pcbit/edss1.h |2 +- drivers/net/3c515.c |4 ++- drivers/net/eth16i.c |4 ++- drivers/net/pcmcia/3c589_cs.c|2 +- drivers/net/wireless/rt2x00/rt2x00debug.c|2 +- drivers/scsi/bfa/rport.c |4 ++- drivers/scsi/pcmcia/nsp_debug.c |2 +- drivers/scsi/qla2xxx/qla_nx.c|4 +- drivers/scsi/qla4xxx/ql4_nx.c|2 +- drivers/staging/ath6kl/os/linux/ar6000_drv.c | 14 +- drivers/staging/bcm/Debug.c |5 ++- drivers/usb/host/oxu210hp-hcd.c |2 +- drivers/watchdog/machzwd.c |2 +- fs/binfmt_flat.c |4 
++- include/net/irda/irlan_event.h |2 +- net/irda/irlan/irlan_event.c |2 +- sound/core/misc.c|5 +--- sound/core/pcm_native.c |2 +- 29 files changed, 99 insertions(+), 83 deletions(-) -- 1.7.3.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH next 04/16] arch/x86: Use static const char * const where possible
Signed-off-by: Joe Perches j...@perches.com --- arch/x86/kernel/smpboot.c |2 +- arch/x86/kvm/mmu.c|2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 63a1a55..b745b30 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -498,7 +498,7 @@ static void impress_friends(void) void __inquire_remote_apic(int apicid) { unsigned i, regs[] = { APIC_ID 4, APIC_LVR 4, APIC_SPIV 4 }; - char *names[] = { ID, VERSION, SPIV }; + const char * const names[] = { ID, VERSION, SPIV }; int timeout; u32 status; diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index d2dad65..2a7d567 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -56,7 +56,7 @@ enum { AUDIT_POST_PTE_WRITE }; -char *audit_point_name[] = { +const char * const audit_point_name[] = { pre page fault, post page fault, pre pte write, -- 1.7.3.rc1 -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [RFC] Add support for a USB audio device model
On 09/13/2010 06:37 PM, Amos Kong wrote: Hello Peter, What shell is your /bin/sh? dash, It's fine when using bash, I suggest to use a common way. Yes, I'll fix it. I've heard wonderful music (guest:win7), but mixed with a little noise, not so fluent. The following debug msg is normal? Yes, all of that is normal. I talked to malc earlier today, and I think I have a pretty good idea for how to deal with the rate-matching issues; I'm going to try to write it up tomorrow. -hpa -- H. Peter Anvin, Intel Open Source Technology Center I work for Intel. I don't speak on their behalf. -- To unsubscribe from this list: send the line unsubscribe kvm in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html